diff --git a/packages/cli/src/commands/scan.ts b/packages/cli/src/commands/scan.ts index 22d7c012..493a9144 100644 --- a/packages/cli/src/commands/scan.ts +++ b/packages/cli/src/commands/scan.ts @@ -126,6 +126,13 @@ export async function runScan(path: string, opts: ScanOptions = {}): Promise { if (status === "error") { console.warn(`codehub scan: ${spec.id} errored: ${note ?? ""}`); + } else if (status === "warn" && note) { + // Advisory: the scan ran but emitted a note (non-clean exit code, + // SARIF parse fallback). Not a skip — the scanner still produced + // output. Label it distinctly so a non-zero-but-ran exit (e.g. + // osv-scanner exit 127 = general error) is not mistaken for "did + // not run". + console.warn(`codehub scan: ${spec.id}: ${note}`); } else if (status === "skipped" && note) { console.warn(`codehub scan: ${spec.id} skipped: ${note}`); } else { diff --git a/packages/scanners/src/runner.test.ts b/packages/scanners/src/runner.test.ts index 42d338a6..9d404f3a 100644 --- a/packages/scanners/src/runner.test.ts +++ b/packages/scanners/src/runner.test.ts @@ -97,6 +97,54 @@ test("runScanners invokes onProgress lifecycle for each scanner", async () => { assert.ok(bEvents.some((e) => e.status === "done")); }); +test("runScanners routes onWarn to `warn` status and does not double-emit the skip note", async () => { + // A wrapper that warns via onWarn AND returns `skipped` (the missing-binary + // shape). Previously the runner re-emitted the same note on the terminal + // `skipped` event, producing two identical lines. + const spec = makeSpec("pip-audit"); + const skipMsg = "pip-audit: binary 'pip-audit' not found on PATH"; + const wrapper: ScannerWrapper = { + spec, + run: async (c: ScannerRunContext): Promise => { + c.onWarn?.(skipMsg); + return { spec, sarif: emptySarifFor(spec), skipped: skipMsg, durationMs: 1 }; + }, + }; + const events: Array<{ status: string; note: string | undefined }> = []; + await runScanners("/tmp/repo", [wrapper], { + onProgress: (_spec, status, note) => events.push({ status, note }), + }); + // The note must appear exactly once (via the `warn` event), and the + // terminal `skipped` event must carry NO note (no duplicate). + const noteOccurrences = events.filter((e) => e.note === skipMsg); + assert.equal(noteOccurrences.length, 1, `note should print once, got ${noteOccurrences.length}`); + assert.equal(noteOccurrences[0]?.status, "warn"); + const terminal = events.filter((e) => e.status === "skipped"); + assert.equal(terminal.length, 1); + assert.equal(terminal[0]?.note, undefined, "terminal skipped event must not repeat the note"); +}); + +test("runScanners emits a single `done` (no note) when a wrapper warns but does not skip", async () => { + // The osv-scanner exit-127 shape: warns via onWarn but returns SARIF + // (not skipped). Should produce exactly one terminal `done`, not a + // contradictory `skipped` + `done` pair. + const spec = makeSpec("osv-scanner"); + const wrapper: ScannerWrapper = { + spec, + run: async (c: ScannerRunContext): Promise => { + c.onWarn?.("osv-scanner: general error (exit 127)"); + return { spec, sarif: emptySarifFor(spec), durationMs: 1 }; + }, + }; + const events: Array<{ status: string; note: string | undefined }> = []; + await runScanners("/tmp/repo", [wrapper], { + onProgress: (_spec, status, note) => events.push({ status, note }), + }); + assert.equal(events.filter((e) => e.status === "warn").length, 1); + assert.equal(events.filter((e) => e.status === "done").length, 1); + assert.equal(events.filter((e) => e.status === "skipped").length, 0); +}); + test("runScanners respects concurrency cap", async () => { let inFlight = 0; let peak = 0; diff --git a/packages/scanners/src/runner.ts b/packages/scanners/src/runner.ts index 149a1c63..cf909862 100644 --- a/packages/scanners/src/runner.ts +++ b/packages/scanners/src/runner.ts @@ -23,7 +23,21 @@ import { type ScannerWrapper, } from "./spec.js"; -export type ScannerStatus = "start" | "done" | "error" | "skipped"; +/** + * Per-scanner lifecycle status. + * + * - `start` — the wrapper began executing. + * - `warn` — the wrapper surfaced an advisory via `onWarn` (e.g. a + * non-clean exit code, or a SARIF-parse fallback). The scan + * still ran and produced (possibly empty) output. Distinct + * from `skipped` so callers don't mislabel "ran with a + * non-zero exit" as "did not run". + * - `done` — the wrapper finished and produced output. + * - `skipped` — the scan did not run (binary missing, etc.); `note` + * carries the reason. + * - `error` — the wrapper threw; `note` carries the error message. + */ +export type ScannerStatus = "start" | "warn" | "done" | "error" | "skipped"; export interface RunScannersOptions { /** Cap on parallel scanners. Default min(availableParallelism(), 4). */ @@ -75,13 +89,32 @@ export async function runScanners( const started = performance.now(); opts.onProgress?.(spec, "start"); try { + // Wrappers call `onWarn` for advisories (non-clean exit code, SARIF + // parse fallback) and ALSO return a `skipped` string when the scan + // did not run. Route `onWarn` to the `warn` status (scan ran, here's + // a note) — NOT `skipped` — and track that it fired. Without this: + // (1) a missing-binary wrapper that calls onWarn AND returns + // `skipped` re-printed the same line twice (the duplicate + // `pip-audit skipped: ...` lines), and + // (2) an advisory-only wrapper (osv-scanner exit 127) was mislabeled + // "skipped: ..." and then immediately followed by a "done" line. + // We coalesce the terminal event: when the wrapper already surfaced + // its `skipped` reason via `onWarn`, emit the terminal lifecycle + // status with no duplicate note. + let warned = false; const result = await wrapper.run({ projectPath, timeoutMs, - onWarn: (m: string) => opts.onProgress?.(spec, "skipped", m), + onWarn: (m: string) => { + warned = true; + opts.onProgress?.(spec, "warn", m); + }, }); runs[myIdx] = result; - opts.onProgress?.(spec, result.skipped ? "skipped" : "done", result.skipped); + const terminal: ScannerStatus = result.skipped ? "skipped" : "done"; + // If the wrapper already explained itself via onWarn, don't repeat + // the note on the terminal line. + opts.onProgress?.(spec, terminal, warned ? undefined : result.skipped); } catch (err) { const message = err instanceof Error ? err.message : String(err); errored.push({ spec, error: message }); diff --git a/packages/scanners/src/wrappers/bandit.ts b/packages/scanners/src/wrappers/bandit.ts index 8e35a402..23697dbe 100644 --- a/packages/scanners/src/wrappers/bandit.ts +++ b/packages/scanners/src/wrappers/bandit.ts @@ -1,29 +1,93 @@ /** * Bandit wrapper — Python SAST. * - * Invocation: `bandit -r -f sarif` + * Invocation: `bandit -r -f sarif --quiet` * * Requires the `bandit[sarif]` install (which pulls in - * `bandit-sarif-formatter` ≥1.1.1). If the formatter is missing, bandit - * falls back to text output and our SARIF parser emits an empty log + - * warning — graceful degradation. + * `bandit-sarif-formatter` ≥1.1.1). IMPORTANT: when that extra is NOT + * installed, `sarif` is absent from bandit's dynamic `-f/--format` choice + * list, so argparse REJECTS `-f sarif` with exit code 2 and a `usage: …` + * message on stderr — bandit does NOT fall back to text output. We detect + * that specific failure and emit an actionable advisory pointing at the + * `bandit[sarif]` install, instead of the misleading generic "stdout was + * not valid JSON" note. + * + * Bandit exit codes: 0 = no issues, 1 = issues found, 2 = usage/argparse + * or config error (NOT a finding). We treat 0 and 1 as "ran"; 2+ is a + * genuine invocation failure. * * Note: bandit writes SARIF to stdout when `-f sarif` is given WITHOUT - * `-o `. Passing `.` as the target would confuse bandit (it treats - * `.` as "current directory" but recurses via `-r` explicitly); we pass - * `projectPath` so the wrapper works from any CWD. + * `-o `. We pass `-r ` so the wrapper works from any CWD. */ import { BANDIT_SPEC } from "../catalog.js"; import type { ScannerRunContext, ScannerRunResult, ScannerWrapper } from "../spec.js"; -import { DEFAULT_DEPS, invokeScanner, type WrapperDeps } from "./shared.js"; +import { emptySarifFor } from "../spec.js"; +import { DEFAULT_DEPS, parseSarifOrEmpty, type WrapperDeps } from "./shared.js"; + +/** + * Detect bandit's argparse rejection of `-f sarif` (exit 2 + a `usage:` + * banner mentioning the format flag) and return an actionable advisory. + * Returns `undefined` when the exit looks like a normal run (0/1) or an + * unrelated failure (handled by the generic exit-code note). + */ +export function banditExitAdvisory(exitCode: number, stderr: string): string | undefined { + if (exitCode === 0 || exitCode === 1) return undefined; + const looksLikeUsage = /\busage:\s*bandit\b/i.test(stderr); + if (exitCode === 2 && looksLikeUsage) { + return ( + `${BANDIT_SPEC.id}: rejected '-f sarif' (exit 2). The SARIF formatter is not ` + + `installed — install the extra: ${BANDIT_SPEC.installCmd}. ` + + `(Bandit does not fall back to text output here; it exits with a usage error.)` + ); + } + return `${BANDIT_SPEC.id}: exit code ${exitCode}; stderr: ${truncate(stderr, 160)}`; +} export function createBanditWrapper(deps: WrapperDeps = DEFAULT_DEPS): ScannerWrapper { return { spec: BANDIT_SPEC, run: async (ctx: ScannerRunContext): Promise => { + const started = performance.now(); + const probe = await deps.which("bandit"); + if (!probe.found) { + const msg = `${BANDIT_SPEC.id}: binary 'bandit' not found on PATH (install: ${BANDIT_SPEC.installCmd}).`; + ctx.onWarn?.(msg); + return { + spec: BANDIT_SPEC, + sarif: emptySarifFor(BANDIT_SPEC), + skipped: msg, + durationMs: performance.now() - started, + }; + } const args: readonly string[] = ["-r", ctx.projectPath, "-f", "sarif", "--quiet"]; - return invokeScanner(BANDIT_SPEC, ctx, "bandit", args, deps); + const result = await deps.runBinary("bandit", args, { + timeoutMs: ctx.timeoutMs, + cwd: ctx.projectPath, + }); + const advisory = banditExitAdvisory(result.exitCode, result.stderr); + if (advisory !== undefined) { + // Argparse / invocation failure: stdout is empty (the usage banner + // went to stderr). Surface the specific advisory and skip the generic + // "stdout was not valid JSON" note, which would be misleading here. + ctx.onWarn?.(advisory); + return { + spec: BANDIT_SPEC, + sarif: emptySarifFor(BANDIT_SPEC), + durationMs: performance.now() - started, + }; + } + const sarif = parseSarifOrEmpty(result.stdout, BANDIT_SPEC, ctx.onWarn); + return { + spec: BANDIT_SPEC, + sarif, + durationMs: performance.now() - started, + }; }, }; } + +function truncate(s: string, max: number): string { + if (s.length <= max) return s.trim(); + return `${s.slice(0, max).trim()}…`; +} diff --git a/packages/scanners/src/wrappers/osv-scanner.ts b/packages/scanners/src/wrappers/osv-scanner.ts index 648a01b6..76184c72 100644 --- a/packages/scanners/src/wrappers/osv-scanner.ts +++ b/packages/scanners/src/wrappers/osv-scanner.ts @@ -1,31 +1,98 @@ /** * OSV-Scanner wrapper — dependency vulnerability scanner. * - * Invocation: `osv-scanner scan --format=sarif --offline-vulnerabilities - * --recursive .` + * Invocation: `osv-scanner scan source --format=sarif --recursive .` * - * We enable offline mode by default — OpenCodeHub's v1.0 posture is that - * the vulnerability database is pre-synced by `codehub db-sync`. - * `--offline-vulnerabilities` keeps the tool from reaching osv.dev while - * still allowing local lockfile resolution to touch manifests on disk. + * Exit-code semantics (osv-scanner v2, per + * https://google.github.io/osv-scanner/output/ "Return Codes"): + * + * 0 — packages found, no vulnerabilities/findings. + * 1 — packages found, vulnerabilities/findings present (the COMMON + * non-zero case; NOT an error). + * 2-126 — reserved for vulnerability/finding-related results. + * 127 — GENERAL ERROR (e.g. the offline vulnerability DB could not be + * loaded). Note: the scan may still have walked the filesystem and + * parsed lockfiles before failing, so partial stderr ("Scanned … + * uv.lock … found N packages") does NOT mean the scan succeeded. + * 128 — no packages discovered (scanning format picked up no files). + * 129-255 — non-result-related errors. + * + * The shared `invokeScanner` treats only 0 and 1 as "ran cleanly". For + * osv-scanner that is too coarse: it would flag exit 1 (vulns found — the + * whole point of the scan) as a warning, and it would surface exit 127 as + * a generic "exit code 127" without explaining that the most likely cause + * is a missing offline database. We interpret osv's codes explicitly here. + * + * Offline posture: earlier revisions passed `--offline-vulnerabilities` by + * default on the assumption the DB was pre-synced by `codehub db-sync`. + * On a fresh checkout with no synced DB, osv-scanner walks the tree, then + * fails to load the offline DB and exits 127 — a confusing "it ran but + * errored" signal. We DROP the default offline flag so the common case + * (online lookup) works out of the box; operators who need air-gapped + * scans pass it back via osv-scanner's own env/flags after `db-sync`. */ import { OSV_SCANNER_SPEC } from "../catalog.js"; import type { ScannerRunContext, ScannerRunResult, ScannerWrapper } from "../spec.js"; -import { DEFAULT_DEPS, invokeScanner, type WrapperDeps } from "./shared.js"; +import { emptySarifFor } from "../spec.js"; +import { DEFAULT_DEPS, parseSarifOrEmpty, type WrapperDeps } from "./shared.js"; -const OSV_ARGS: readonly string[] = [ - "scan", - "--format=sarif", - "--offline-vulnerabilities", - "--recursive", - ".", -]; +const OSV_ARGS: readonly string[] = ["scan", "source", "--format=sarif", "--recursive", "."]; + +/** + * Map an osv-scanner v2 exit code to an advisory note, or `undefined` when + * the exit represents a clean / findings-present run that needs no warning. + */ +export function osvExitAdvisory(exitCode: number, stderr: string): string | undefined { + // 0 = no findings; 1-126 = findings present (the normal non-zero outcome). + if (exitCode >= 0 && exitCode <= 126) return undefined; + if (exitCode === 127) { + return ( + `${OSV_SCANNER_SPEC.id}: general error (exit 127). Common cause: the offline ` + + `vulnerability DB is missing — run \`codehub db-sync\` then retry, or omit ` + + `offline mode. stderr: ${truncate(stderr, 160)}` + ); + } + if (exitCode === 128) { + // No packages discovered. Benign for repos with no lockfiles/manifests. + return `${OSV_SCANNER_SPEC.id}: no packages discovered (exit 128); nothing to scan.`; + } + return `${OSV_SCANNER_SPEC.id}: exit code ${exitCode}; stderr: ${truncate(stderr, 160)}`; +} export function createOsvScannerWrapper(deps: WrapperDeps = DEFAULT_DEPS): ScannerWrapper { return { spec: OSV_SCANNER_SPEC, - run: (ctx: ScannerRunContext): Promise => - invokeScanner(OSV_SCANNER_SPEC, ctx, "osv-scanner", OSV_ARGS, deps), + run: async (ctx: ScannerRunContext): Promise => { + const started = performance.now(); + const probe = await deps.which("osv-scanner"); + if (!probe.found) { + const msg = `${OSV_SCANNER_SPEC.id}: binary 'osv-scanner' not found on PATH (install: ${OSV_SCANNER_SPEC.installCmd}).`; + ctx.onWarn?.(msg); + return { + spec: OSV_SCANNER_SPEC, + sarif: emptySarifFor(OSV_SCANNER_SPEC), + skipped: msg, + durationMs: performance.now() - started, + }; + } + const result = await deps.runBinary("osv-scanner", OSV_ARGS, { + timeoutMs: ctx.timeoutMs, + cwd: ctx.projectPath, + }); + const sarif = parseSarifOrEmpty(result.stdout, OSV_SCANNER_SPEC, ctx.onWarn); + const advisory = osvExitAdvisory(result.exitCode, result.stderr); + if (advisory !== undefined) ctx.onWarn?.(advisory); + return { + spec: OSV_SCANNER_SPEC, + sarif, + durationMs: performance.now() - started, + }; + }, }; } + +function truncate(s: string, max: number): string { + if (s.length <= max) return s.trim(); + return `${s.slice(0, max).trim()}…`; +} diff --git a/packages/scanners/src/wrappers/wrappers.test.ts b/packages/scanners/src/wrappers/wrappers.test.ts index 784c0ea1..773c863f 100644 --- a/packages/scanners/src/wrappers/wrappers.test.ts +++ b/packages/scanners/src/wrappers/wrappers.test.ts @@ -129,14 +129,61 @@ test("betterleaks wrapper uses `dir` mode and injects vendored config", async () assert.match(cfgArg ?? "", /betterleaks\.default\.toml$/); }); -test("osv-scanner wrapper sends --offline-vulnerabilities", async () => { +test("osv-scanner wrapper invokes `scan source` with --format=sarif (online default)", async () => { const sarif = fakeSarif("osv-scanner", "GHSA-xyz"); const { deps, calls } = makeFakeDeps(() => ({ stdout: JSON.stringify(sarif) })); const wrapper = createOsvScannerWrapper(deps); const out = await wrapper.run(ctx); assert.equal(out.sarif.runs[0]?.tool.driver.name, "osv-scanner"); - assert.ok(calls[0]?.args.includes("--offline-vulnerabilities")); - assert.ok(calls[0]?.args.includes("--format=sarif")); + const args = calls[0]?.args ?? []; + assert.equal(args[0], "scan"); + assert.equal(args[1], "source"); + assert.ok(args.includes("--format=sarif")); + assert.ok(args.includes("--recursive")); + // Offline-by-default removed: it required a pre-synced DB and otherwise + // produced a confusing exit-127 "ran but errored" signal on fresh repos. + assert.ok(!args.includes("--offline-vulnerabilities"), "must not force offline mode by default"); +}); + +test("osv-scanner wrapper does NOT warn on exit 1 (vulnerabilities found)", async () => { + const sarif = fakeSarif("osv-scanner", "GHSA-xyz"); + const warnings: string[] = []; + const { deps } = makeFakeDeps(() => ({ stdout: JSON.stringify(sarif), exitCode: 1 })); + const wrapper = createOsvScannerWrapper(deps); + await wrapper.run({ ...ctx, onWarn: (m) => warnings.push(m) }); + // Exit 1 = packages found + vulns present. That's the scan working as + // intended, not a failure — no advisory should fire. + assert.equal(warnings.length, 0, `expected no warnings on exit 1, got: ${warnings.join(" | ")}`); +}); + +test("osv-scanner wrapper warns on exit 127 (general error) with an offline-DB hint", async () => { + const warnings: string[] = []; + const { deps } = makeFakeDeps(() => ({ + stdout: "", + stderr: "failed to load offline database", + exitCode: 127, + })); + const wrapper = createOsvScannerWrapper(deps); + await wrapper.run({ ...ctx, onWarn: (m) => warnings.push(m) }); + // stdout was empty → parseSarifOrEmpty warns, plus the exit-127 advisory. + const combined = warnings.join(" | "); + assert.ok( + combined.includes("general error"), + `expected general-error advisory, got: ${combined}`, + ); + assert.ok(combined.includes("db-sync"), "exit-127 advisory should hint at codehub db-sync"); +}); + +test("osv-scanner wrapper reports exit 128 as 'no packages discovered'", async () => { + const warnings: string[] = []; + const sarif = fakeSarif("osv-scanner", "GHSA-xyz"); + const { deps } = makeFakeDeps(() => ({ stdout: JSON.stringify(sarif), exitCode: 128 })); + const wrapper = createOsvScannerWrapper(deps); + await wrapper.run({ ...ctx, onWarn: (m) => warnings.push(m) }); + assert.ok( + warnings.some((w) => w.includes("no packages discovered")), + `expected a 'no packages discovered' note, got: ${warnings.join(" | ")}`, + ); }); test("bandit wrapper passes -f sarif and recurses via -r ", async () => { @@ -149,6 +196,34 @@ test("bandit wrapper passes -f sarif and recurses via -r ", async ( assert.deepEqual([...(calls[0]?.args ?? [])], ["-r", ctx.projectPath, "-f", "sarif", "--quiet"]); }); +test("bandit wrapper emits a SARIF-formatter advisory on exit 2 + usage banner", async () => { + const warnings: string[] = []; + const { deps } = makeFakeDeps(() => ({ + stdout: "", + stderr: + "usage: bandit [-h] [-r] [-a {file,vuln}] [-n CONTEXT_LINES] ...\nbandit: error: argument -f/--format: invalid choice: 'sarif'", + exitCode: 2, + })); + const wrapper = createBanditWrapper(deps); + const out = await wrapper.run({ ...ctx, onWarn: (m) => warnings.push(m) }); + const combined = warnings.join(" | "); + assert.ok(combined.includes("SARIF formatter is not"), `got: ${combined}`); + assert.ok(combined.includes("bandit[sarif]"), "advisory should point at the bandit[sarif] extra"); + // No misleading "stdout was not valid JSON" note on this path. + assert.ok(!combined.includes("not valid JSON"), "should suppress the generic JSON note"); + assert.equal(out.sarif.runs[0]?.results?.length, 0); +}); + +test("bandit wrapper does NOT warn on exit 1 (issues found)", async () => { + const sarif = fakeSarif("bandit", "B101"); + const warnings: string[] = []; + const { deps } = makeFakeDeps(() => ({ stdout: JSON.stringify(sarif), exitCode: 1 })); + const wrapper = createBanditWrapper(deps); + const out = await wrapper.run({ ...ctx, onWarn: (m) => warnings.push(m) }); + assert.equal(warnings.length, 0, `expected no warnings on exit 1, got: ${warnings.join(" | ")}`); + assert.equal(out.sarif.runs[0]?.results?.[0]?.ruleId, "B101"); +}); + test("biome wrapper prefers global binary when available", async () => { const sarif = fakeSarif("biome", "suspicious/noExplicitAny"); const { deps, calls } = makeFakeDeps(() => ({ stdout: JSON.stringify(sarif) })); diff --git a/packages/scip-ingest/src/runners/index.test.ts b/packages/scip-ingest/src/runners/index.test.ts index db4ccbfe..4a280187 100644 --- a/packages/scip-ingest/src/runners/index.test.ts +++ b/packages/scip-ingest/src/runners/index.test.ts @@ -17,7 +17,7 @@ import { mkdtempSync, writeFileSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; import { test } from "node:test"; -import { defaultCobolProleapPaths, runIndexer } from "./index.js"; +import { defaultCobolProleapPaths, detectVersionManagerShimFailure, runIndexer } from "./index.js"; test("runIndexer(cobol-proleap): skips with fallback message when opt-in is absent", async () => { const dir = mkdtempSync(join(tmpdir(), "scip-ingest-")); @@ -78,3 +78,35 @@ test("defaultCobolProleapPaths: resolves under ~/.codehub/vendor/proleap", () => assert.equal(paths.jarPath, "/Users/alice/.codehub/vendor/proleap/proleap-cobol-parser.jar"); assert.equal(paths.wrapperDir, "/Users/alice/.codehub/vendor/proleap"); }); + +test("detectVersionManagerShimFailure: matches the mise no-version-set shim error", () => { + const stderr = + "mise ERROR No version is set for shim: scip-python\n" + + "Set a global default version with one of the following:\n" + + "mise use -g node@22.22.0\n" + + "mise ERROR Run with --verbose or MISE_VERBOSE=1 for more information"; + const reason = detectVersionManagerShimFailure("scip-python", stderr); + assert.ok(reason !== undefined, "should detect the mise shim failure"); + assert.match(reason ?? "", /version-manager shim/); + assert.match(reason ?? "", /mise use scip-python@latest/); + assert.match(reason ?? "", /Skipping scip-python/); +}); + +test("detectVersionManagerShimFailure: matches an asdf no-version-set error", () => { + const reason = detectVersionManagerShimFailure( + "scip-python", + "No version is set for command scip-python\nConsider adding one of the following...", + ); + assert.ok(reason !== undefined); + assert.match(reason ?? "", /version-manager shim/); +}); + +test("detectVersionManagerShimFailure: ignores a genuine indexer crash", () => { + // A real scip-python crash (traceback) must NOT be treated as a shim + // failure — it should still throw upstream so the operator sees it. + const stderr = + "Traceback (most recent call last):\n" + + ' File "scip_python/main.py", line 42, in \n' + + "SyntaxError: invalid syntax in module foo.py"; + assert.equal(detectVersionManagerShimFailure("scip-python", stderr), undefined); +}); diff --git a/packages/scip-ingest/src/runners/index.ts b/packages/scip-ingest/src/runners/index.ts index 094ef21f..1d8b9ebd 100644 --- a/packages/scip-ingest/src/runners/index.ts +++ b/packages/scip-ingest/src/runners/index.ts @@ -415,6 +415,25 @@ export async function runIndexer( }; } if (indexOutcome.kind === "failed") { + // Tool-manager shim that resolved on PATH but cannot pick a version + // (e.g. mise/asdf shims when no version is pinned for the current + // project). The binary "exists" so `which`/spawn succeed, but the shim + // exits non-zero with a "No version is set" message before the real + // indexer ever runs. This is an environment/config gap, NOT an indexer + // crash — surface it as a graceful, actionable skip instead of throwing + // (a throw becomes the alarming "indexer failed" warning upstream). + const shimReason = detectVersionManagerShimFailure(plan.cmd, indexOutcome.stderr); + if (shimReason !== undefined) { + return { + kind, + scipPath, + tool: plan.tool, + version: "", + skipped: true, + skipReason: shimReason, + durationMs: Date.now() - start, + }; + } throw new Error( `scip-ingest: ${kind} indexer ${plan.cmd} exited ${indexOutcome.exitCode}. Stderr: ${indexOutcome.stderr.slice(0, 400)}`, ); @@ -867,6 +886,38 @@ export function checkKotlinMinVersion( return { ok: true }; } +/** + * Detect a version-manager shim that resolved on PATH but failed to select + * a version for the current project (mise/asdf/rtx "No version is set for + * shim: "). When the indexer is invoked through such a shim and the + * project hasn't pinned a version, the shim exits non-zero with this message + * BEFORE the real indexer runs — the binary appears present but is not + * actually runnable here. + * + * Returns an actionable skip reason when the failure matches, otherwise + * `undefined` (so the caller throws the generic indexer-crash error). The + * reason points the operator at both fixes: pin a version for the project, + * or install the indexer outside the shim so it resolves unconditionally. + * + * Exported for unit tests. + */ +export function detectVersionManagerShimFailure(cmd: string, stderr: string): string | undefined { + // mise: "mise ERROR No version is set for shim: scip-python" + // asdf: "No version is set for command " / "asdf: No version set" + // rtx (legacy mise name): "rtx ERROR No version is set for shim: " + const noVersionSet = + /no version (?:is )?set(?: for (?:shim|command))?/i.test(stderr) || + /please specify a version|run `mise use|run `asdf (?:local|global)/i.test(stderr); + const looksLikeShim = /\b(mise|asdf|rtx)\b/i.test(stderr) || noVersionSet; + if (!noVersionSet || !looksLikeShim) return undefined; + return ( + `${cmd} is exposed via a version-manager shim (mise/asdf) but no version ` + + `is pinned for this project, so the shim exited before the indexer ran. ` + + `Fix: pin it for the project (e.g. \`mise use ${cmd}@latest\`) or install ` + + `${cmd} outside the shim so it resolves unconditionally. Skipping ${cmd} for this run.` + ); +} + type CommandOutcome = | { kind: "ok"; stdout: string; stderr: string } | { kind: "failed"; exitCode: number; stdout: string; stderr: string }