theagenticguy · theagenticguy · May 29, 2026 · May 29, 2026
@@ -126,6 +126,13 @@ export async function runScan(path: string, opts: ScanOptions = {}): Promise<Sca
     onProgress: (spec: ScannerSpec, status: ScannerStatus, note?: string) => {
       if (status === "error") {
         console.warn(`codehub scan: ${spec.id} errored: ${note ?? ""}`);
+      } else if (status === "warn" && note) {
+        // Advisory: the scan ran but emitted a note (non-clean exit code,
+        // SARIF parse fallback). Not a skip — the scanner still produced
+        // output. Label it distinctly so a non-zero-but-ran exit (e.g.
+        // osv-scanner exit 127 = general error) is not mistaken for "did
+        // not run".
+        console.warn(`codehub scan: ${spec.id}: ${note}`);
       } else if (status === "skipped" && note) {
         console.warn(`codehub scan: ${spec.id} skipped: ${note}`);
       } else {

@@ -97,6 +97,54 @@ test("runScanners invokes onProgress lifecycle for each scanner", async () => {
   assert.ok(bEvents.some((e) => e.status === "done"));
 });
 
+test("runScanners routes onWarn to `warn` status and does not double-emit the skip note", async () => {
+  // A wrapper that warns via onWarn AND returns `skipped` (the missing-binary
+  // shape). Previously the runner re-emitted the same note on the terminal
+  // `skipped` event, producing two identical lines.
+  const spec = makeSpec("pip-audit");
+  const skipMsg = "pip-audit: binary 'pip-audit' not found on PATH";
+  const wrapper: ScannerWrapper = {
+    spec,
+    run: async (c: ScannerRunContext): Promise<ScannerRunResult> => {
+      c.onWarn?.(skipMsg);
+      return { spec, sarif: emptySarifFor(spec), skipped: skipMsg, durationMs: 1 };
+    },
+  };
+  const events: Array<{ status: string; note: string | undefined }> = [];
+  await runScanners("/tmp/repo", [wrapper], {
+    onProgress: (_spec, status, note) => events.push({ status, note }),
+  });
+  // The note must appear exactly once (via the `warn` event), and the
+  // terminal `skipped` event must carry NO note (no duplicate).
+  const noteOccurrences = events.filter((e) => e.note === skipMsg);
+  assert.equal(noteOccurrences.length, 1, `note should print once, got ${noteOccurrences.length}`);
+  assert.equal(noteOccurrences[0]?.status, "warn");
+  const terminal = events.filter((e) => e.status === "skipped");
+  assert.equal(terminal.length, 1);
+  assert.equal(terminal[0]?.note, undefined, "terminal skipped event must not repeat the note");
+});
+
+test("runScanners emits a single `done` (no note) when a wrapper warns but does not skip", async () => {
+  // The osv-scanner exit-127 shape: warns via onWarn but returns SARIF
+  // (not skipped). Should produce exactly one terminal `done`, not a
+  // contradictory `skipped` + `done` pair.
+  const spec = makeSpec("osv-scanner");
+  const wrapper: ScannerWrapper = {
+    spec,
+    run: async (c: ScannerRunContext): Promise<ScannerRunResult> => {
+      c.onWarn?.("osv-scanner: general error (exit 127)");
+      return { spec, sarif: emptySarifFor(spec), durationMs: 1 };
+    },
+  };
+  const events: Array<{ status: string; note: string | undefined }> = [];
+  await runScanners("/tmp/repo", [wrapper], {
+    onProgress: (_spec, status, note) => events.push({ status, note }),
+  });
+  assert.equal(events.filter((e) => e.status === "warn").length, 1);
+  assert.equal(events.filter((e) => e.status === "done").length, 1);
+  assert.equal(events.filter((e) => e.status === "skipped").length, 0);
+});
+
 test("runScanners respects concurrency cap", async () => {
   let inFlight = 0;
   let peak = 0;

@@ -23,7 +23,21 @@ import {
   type ScannerWrapper,
 } from "./spec.js";
 
-export type ScannerStatus = "start" | "done" | "error" | "skipped";
+/**
+ * Per-scanner lifecycle status.
+ *
+ *   - `start`   — the wrapper began executing.
+ *   - `warn`    — the wrapper surfaced an advisory via `onWarn` (e.g. a
+ *                 non-clean exit code, or a SARIF-parse fallback). The scan
+ *                 still ran and produced (possibly empty) output. Distinct
+ *                 from `skipped` so callers don't mislabel "ran with a
+ *                 non-zero exit" as "did not run".
+ *   - `done`    — the wrapper finished and produced output.
+ *   - `skipped` — the scan did not run (binary missing, etc.); `note`
+ *                 carries the reason.
+ *   - `error`   — the wrapper threw; `note` carries the error message.
+ */
+export type ScannerStatus = "start" | "warn" | "done" | "error" | "skipped";
 
 export interface RunScannersOptions {
   /** Cap on parallel scanners. Default min(availableParallelism(), 4). */
@@ -75,13 +89,32 @@ export async function runScanners(
       const started = performance.now();
       opts.onProgress?.(spec, "start");
       try {
+        // Wrappers call `onWarn` for advisories (non-clean exit code, SARIF
+        // parse fallback) and ALSO return a `skipped` string when the scan
+        // did not run. Route `onWarn` to the `warn` status (scan ran, here's
+        // a note) — NOT `skipped` — and track that it fired. Without this:
+        //   (1) a missing-binary wrapper that calls onWarn AND returns
+        //       `skipped` re-printed the same line twice (the duplicate
+        //       `pip-audit skipped: ...` lines), and
+        //   (2) an advisory-only wrapper (osv-scanner exit 127) was mislabeled
+        //       "skipped: ..." and then immediately followed by a "done" line.
+        // We coalesce the terminal event: when the wrapper already surfaced
+        // its `skipped` reason via `onWarn`, emit the terminal lifecycle
+        // status with no duplicate note.
+        let warned = false;
         const result = await wrapper.run({
           projectPath,
           timeoutMs,
-          onWarn: (m: string) => opts.onProgress?.(spec, "skipped", m),
+          onWarn: (m: string) => {
+            warned = true;
+            opts.onProgress?.(spec, "warn", m);
+          },
         });
         runs[myIdx] = result;
-        opts.onProgress?.(spec, result.skipped ? "skipped" : "done", result.skipped);
+        const terminal: ScannerStatus = result.skipped ? "skipped" : "done";
+        // If the wrapper already explained itself via onWarn, don't repeat
+        // the note on the terminal line.
+        opts.onProgress?.(spec, terminal, warned ? undefined : result.skipped);
       } catch (err) {
         const message = err instanceof Error ? err.message : String(err);
         errored.push({ spec, error: message });

@@ -1,29 +1,93 @@
 /**
  * Bandit wrapper — Python SAST.
  *
- * Invocation: `bandit -r <projectPath> -f sarif`
+ * Invocation: `bandit -r <projectPath> -f sarif --quiet`
  *
  * Requires the `bandit[sarif]` install (which pulls in
- * `bandit-sarif-formatter` ≥1.1.1). If the formatter is missing, bandit
- * falls back to text output and our SARIF parser emits an empty log +
- * warning — graceful degradation.
+ * `bandit-sarif-formatter` ≥1.1.1). IMPORTANT: when that extra is NOT
+ * installed, `sarif` is absent from bandit's dynamic `-f/--format` choice
+ * list, so argparse REJECTS `-f sarif` with exit code 2 and a `usage: …`
+ * message on stderr — bandit does NOT fall back to text output. We detect
+ * that specific failure and emit an actionable advisory pointing at the
+ * `bandit[sarif]` install, instead of the misleading generic "stdout was
+ * not valid JSON" note.
+ *
+ * Bandit exit codes: 0 = no issues, 1 = issues found, 2 = usage/argparse
+ * or config error (NOT a finding). We treat 0 and 1 as "ran"; 2+ is a
+ * genuine invocation failure.
  *
  * Note: bandit writes SARIF to stdout when `-f sarif` is given WITHOUT
- * `-o <file>`. Passing `.` as the target would confuse bandit (it treats
- * `.` as "current directory" but recurses via `-r` explicitly); we pass
- * `projectPath` so the wrapper works from any CWD.
+ * `-o <file>`. We pass `-r <projectPath>` so the wrapper works from any CWD.
  */
 
 import { BANDIT_SPEC } from "../catalog.js";
 import type { ScannerRunContext, ScannerRunResult, ScannerWrapper } from "../spec.js";
-import { DEFAULT_DEPS, invokeScanner, type WrapperDeps } from "./shared.js";
+import { emptySarifFor } from "../spec.js";
+import { DEFAULT_DEPS, parseSarifOrEmpty, type WrapperDeps } from "./shared.js";
+
+/**
+ * Detect bandit's argparse rejection of `-f sarif` (exit 2 + a `usage:`
+ * banner mentioning the format flag) and return an actionable advisory.
+ * Returns `undefined` when the exit looks like a normal run (0/1) or an
+ * unrelated failure (handled by the generic exit-code note).
+ */
+export function banditExitAdvisory(exitCode: number, stderr: string): string | undefined {
+  if (exitCode === 0 || exitCode === 1) return undefined;
+  const looksLikeUsage = /\busage:\s*bandit\b/i.test(stderr);
+  if (exitCode === 2 && looksLikeUsage) {
+    return (
+      `${BANDIT_SPEC.id}: rejected '-f sarif' (exit 2). The SARIF formatter is not ` +
+      `installed — install the extra: ${BANDIT_SPEC.installCmd}. ` +
+      `(Bandit does not fall back to text output here; it exits with a usage error.)`
+    );
+  }
+  return `${BANDIT_SPEC.id}: exit code ${exitCode}; stderr: ${truncate(stderr, 160)}`;
+}
 
 export function createBanditWrapper(deps: WrapperDeps = DEFAULT_DEPS): ScannerWrapper {
   return {
     spec: BANDIT_SPEC,
     run: async (ctx: ScannerRunContext): Promise<ScannerRunResult> => {
+      const started = performance.now();
+      const probe = await deps.which("bandit");
+      if (!probe.found) {
+        const msg = `${BANDIT_SPEC.id}: binary 'bandit' not found on PATH (install: ${BANDIT_SPEC.installCmd}).`;
+        ctx.onWarn?.(msg);
+        return {
+          spec: BANDIT_SPEC,
+          sarif: emptySarifFor(BANDIT_SPEC),
+          skipped: msg,
+          durationMs: performance.now() - started,
+        };
+      }
       const args: readonly string[] = ["-r", ctx.projectPath, "-f", "sarif", "--quiet"];
-      return invokeScanner(BANDIT_SPEC, ctx, "bandit", args, deps);
+      const result = await deps.runBinary("bandit", args, {
+        timeoutMs: ctx.timeoutMs,
+        cwd: ctx.projectPath,
+      });
+      const advisory = banditExitAdvisory(result.exitCode, result.stderr);
+      if (advisory !== undefined) {
+        // Argparse / invocation failure: stdout is empty (the usage banner
+        // went to stderr). Surface the specific advisory and skip the generic
+        // "stdout was not valid JSON" note, which would be misleading here.
+        ctx.onWarn?.(advisory);
+        return {
+          spec: BANDIT_SPEC,
+          sarif: emptySarifFor(BANDIT_SPEC),
+          durationMs: performance.now() - started,
+        };
+      }
+      const sarif = parseSarifOrEmpty(result.stdout, BANDIT_SPEC, ctx.onWarn);
+      return {
+        spec: BANDIT_SPEC,
+        sarif,
+        durationMs: performance.now() - started,
+      };
     },
   };
 }
+
+function truncate(s: string, max: number): string {
+  if (s.length <= max) return s.trim();
+  return `${s.slice(0, max).trim()}…`;
+}
@@ -1,31 +1,98 @@
 /**
  * OSV-Scanner wrapper — dependency vulnerability scanner.
  *
- * Invocation: `osv-scanner scan --format=sarif --offline-vulnerabilities
- *              --recursive .`
+ * Invocation: `osv-scanner scan source --format=sarif --recursive .`
  *
- * We enable offline mode by default — OpenCodeHub's v1.0 posture is that
- * the vulnerability database is pre-synced by `codehub db-sync`.
- * `--offline-vulnerabilities` keeps the tool from reaching osv.dev while
- * still allowing local lockfile resolution to touch manifests on disk.
+ * Exit-code semantics (osv-scanner v2, per
+ * https://google.github.io/osv-scanner/output/ "Return Codes"):
+ *
+ *   0   — packages found, no vulnerabilities/findings.
+ *   1   — packages found, vulnerabilities/findings present (the COMMON
+ *         non-zero case; NOT an error).
+ *   2-126 — reserved for vulnerability/finding-related results.
+ *   127 — GENERAL ERROR (e.g. the offline vulnerability DB could not be
+ *         loaded). Note: the scan may still have walked the filesystem and
+ *         parsed lockfiles before failing, so partial stderr ("Scanned …
+ *         uv.lock … found N packages") does NOT mean the scan succeeded.
+ *   128 — no packages discovered (scanning format picked up no files).
+ *   129-255 — non-result-related errors.
+ *
+ * The shared `invokeScanner` treats only 0 and 1 as "ran cleanly". For
+ * osv-scanner that is too coarse: it would flag exit 1 (vulns found — the
+ * whole point of the scan) as a warning, and it would surface exit 127 as
+ * a generic "exit code 127" without explaining that the most likely cause
+ * is a missing offline database. We interpret osv's codes explicitly here.
+ *
+ * Offline posture: earlier revisions passed `--offline-vulnerabilities` by
+ * default on the assumption the DB was pre-synced by `codehub db-sync`.
+ * On a fresh checkout with no synced DB, osv-scanner walks the tree, then
+ * fails to load the offline DB and exits 127 — a confusing "it ran but
+ * errored" signal. We DROP the default offline flag so the common case
+ * (online lookup) works out of the box; operators who need air-gapped
+ * scans pass it back via osv-scanner's own env/flags after `db-sync`.
  */
 
 import { OSV_SCANNER_SPEC } from "../catalog.js";
 import type { ScannerRunContext, ScannerRunResult, ScannerWrapper } from "../spec.js";
-import { DEFAULT_DEPS, invokeScanner, type WrapperDeps } from "./shared.js";
+import { emptySarifFor } from "../spec.js";
+import { DEFAULT_DEPS, parseSarifOrEmpty, type WrapperDeps } from "./shared.js";
 
-const OSV_ARGS: readonly string[] = [
-  "scan",
-  "--format=sarif",
-  "--offline-vulnerabilities",
-  "--recursive",
-  ".",
-];
+const OSV_ARGS: readonly string[] = ["scan", "source", "--format=sarif", "--recursive", "."];
+
+/**
+ * Map an osv-scanner v2 exit code to an advisory note, or `undefined` when
+ * the exit represents a clean / findings-present run that needs no warning.
+ */
+export function osvExitAdvisory(exitCode: number, stderr: string): string | undefined {
+  // 0 = no findings; 1-126 = findings present (the normal non-zero outcome).
+  if (exitCode >= 0 && exitCode <= 126) return undefined;
+  if (exitCode === 127) {
+    return (
+      `${OSV_SCANNER_SPEC.id}: general error (exit 127). Common cause: the offline ` +
+      `vulnerability DB is missing — run \`codehub db-sync\` then retry, or omit ` +
+      `offline mode. stderr: ${truncate(stderr, 160)}`
+    );
+  }
+  if (exitCode === 128) {
+    // No packages discovered. Benign for repos with no lockfiles/manifests.
+    return `${OSV_SCANNER_SPEC.id}: no packages discovered (exit 128); nothing to scan.`;
+  }
+  return `${OSV_SCANNER_SPEC.id}: exit code ${exitCode}; stderr: ${truncate(stderr, 160)}`;
+}
 
 export function createOsvScannerWrapper(deps: WrapperDeps = DEFAULT_DEPS): ScannerWrapper {
   return {
     spec: OSV_SCANNER_SPEC,
-    run: (ctx: ScannerRunContext): Promise<ScannerRunResult> =>
-      invokeScanner(OSV_SCANNER_SPEC, ctx, "osv-scanner", OSV_ARGS, deps),
+    run: async (ctx: ScannerRunContext): Promise<ScannerRunResult> => {
+      const started = performance.now();
+      const probe = await deps.which("osv-scanner");
+      if (!probe.found) {
+        const msg = `${OSV_SCANNER_SPEC.id}: binary 'osv-scanner' not found on PATH (install: ${OSV_SCANNER_SPEC.installCmd}).`;
+        ctx.onWarn?.(msg);
+        return {
+          spec: OSV_SCANNER_SPEC,
+          sarif: emptySarifFor(OSV_SCANNER_SPEC),
+          skipped: msg,
+          durationMs: performance.now() - started,
+        };
+      }
+      const result = await deps.runBinary("osv-scanner", OSV_ARGS, {
+        timeoutMs: ctx.timeoutMs,
+        cwd: ctx.projectPath,
+      });
+      const sarif = parseSarifOrEmpty(result.stdout, OSV_SCANNER_SPEC, ctx.onWarn);
+      const advisory = osvExitAdvisory(result.exitCode, result.stderr);
+      if (advisory !== undefined) ctx.onWarn?.(advisory);
+      return {
+        spec: OSV_SCANNER_SPEC,
+        sarif,
+        durationMs: performance.now() - started,
+      };
+    },
   };
 }
+
+function truncate(s: string, max: number): string {
+  if (s.length <= max) return s.trim();
+  return `${s.slice(0, max).trim()}…`;
+}