Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions packages/cli/src/commands/scan.ts
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,13 @@ export async function runScan(path: string, opts: ScanOptions = {}): Promise<Sca
onProgress: (spec: ScannerSpec, status: ScannerStatus, note?: string) => {
if (status === "error") {
console.warn(`codehub scan: ${spec.id} errored: ${note ?? ""}`);
} else if (status === "warn" && note) {
// Advisory: the scan ran but emitted a note (non-clean exit code,
// SARIF parse fallback). Not a skip — the scanner still produced
// output. Label it distinctly so a non-zero-but-ran exit (e.g.
// osv-scanner exit 127 = general error) is not mistaken for "did
// not run".
console.warn(`codehub scan: ${spec.id}: ${note}`);
} else if (status === "skipped" && note) {
console.warn(`codehub scan: ${spec.id} skipped: ${note}`);
} else {
Expand Down
48 changes: 48 additions & 0 deletions packages/scanners/src/runner.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,54 @@ test("runScanners invokes onProgress lifecycle for each scanner", async () => {
assert.ok(bEvents.some((e) => e.status === "done"));
});

test("runScanners routes onWarn to `warn` status and does not double-emit the skip note", async () => {
// A wrapper that warns via onWarn AND returns `skipped` (the missing-binary
// shape). Previously the runner re-emitted the same note on the terminal
// `skipped` event, producing two identical lines.
const spec = makeSpec("pip-audit");
const skipMsg = "pip-audit: binary 'pip-audit' not found on PATH";
const wrapper: ScannerWrapper = {
spec,
run: async (c: ScannerRunContext): Promise<ScannerRunResult> => {
c.onWarn?.(skipMsg);
return { spec, sarif: emptySarifFor(spec), skipped: skipMsg, durationMs: 1 };
},
};
const events: Array<{ status: string; note: string | undefined }> = [];
await runScanners("/tmp/repo", [wrapper], {
onProgress: (_spec, status, note) => events.push({ status, note }),
});
// The note must appear exactly once (via the `warn` event), and the
// terminal `skipped` event must carry NO note (no duplicate).
const noteOccurrences = events.filter((e) => e.note === skipMsg);
assert.equal(noteOccurrences.length, 1, `note should print once, got ${noteOccurrences.length}`);
assert.equal(noteOccurrences[0]?.status, "warn");
const terminal = events.filter((e) => e.status === "skipped");
assert.equal(terminal.length, 1);
assert.equal(terminal[0]?.note, undefined, "terminal skipped event must not repeat the note");
});

test("runScanners emits a single `done` (no note) when a wrapper warns but does not skip", async () => {
// The osv-scanner exit-127 shape: warns via onWarn but returns SARIF
// (not skipped). Should produce exactly one terminal `done`, not a
// contradictory `skipped` + `done` pair.
const spec = makeSpec("osv-scanner");
const wrapper: ScannerWrapper = {
spec,
run: async (c: ScannerRunContext): Promise<ScannerRunResult> => {
c.onWarn?.("osv-scanner: general error (exit 127)");
return { spec, sarif: emptySarifFor(spec), durationMs: 1 };
},
};
const events: Array<{ status: string; note: string | undefined }> = [];
await runScanners("/tmp/repo", [wrapper], {
onProgress: (_spec, status, note) => events.push({ status, note }),
});
assert.equal(events.filter((e) => e.status === "warn").length, 1);
assert.equal(events.filter((e) => e.status === "done").length, 1);
assert.equal(events.filter((e) => e.status === "skipped").length, 0);
});

test("runScanners respects concurrency cap", async () => {
let inFlight = 0;
let peak = 0;
Expand Down
39 changes: 36 additions & 3 deletions packages/scanners/src/runner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,21 @@ import {
type ScannerWrapper,
} from "./spec.js";

export type ScannerStatus = "start" | "done" | "error" | "skipped";
/**
* Per-scanner lifecycle status.
*
* - `start` — the wrapper began executing.
* - `warn` — the wrapper surfaced an advisory via `onWarn` (e.g. a
* non-clean exit code, or a SARIF-parse fallback). The scan
* still ran and produced (possibly empty) output. Distinct
* from `skipped` so callers don't mislabel "ran with a
* non-zero exit" as "did not run".
* - `done` — the wrapper finished and produced output.
* - `skipped` — the scan did not run (binary missing, etc.); `note`
* carries the reason.
* - `error` — the wrapper threw; `note` carries the error message.
*/
export type ScannerStatus = "start" | "warn" | "done" | "error" | "skipped";

export interface RunScannersOptions {
/** Cap on parallel scanners. Default min(availableParallelism(), 4). */
Expand Down Expand Up @@ -75,13 +89,32 @@ export async function runScanners(
const started = performance.now();
opts.onProgress?.(spec, "start");
try {
// Wrappers call `onWarn` for advisories (non-clean exit code, SARIF
// parse fallback) and ALSO return a `skipped` string when the scan
// did not run. Route `onWarn` to the `warn` status (scan ran, here's
// a note) — NOT `skipped` — and track that it fired. Without this:
// (1) a missing-binary wrapper that calls onWarn AND returns
// `skipped` re-printed the same line twice (the duplicate
// `pip-audit skipped: ...` lines), and
// (2) an advisory-only wrapper (osv-scanner exit 127) was mislabeled
// "skipped: ..." and then immediately followed by a "done" line.
// We coalesce the terminal event: when the wrapper already surfaced
// its `skipped` reason via `onWarn`, emit the terminal lifecycle
// status with no duplicate note.
let warned = false;
const result = await wrapper.run({
projectPath,
timeoutMs,
onWarn: (m: string) => opts.onProgress?.(spec, "skipped", m),
onWarn: (m: string) => {
warned = true;
opts.onProgress?.(spec, "warn", m);
},
});
runs[myIdx] = result;
opts.onProgress?.(spec, result.skipped ? "skipped" : "done", result.skipped);
const terminal: ScannerStatus = result.skipped ? "skipped" : "done";
// If the wrapper already explained itself via onWarn, don't repeat
// the note on the terminal line.
opts.onProgress?.(spec, terminal, warned ? undefined : result.skipped);
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
errored.push({ spec, error: message });
Expand Down
82 changes: 73 additions & 9 deletions packages/scanners/src/wrappers/bandit.ts
Original file line number Diff line number Diff line change
@@ -1,29 +1,93 @@
/**
* Bandit wrapper — Python SAST.
*
* Invocation: `bandit -r <projectPath> -f sarif`
* Invocation: `bandit -r <projectPath> -f sarif --quiet`
*
* Requires the `bandit[sarif]` install (which pulls in
* `bandit-sarif-formatter` ≥1.1.1). If the formatter is missing, bandit
* falls back to text output and our SARIF parser emits an empty log +
* warning — graceful degradation.
* `bandit-sarif-formatter` ≥1.1.1). IMPORTANT: when that extra is NOT
* installed, `sarif` is absent from bandit's dynamic `-f/--format` choice
* list, so argparse REJECTS `-f sarif` with exit code 2 and a `usage: …`
* message on stderr — bandit does NOT fall back to text output. We detect
* that specific failure and emit an actionable advisory pointing at the
* `bandit[sarif]` install, instead of the misleading generic "stdout was
* not valid JSON" note.
*
* Bandit exit codes: 0 = no issues, 1 = issues found, 2 = usage/argparse
* or config error (NOT a finding). We treat 0 and 1 as "ran"; 2+ is a
* genuine invocation failure.
*
* Note: bandit writes SARIF to stdout when `-f sarif` is given WITHOUT
* `-o <file>`. Passing `.` as the target would confuse bandit (it treats
* `.` as "current directory" but recurses via `-r` explicitly); we pass
* `projectPath` so the wrapper works from any CWD.
* `-o <file>`. We pass `-r <projectPath>` so the wrapper works from any CWD.
*/

import { BANDIT_SPEC } from "../catalog.js";
import type { ScannerRunContext, ScannerRunResult, ScannerWrapper } from "../spec.js";
import { DEFAULT_DEPS, invokeScanner, type WrapperDeps } from "./shared.js";
import { emptySarifFor } from "../spec.js";
import { DEFAULT_DEPS, parseSarifOrEmpty, type WrapperDeps } from "./shared.js";

/**
* Detect bandit's argparse rejection of `-f sarif` (exit 2 + a `usage:`
* banner mentioning the format flag) and return an actionable advisory.
* Returns `undefined` when the exit looks like a normal run (0/1) or an
* unrelated failure (handled by the generic exit-code note).
*/
export function banditExitAdvisory(exitCode: number, stderr: string): string | undefined {
if (exitCode === 0 || exitCode === 1) return undefined;
const looksLikeUsage = /\busage:\s*bandit\b/i.test(stderr);
if (exitCode === 2 && looksLikeUsage) {
return (
`${BANDIT_SPEC.id}: rejected '-f sarif' (exit 2). The SARIF formatter is not ` +
`installed — install the extra: ${BANDIT_SPEC.installCmd}. ` +
`(Bandit does not fall back to text output here; it exits with a usage error.)`
);
}
return `${BANDIT_SPEC.id}: exit code ${exitCode}; stderr: ${truncate(stderr, 160)}`;
}

export function createBanditWrapper(deps: WrapperDeps = DEFAULT_DEPS): ScannerWrapper {
return {
spec: BANDIT_SPEC,
run: async (ctx: ScannerRunContext): Promise<ScannerRunResult> => {
const started = performance.now();
const probe = await deps.which("bandit");
if (!probe.found) {
const msg = `${BANDIT_SPEC.id}: binary 'bandit' not found on PATH (install: ${BANDIT_SPEC.installCmd}).`;
ctx.onWarn?.(msg);
return {
spec: BANDIT_SPEC,
sarif: emptySarifFor(BANDIT_SPEC),
skipped: msg,
durationMs: performance.now() - started,
};
}
const args: readonly string[] = ["-r", ctx.projectPath, "-f", "sarif", "--quiet"];
return invokeScanner(BANDIT_SPEC, ctx, "bandit", args, deps);
const result = await deps.runBinary("bandit", args, {
timeoutMs: ctx.timeoutMs,
cwd: ctx.projectPath,
});
const advisory = banditExitAdvisory(result.exitCode, result.stderr);
if (advisory !== undefined) {
// Argparse / invocation failure: stdout is empty (the usage banner
// went to stderr). Surface the specific advisory and skip the generic
// "stdout was not valid JSON" note, which would be misleading here.
ctx.onWarn?.(advisory);
return {
spec: BANDIT_SPEC,
sarif: emptySarifFor(BANDIT_SPEC),
durationMs: performance.now() - started,
};
}
const sarif = parseSarifOrEmpty(result.stdout, BANDIT_SPEC, ctx.onWarn);
return {
spec: BANDIT_SPEC,
sarif,
durationMs: performance.now() - started,
};
},
};
}

function truncate(s: string, max: number): string {
if (s.length <= max) return s.trim();
return `${s.slice(0, max).trim()}…`;
}
99 changes: 83 additions & 16 deletions packages/scanners/src/wrappers/osv-scanner.ts
Original file line number Diff line number Diff line change
@@ -1,31 +1,98 @@
/**
* OSV-Scanner wrapper — dependency vulnerability scanner.
*
* Invocation: `osv-scanner scan --format=sarif --offline-vulnerabilities
* --recursive .`
* Invocation: `osv-scanner scan source --format=sarif --recursive .`
*
* We enable offline mode by default — OpenCodeHub's v1.0 posture is that
* the vulnerability database is pre-synced by `codehub db-sync`.
* `--offline-vulnerabilities` keeps the tool from reaching osv.dev while
* still allowing local lockfile resolution to touch manifests on disk.
* Exit-code semantics (osv-scanner v2, per
* https://google.github.io/osv-scanner/output/ "Return Codes"):
*
* 0 — packages found, no vulnerabilities/findings.
* 1 — packages found, vulnerabilities/findings present (the COMMON
* non-zero case; NOT an error).
* 2-126 — reserved for vulnerability/finding-related results.
* 127 — GENERAL ERROR (e.g. the offline vulnerability DB could not be
* loaded). Note: the scan may still have walked the filesystem and
* parsed lockfiles before failing, so partial stderr ("Scanned …
* uv.lock … found N packages") does NOT mean the scan succeeded.
* 128 — no packages discovered (scanning format picked up no files).
* 129-255 — non-result-related errors.
*
* The shared `invokeScanner` treats only 0 and 1 as "ran cleanly". For
* osv-scanner that is too coarse: it would flag exit 1 (vulns found — the
* whole point of the scan) as a warning, and it would surface exit 127 as
* a generic "exit code 127" without explaining that the most likely cause
* is a missing offline database. We interpret osv's codes explicitly here.
*
* Offline posture: earlier revisions passed `--offline-vulnerabilities` by
* default on the assumption the DB was pre-synced by `codehub db-sync`.
* On a fresh checkout with no synced DB, osv-scanner walks the tree, then
* fails to load the offline DB and exits 127 — a confusing "it ran but
* errored" signal. We DROP the default offline flag so the common case
* (online lookup) works out of the box; operators who need air-gapped
* scans pass it back via osv-scanner's own env/flags after `db-sync`.
*/

import { OSV_SCANNER_SPEC } from "../catalog.js";
import type { ScannerRunContext, ScannerRunResult, ScannerWrapper } from "../spec.js";
import { DEFAULT_DEPS, invokeScanner, type WrapperDeps } from "./shared.js";
import { emptySarifFor } from "../spec.js";
import { DEFAULT_DEPS, parseSarifOrEmpty, type WrapperDeps } from "./shared.js";

const OSV_ARGS: readonly string[] = [
"scan",
"--format=sarif",
"--offline-vulnerabilities",
"--recursive",
".",
];
const OSV_ARGS: readonly string[] = ["scan", "source", "--format=sarif", "--recursive", "."];

/**
* Map an osv-scanner v2 exit code to an advisory note, or `undefined` when
* the exit represents a clean / findings-present run that needs no warning.
*/
export function osvExitAdvisory(exitCode: number, stderr: string): string | undefined {
// 0 = no findings; 1-126 = findings present (the normal non-zero outcome).
if (exitCode >= 0 && exitCode <= 126) return undefined;
if (exitCode === 127) {
return (
`${OSV_SCANNER_SPEC.id}: general error (exit 127). Common cause: the offline ` +
`vulnerability DB is missing — run \`codehub db-sync\` then retry, or omit ` +
`offline mode. stderr: ${truncate(stderr, 160)}`
);
}
if (exitCode === 128) {
// No packages discovered. Benign for repos with no lockfiles/manifests.
return `${OSV_SCANNER_SPEC.id}: no packages discovered (exit 128); nothing to scan.`;
}
return `${OSV_SCANNER_SPEC.id}: exit code ${exitCode}; stderr: ${truncate(stderr, 160)}`;
}

export function createOsvScannerWrapper(deps: WrapperDeps = DEFAULT_DEPS): ScannerWrapper {
return {
spec: OSV_SCANNER_SPEC,
run: (ctx: ScannerRunContext): Promise<ScannerRunResult> =>
invokeScanner(OSV_SCANNER_SPEC, ctx, "osv-scanner", OSV_ARGS, deps),
run: async (ctx: ScannerRunContext): Promise<ScannerRunResult> => {
const started = performance.now();
const probe = await deps.which("osv-scanner");
if (!probe.found) {
const msg = `${OSV_SCANNER_SPEC.id}: binary 'osv-scanner' not found on PATH (install: ${OSV_SCANNER_SPEC.installCmd}).`;
ctx.onWarn?.(msg);
return {
spec: OSV_SCANNER_SPEC,
sarif: emptySarifFor(OSV_SCANNER_SPEC),
skipped: msg,
durationMs: performance.now() - started,
};
}
const result = await deps.runBinary("osv-scanner", OSV_ARGS, {
timeoutMs: ctx.timeoutMs,
cwd: ctx.projectPath,
});
const sarif = parseSarifOrEmpty(result.stdout, OSV_SCANNER_SPEC, ctx.onWarn);
const advisory = osvExitAdvisory(result.exitCode, result.stderr);
if (advisory !== undefined) ctx.onWarn?.(advisory);
return {
spec: OSV_SCANNER_SPEC,
sarif,
durationMs: performance.now() - started,
};
},
};
}

function truncate(s: string, max: number): string {
if (s.length <= max) return s.trim();
return `${s.slice(0, max).trim()}…`;
}
Loading
Loading