diff --git a/packages/cli/src/commands/scan/handle-create-new-scan.mts b/packages/cli/src/commands/scan/handle-create-new-scan.mts
index 51d7fa98b..7ca986024 100644
--- a/packages/cli/src/commands/scan/handle-create-new-scan.mts
+++ b/packages/cli/src/commands/scan/handle-create-new-scan.mts
@@ -31,6 +31,7 @@ import { runSocketBasics } from '../../utils/basics/spawn.mts'
 function excludeFactsJson(paths: string[]): string[] {
   return paths.filter(p => path.basename(p) !== DOT_SOCKET_DOT_FACTS_JSON)
 }
+import { compressSocketFactsForUpload } from '../../utils/coana/compress-facts.mts'
 import { findSocketYmlSync } from '../../utils/config.mts'
 import { getPackageFilesForScan } from '../../utils/fs/path-resolve.mts'
 import { readOrDefaultSocketJson } from '../../utils/socket/json.mts'
@@ -290,28 +291,40 @@ export async function handleCreateNewScan({
     }
   }
 
-  const fullScanCResult = await fetchCreateOrgFullScan(
-    scanPaths,
-    orgSlug,
-    {
-      commitHash,
-      commitMessage,
-      committers,
-      pullRequest,
-      repoName,
-      branchName,
-      scanType: reach.runReachabilityAnalysis
-        ? SCAN_TYPE_SOCKET_TIER1
-        : SCAN_TYPE_SOCKET,
-      workspace,
-    },
-    {
-      cwd,
-      defaultBranch,
-      pendingHead,
-      tmp,
-    },
-  )
+  // Brotli-compress any .socket.facts.json paths in scanPaths just before
+  // upload. depscan's api-v0 multipart boundary streams brotli decode based
+  // on the .br filename suffix. Coana keeps writing plain .socket.facts.json
+  // on disk, so the local read path (extractTier1ReachabilityScanId) stays
+  // correct. The cleanup() in the finally block removes the sibling .br
+  // files whether the upload succeeded or threw.
+  const compressed = await compressSocketFactsForUpload(scanPaths)
+  let fullScanCResult: Awaited<ReturnType<typeof fetchCreateOrgFullScan>>
+  try {
+    fullScanCResult = await fetchCreateOrgFullScan(
+      compressed.paths,
+      orgSlug,
+      {
+        commitHash,
+        commitMessage,
+        committers,
+        pullRequest,
+        repoName,
+        branchName,
+        scanType: reach.runReachabilityAnalysis
+          ?
SCAN_TYPE_SOCKET_TIER1
+          : SCAN_TYPE_SOCKET,
+        workspace,
+      },
+      {
+        cwd,
+        defaultBranch,
+        pendingHead,
+        tmp,
+      },
+    )
+  } finally {
+    await compressed.cleanup()
+  }
 
   const scanId = fullScanCResult.ok ? fullScanCResult.data?.id : undefined
 
diff --git a/packages/cli/src/utils/coana/compress-facts.mts b/packages/cli/src/utils/coana/compress-facts.mts
new file mode 100644
index 000000000..0bced004b
--- /dev/null
+++ b/packages/cli/src/utils/coana/compress-facts.mts
@@ -0,0 +1,115 @@
+/**
+ * Brotli compression for Coana facts files prior to upload.
+ *
+ * Key Functions:
+ * - compressSocketFactsForUpload: Brotli-compress any .socket.facts.json
+ *   entries in scanPaths just before upload, returning swapped paths plus a
+ *   cleanup callback. Coana keeps writing plain JSON; the on-the-wire form
+ *   to depscan is brotli (api-v0 decodes at the multipart boundary).
+ *
+ * Integration:
+ * - Called from handleCreateNewScan immediately before fetchCreateOrgFullScan.
+ * - Sibling .br files live next to the source so the multipart entry name
+ *   stays inside cwd (depscan strips .. traversal entries).
+ */
+
+import { createReadStream, createWriteStream, existsSync } from 'node:fs'
+import path from 'node:path'
+import { pipeline } from 'node:stream/promises'
+import { createBrotliCompress } from 'node:zlib'
+
+import { safeDelete } from '@socketsecurity/lib/fs'
+
+import { DOT_SOCKET_DOT_FACTS_JSON } from '../../constants.mts'
+
+export type CompressedScanPaths = {
+  cleanup: () => Promise<void>
+  paths: string[]
+}
+
+/**
+ * For each `.socket.facts.json` in `scanPaths`, stream-brotli-compress a
+ * sibling `.socket.facts.json.br` next to the original file and swap its
+ * path in. Other paths pass through unchanged. Missing files also pass
+ * through unchanged (the upload will fail downstream with the same error
+ * it would have).
+ *
+ * Streaming compression on the zlib threadpool keeps the event loop
+ * responsive: default brotli quality (11) on a 60+MB facts file takes
+ * multiple seconds of CPU, which would otherwise freeze the spinner /
+ * signal handlers / any concurrent work.
+ *
+ * The `.br` lives next to the source rather than under the OS temp dir
+ * because depscan's multipart ingest (`addStreamEntry`) rejects entries
+ * whose names contain `..` traversal segments. The SDK computes the
+ * multipart entry name via `path.relative(cwd, brPath)`, so an OS-tmpdir
+ * temp path turns into `../../../var/folders/...` and gets dropped as
+ * `unmatchedFiles`. Sibling-write keeps the relative path inside cwd, and
+ * keeps the directory shape symmetric with the plain `.socket.facts.json`
+ * upload (depscan strips only the `.br` suffix at ingest, so
+ * `<dir>/.socket.facts.json.br` and `<dir>/.socket.facts.json` resolve to
+ * the same storage path).
+ *
+ * Concurrent scans against the same source directory are already racy on
+ * `.socket.facts.json` itself (coana writes to a single path), so the
+ * sibling `.br` doesn't introduce a new race.
+ *
+ * If any compression fails, every sibling `.br` started so far (including
+ * the failed stream's partial output) is deleted before the error is
+ * rethrown, because the caller never receives `cleanup` in that case.
+ *
+ * Caller MUST `await cleanup()` (typically in a `finally` block) once the
+ * upload completes — successful or not — to remove the sibling files.
+ *
+ * @param scanPaths - Paths about to be uploaded; the array is not mutated.
+ * @returns `paths` in the same order as `scanPaths`, plus `cleanup`.
+ * @throws The first compression error, after the sibling files are removed.
+ */
+export async function compressSocketFactsForUpload(
+  scanPaths: string[],
+): Promise<CompressedScanPaths> {
+  const brPaths: string[] = []
+  const cleanup = async () => {
+    const targets = brPaths.splice(0)
+    if (targets.length === 0) {
+      return
+    }
+    await safeDelete(targets, { force: true })
+  }
+  // allSettled rather than all: let every pipeline settle before deciding
+  // whether to hand the paths back or delete what was written.
+  const settled = await Promise.allSettled(
+    scanPaths.map(async p => {
+      if (path.basename(p) !== DOT_SOCKET_DOT_FACTS_JSON) {
+        return p
+      }
+      if (!existsSync(p)) {
+        return p
+      }
+      const brPath = `${p}.br`
+      // Register before writing so a failed pipeline's partial output is
+      // still picked up by cleanup().
+      brPaths.push(brPath)
+      await pipeline(
+        createReadStream(p),
+        createBrotliCompress(),
+        createWriteStream(brPath),
+      )
+      return brPath
+    }),
+  )
+  const paths: string[] = []
+  let failure: PromiseRejectedResult | undefined
+  for (const result of settled) {
+    if (result.status === 'fulfilled') {
+      paths.push(result.value)
+    } else if (!failure) {
+      failure = result
+    }
+  }
+  if (failure) {
+    await cleanup()
+    throw failure.reason
+  }
+  return { __proto__: null, cleanup, paths } as CompressedScanPaths
+}
diff --git a/packages/cli/test/unit/utils/coana/compress-facts.test.mts b/packages/cli/test/unit/utils/coana/compress-facts.test.mts
new file mode 100644
index 000000000..47984d8ae
--- /dev/null
+++ b/packages/cli/test/unit/utils/coana/compress-facts.test.mts
@@ -0,0 +1,120 @@
+/**
+ * Unit tests for Coana facts-file brotli compression.
+ *
+ * Test Coverage:
+ * - compressSocketFactsForUpload: swaps .socket.facts.json paths for
+ *   brotli-compressed sibling .br files, leaves other paths alone, cleans up.
+ *
+ * Related Files:
+ * - utils/coana/compress-facts.mts (implementation)
+ */
+
+import {
+  existsSync,
+  mkdtempSync,
+  readFileSync,
+  rmSync,
+  writeFileSync,
+} from 'node:fs'
+import { tmpdir } from 'node:os'
+import path from 'node:path'
+import { brotliDecompressSync } from 'node:zlib'
+
+import { describe, expect, it } from 'vitest'
+
+import { compressSocketFactsForUpload } from '../../../../src/utils/coana/compress-facts.mts'
+
+describe('compress-facts', () => {
+  describe('compressSocketFactsForUpload', () => {
+    it('writes brotli .br as a sibling of the source file', async () => {
+      const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-'))
+      const inputPath = path.join(wrapDir, '.socket.facts.json')
+      const payload = { tier1ReachabilityScanId: 'compress-test', a: 1, b: 2 }
+      writeFileSync(inputPath, JSON.stringify(payload))
+
+      try {
+        const result = await compressSocketFactsForUpload([inputPath])
+        const swappedPath = result.paths[0]!
+
+        expect(result.paths).toHaveLength(1)
+        expect(swappedPath).toBe(`${inputPath}.br`)
+        expect(existsSync(swappedPath)).toBe(true)
+        // Decompress with node:zlib to prove the sibling is real brotli that
+        // round-trips to the original JSON.
+        const roundTripped = brotliDecompressSync(
+          readFileSync(swappedPath),
+        ).toString('utf8')
+        expect(JSON.parse(roundTripped)).toEqual(payload)
+
+        // Cleanup removes the sibling .br file but leaves the source intact.
+        await result.cleanup()
+        expect(existsSync(swappedPath)).toBe(false)
+        expect(existsSync(inputPath)).toBe(true)
+      } finally {
+        rmSync(wrapDir, { recursive: true, force: true })
+      }
+    })
+
+    it('leaves non-facts paths unchanged', async () => {
+      const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-'))
+      const lock = path.join(wrapDir, 'package-lock.json')
+      const pkg = path.join(wrapDir, 'package.json')
+      writeFileSync(lock, '{}')
+      writeFileSync(pkg, '{}')
+
+      const result = await compressSocketFactsForUpload([lock, pkg])
+      try {
+        expect(result.paths).toEqual([lock, pkg])
+      } finally {
+        await result.cleanup()
+        rmSync(wrapDir, { recursive: true, force: true })
+      }
+    })
+
+    it('leaves a missing .socket.facts.json path unchanged', async () => {
+      const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-'))
+      const missingFacts = path.join(wrapDir, '.socket.facts.json')
+      // Deliberately never created, so the path must come back untouched.
+
+      const result = await compressSocketFactsForUpload([missingFacts])
+      try {
+        expect(result.paths).toEqual([missingFacts])
+      } finally {
+        await result.cleanup()
+        rmSync(wrapDir, { recursive: true, force: true })
+      }
+    })
+
+    it('mixes facts and non-facts entries correctly', async () => {
+      const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-'))
+      const facts = path.join(wrapDir, '.socket.facts.json')
+      const lock = path.join(wrapDir, 'package-lock.json')
+      writeFileSync(facts, JSON.stringify({ tier1ReachabilityScanId: 'mix' }))
+      writeFileSync(lock, '{"name":"x"}')
+
+      const result = await compressSocketFactsForUpload([lock, facts])
+      try {
+        expect(result.paths[0]).toBe(lock)
+        expect(result.paths[1]).toBe(`${facts}.br`)
+        const roundTripped = JSON.parse(
+          brotliDecompressSync(readFileSync(result.paths[1]!)).toString('utf8'),
+        )
+        expect(roundTripped.tier1ReachabilityScanId).toBe('mix')
+      } finally {
+        await result.cleanup()
+        rmSync(wrapDir, { recursive: true, force: true })
+      }
+    })
+
+    it('cleanup is idempotent (safe to call twice)', async () => {
+      const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-'))
+      const facts = path.join(wrapDir, '.socket.facts.json')
+      writeFileSync(facts, JSON.stringify({ tier1ReachabilityScanId: 'idem' }))
+
+      const result = await compressSocketFactsForUpload([facts])
+      await result.cleanup()
+      await expect(result.cleanup()).resolves.not.toThrow()
+      rmSync(wrapDir, { recursive: true, force: true }) // NOTE(review): not in a finally — wrapDir is left behind if a step above throws
+    })
+  })
+})