-
Notifications
You must be signed in to change notification settings - Fork 41
feat(scan): brotli-compress .socket.facts.json on upload (port of #1291) #1305
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,90 @@ | ||
| /** | ||
| * Brotli compression for Coana facts files prior to upload. | ||
| * | ||
| * Key Functions: | ||
| * - compressSocketFactsForUpload: Brotli-compress any .socket.facts.json | ||
| * entries in scanPaths just before upload, returning swapped paths plus a | ||
| * cleanup callback. Coana keeps writing plain JSON; the on-the-wire form | ||
| * to depscan is brotli (api-v0 decodes at the multipart boundary). | ||
| * | ||
| * Integration: | ||
| * - Called from handleCreateNewScan immediately before fetchCreateOrgFullScan. | ||
| * - Sibling .br files live next to the source so the multipart entry name | ||
| * stays inside cwd (depscan strips .. traversal entries). | ||
| */ | ||
|
|
||
| import { createReadStream, createWriteStream, existsSync } from 'node:fs' | ||
| import path from 'node:path' | ||
| import { pipeline } from 'node:stream/promises' | ||
| import { createBrotliCompress } from 'node:zlib' | ||
|
|
||
| import { safeDelete } from '@socketsecurity/lib/fs' | ||
|
|
||
| import { DOT_SOCKET_DOT_FACTS_JSON } from '../../constants.mts' | ||
|
|
||
/**
 * Result of `compressSocketFactsForUpload`: the (possibly swapped) upload
 * paths plus a callback that removes the generated sibling `.br` files.
 */
export type CompressedScanPaths = {
  // Deletes any sibling `.br` files created during compression. The caller
  // MUST await this (typically in a `finally`) once the upload completes,
  // whether it succeeded or failed.
  cleanup: () => Promise<void>
  // The input scanPaths, with each compressed `.socket.facts.json` entry
  // replaced by the path of its `.br` sibling; other entries pass through.
  paths: string[]
}
|
|
||
| /** | ||
| * For each `.socket.facts.json` in `scanPaths`, stream-brotli-compress a | ||
| * sibling `.socket.facts.json.br` next to the original file and swap its | ||
| * path in. Other paths pass through unchanged. Missing files also pass | ||
| * through unchanged (the upload will fail downstream with the same error | ||
| * it would have). | ||
| * | ||
| * Streaming + worker-thread compression keeps the event loop responsive: | ||
| * default brotli quality (11) on a 60+MB facts file takes multiple seconds | ||
| * of CPU, which would otherwise freeze the spinner / signal handlers / | ||
| * any concurrent work. | ||
| * | ||
| * The `.br` lives next to the source rather than under the OS temp dir | ||
| * because depscan's multipart ingest (`addStreamEntry`) rejects entries | ||
| * whose names contain `..` traversal segments. The SDK computes the | ||
| * multipart entry name via `path.relative(cwd, brPath)`, so an OS-tmpdir | ||
| * temp path turns into `../../../var/folders/...` and gets dropped as | ||
| * `unmatchedFiles`. Sibling-write keeps the relative path inside cwd, and | ||
| * keeps the directory shape symmetric with the plain `.socket.facts.json` | ||
| * upload (depscan strips only the `.br` suffix at ingest, so | ||
| * `<dir>/.socket.facts.json.br` and `<dir>/.socket.facts.json` resolve to | ||
| * the same storage path). | ||
| * | ||
| * Concurrent scans against the same source directory are already racy on | ||
| * `.socket.facts.json` itself (coana writes to a single path), so the | ||
| * sibling `.br` doesn't introduce a new race. | ||
| * | ||
| * Caller MUST `await cleanup()` (typically in a `finally` block) once the | ||
| * upload completes — successful or not — to remove the sibling files. | ||
| */ | ||
| export async function compressSocketFactsForUpload( | ||
| scanPaths: string[], | ||
| ): Promise<CompressedScanPaths> { | ||
| const brPaths: string[] = [] | ||
| const paths = await Promise.all( | ||
| scanPaths.map(async p => { | ||
| if (path.basename(p) !== DOT_SOCKET_DOT_FACTS_JSON) { | ||
| return p | ||
| } | ||
| if (!existsSync(p)) { | ||
| return p | ||
| } | ||
| const brPath = `${p}.br` | ||
| await pipeline( | ||
| createReadStream(p), | ||
| createBrotliCompress(), | ||
| createWriteStream(brPath), | ||
| ) | ||
| brPaths.push(brPath) | ||
| return brPath | ||
| }), | ||
| ) | ||
| const cleanup = async () => { | ||
| const targets = brPaths.splice(0) | ||
| if (targets.length === 0) { | ||
| return | ||
| } | ||
| await safeDelete(targets, { force: true }) | ||
| } | ||
| return { __proto__: null, cleanup, paths } as CompressedScanPaths | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,120 @@ | ||
| /** | ||
| * Unit tests for Coana facts-file brotli compression. | ||
| * | ||
| * Test Coverage: | ||
| * - compressSocketFactsForUpload: swaps .socket.facts.json paths for | ||
| * brotli-compressed .br temps, leaves other paths alone, cleans up. | ||
| * | ||
| * Related Files: | ||
| * - utils/coana/compress-facts.mts (implementation) | ||
| */ | ||
|
|
||
| import { | ||
| existsSync, | ||
| mkdtempSync, | ||
| readFileSync, | ||
| rmSync, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Test file uses
|
||
| writeFileSync, | ||
| } from 'node:fs' | ||
| import { tmpdir } from 'node:os' | ||
| import path from 'node:path' | ||
| import { brotliDecompressSync } from 'node:zlib' | ||
|
|
||
| import { describe, expect, it } from 'vitest' | ||
|
|
||
| import { compressSocketFactsForUpload } from '../../../../src/utils/coana/compress-facts.mts' | ||
|
|
||
| describe('compress-facts', () => { | ||
| describe('compressSocketFactsForUpload', () => { | ||
| it('writes brotli .br as a sibling of the source file', async () => { | ||
| const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-')) | ||
| const inputPath = path.join(wrapDir, '.socket.facts.json') | ||
| const payload = { tier1ReachabilityScanId: 'compress-test', a: 1, b: 2 } | ||
| writeFileSync(inputPath, JSON.stringify(payload)) | ||
|
|
||
| try { | ||
| const result = await compressSocketFactsForUpload([inputPath]) | ||
| const swappedPath = result.paths[0]! | ||
|
|
||
| expect(result.paths).toHaveLength(1) | ||
| expect(swappedPath).toBe(`${inputPath}.br`) | ||
| expect(existsSync(swappedPath)).toBe(true) | ||
| // The sibling file is real brotli that round-trips to the original | ||
| // JSON. | ||
| const roundTripped = brotliDecompressSync( | ||
| readFileSync(swappedPath), | ||
| ).toString('utf8') | ||
| expect(JSON.parse(roundTripped)).toEqual(payload) | ||
|
|
||
| // Cleanup removes the sibling .br file but leaves the source intact. | ||
| await result.cleanup() | ||
| expect(existsSync(swappedPath)).toBe(false) | ||
| expect(existsSync(inputPath)).toBe(true) | ||
| } finally { | ||
| rmSync(wrapDir, { recursive: true, force: true }) | ||
| } | ||
| }) | ||
|
|
||
| it('leaves non-facts paths unchanged', async () => { | ||
| const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-')) | ||
| const lock = path.join(wrapDir, 'package-lock.json') | ||
| const pkg = path.join(wrapDir, 'package.json') | ||
| writeFileSync(lock, '{}') | ||
| writeFileSync(pkg, '{}') | ||
|
|
||
| const result = await compressSocketFactsForUpload([lock, pkg]) | ||
| try { | ||
| expect(result.paths).toEqual([lock, pkg]) | ||
| } finally { | ||
| await result.cleanup() | ||
| rmSync(wrapDir, { recursive: true, force: true }) | ||
| } | ||
| }) | ||
|
|
||
| it('leaves a missing .socket.facts.json path unchanged', async () => { | ||
| const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-')) | ||
| const missingFacts = path.join(wrapDir, '.socket.facts.json') | ||
| // Note: no writeFileSync — file does not exist. | ||
|
|
||
| const result = await compressSocketFactsForUpload([missingFacts]) | ||
| try { | ||
| expect(result.paths).toEqual([missingFacts]) | ||
| } finally { | ||
| await result.cleanup() | ||
| rmSync(wrapDir, { recursive: true, force: true }) | ||
| } | ||
| }) | ||
|
|
||
| it('mixes facts and non-facts entries correctly', async () => { | ||
| const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-')) | ||
| const facts = path.join(wrapDir, '.socket.facts.json') | ||
| const lock = path.join(wrapDir, 'package-lock.json') | ||
| writeFileSync(facts, JSON.stringify({ tier1ReachabilityScanId: 'mix' })) | ||
| writeFileSync(lock, '{"name":"x"}') | ||
|
|
||
| const result = await compressSocketFactsForUpload([lock, facts]) | ||
| try { | ||
| expect(result.paths[0]).toBe(lock) | ||
| expect(result.paths[1]).toBe(`${facts}.br`) | ||
| const roundTripped = JSON.parse( | ||
| brotliDecompressSync(readFileSync(result.paths[1]!)).toString('utf8'), | ||
| ) | ||
| expect(roundTripped.tier1ReachabilityScanId).toBe('mix') | ||
| } finally { | ||
| await result.cleanup() | ||
| rmSync(wrapDir, { recursive: true, force: true }) | ||
| } | ||
| }) | ||
|
|
||
| it('cleanup is idempotent (safe to call twice)', async () => { | ||
| const wrapDir = mkdtempSync(path.join(tmpdir(), 'socket-coana-wrap-')) | ||
| const facts = path.join(wrapDir, '.socket.facts.json') | ||
| writeFileSync(facts, JSON.stringify({ tier1ReachabilityScanId: 'idem' })) | ||
|
|
||
| const result = await compressSocketFactsForUpload([facts]) | ||
| await result.cleanup() | ||
| await expect(result.cleanup()).resolves.not.toThrow() | ||
| rmSync(wrapDir, { recursive: true, force: true }) | ||
| }) | ||
| }) | ||
| }) | ||


There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Orphaned `.br` files on compression failure — Low Severity

If `pipeline` throws for any `.socket.facts.json` entry (e.g., disk-full or an I/O error), the `Promise.all` rejects and `compressSocketFactsForUpload` throws before returning the `cleanup` callback. Any `.br` files already created by completed sibling pipelines, or partially written by the failing `createWriteStream`, are orphaned on disk with no cleanup path available to the caller.

Reviewed by Cursor Bugbot for commit 4aef3ec. Configure here.