diff --git a/repository-release-embargo-controls/README.md b/repository-release-embargo-controls/README.md new file mode 100644 index 0000000..28dbe5e --- /dev/null +++ b/repository-release-embargo-controls/README.md @@ -0,0 +1,24 @@ +# Repository Release Embargo Controls + +This module adds a self-contained release readiness gate for access-controlled scientific repositories. It focuses on the part of Project Repository & Version Control where a repository steward needs to decide whether a version can be exported or published. + +## What it checks + +- Embargo windows before public release +- Restricted component access-policy coverage +- Large scientific artifacts that should be routed through LFS or object-store pointers +- DOI or reserved DOI metadata for citation stability +- Required reproducibility pipeline evidence +- Export bundle completeness for metadata, license, citation, manifest, and checksums +- Rollback snapshot readiness for the parent version +- Deterministic audit digest for review trails + +## Run locally + +```bash +npm run check +npm test +npm run demo +``` + +The demo uses `sample-data.json`, which intentionally blocks a public release because the release is still embargoed, a large restricted dataset is not routed through LFS, and the export bundle is missing checksums. 
diff --git a/repository-release-embargo-controls/demo.js b/repository-release-embargo-controls/demo.js new file mode 100644 index 0000000..3a6e79e --- /dev/null +++ b/repository-release-embargo-controls/demo.js @@ -0,0 +1,25 @@ +"use strict"; + +const sampleBundle = require("./sample-data.json"); +const { + analyzeRepositoryReleaseControls, +} = require("./src/repository-release-embargo-controls"); + +const result = analyzeRepositoryReleaseControls(sampleBundle); + +console.log(`Repository: ${result.repositoryName}`); +console.log(`Release: ${result.releaseVersion}`); +console.log(`Decision: ${result.releaseDecision}`); +console.log(`Audit digest: ${result.auditDigest}`); +console.log(""); +console.log("Checklist:"); +for (const item of result.releaseChecklist) { + console.log(`- ${item.name}: ${item.status}`); +} +console.log(""); +console.log("Findings:"); +for (const finding of result.findings) { + console.log(`- [${finding.severity}] ${finding.id}: ${finding.title}`); + console.log(` detail: ${finding.detail}`); + console.log(` remediation: ${finding.remediation}`); +} diff --git a/repository-release-embargo-controls/docs/repository-release-demo.mp4 b/repository-release-embargo-controls/docs/repository-release-demo.mp4 new file mode 100644 index 0000000..eebcf03 Binary files /dev/null and b/repository-release-embargo-controls/docs/repository-release-demo.mp4 differ diff --git a/repository-release-embargo-controls/docs/requirement-map.md b/repository-release-embargo-controls/docs/requirement-map.md new file mode 100644 index 0000000..9648d16 --- /dev/null +++ b/repository-release-embargo-controls/docs/requirement-map.md @@ -0,0 +1,16 @@ +# Requirement Map + +| Project Repository & Version Control requirement | Implementation | +| --- | --- | +| Structured scientific project repositories | `sample-data.json` models repository metadata, components, versions, access policies, release candidates, and reproducibility runs. 
| File, dataset, and notebook versioning | `componentManifest` normalizes typed files, notebooks, metadata, hashes, sizes, restrictions, and license fields. | +| Scientific commit/release readiness | `analyzeRepositoryReleaseControls` evaluates a release candidate against embargo, access, citation, reproducibility, export, and rollback controls. | +| Large file handling | `large-file-lfs-routing` blocks artifacts of 100 MB or more unless they have an LFS or object-store pointer. | +| Reproducibility checks | Required pipeline IDs must have latest passed runs with `sha256` output hashes. | +| DOI and citation support | DOI/reserved DOI and citation metadata are validated before export. | +| Exporting and archival | Required export files include metadata, license, citation, manifest, and checksums. | +| Rollback and auditability | Parent versions need rollback snapshot hashes, and every evaluation returns a deterministic `sha256` audit digest. | + +## Demo Video + +The PR includes `docs/repository-release-demo.mp4`, a real terminal walkthrough running the local check, test, and demo scripts. 
diff --git a/repository-release-embargo-controls/package.json b/repository-release-embargo-controls/package.json new file mode 100644 index 0000000..295c0c6 --- /dev/null +++ b/repository-release-embargo-controls/package.json @@ -0,0 +1,12 @@ +{ + "name": "repository-release-embargo-controls", + "version": "1.0.0", + "description": "Release readiness controls for access-controlled scientific repositories.", + "main": "src/repository-release-embargo-controls.js", + "scripts": { + "check": "node --check src/repository-release-embargo-controls.js && node --check test.js && node --check demo.js", + "test": "node test.js", + "demo": "node demo.js" + }, + "license": "MIT" +} diff --git a/repository-release-embargo-controls/sample-data.json b/repository-release-embargo-controls/sample-data.json new file mode 100644 index 0000000..263d7a8 --- /dev/null +++ b/repository-release-embargo-controls/sample-data.json @@ -0,0 +1,106 @@ +{ + "now": "2026-05-15T00:00:00.000Z", + "repository": { + "id": "repo-neuro-042", + "name": "Neurotherapy longitudinal response repository", + "steward": "dr.mendez@scibase.ai", + "embargoUntil": "2026-07-01T00:00:00.000Z", + "citation": { + "authors": ["A. Mendez", "J. 
Shah"], + "affiliations": ["SCIBASE Translational Neuroscience"], + "funding": ["NIH-R01-SCI-2042"] + } + }, + "components": [ + { + "path": "data/raw/cohort.csv", + "type": "human_subjects_dataset", + "bytes": 318767104, + "hash": "sha256:3c45b3a9ff7ebdbe8d8324f6fb52b04cda0d0d1d0bf8db346a04e23b63bbbc2f", + "restricted": true, + "lfsPointer": false, + "license": "DUA-2026-17" + }, + { + "path": "notebooks/response-model.ipynb", + "type": "analysis_notebook", + "bytes": 684512, + "hash": "sha256:623db1d2202f2a551a770471cfe01af0267758a19c38694d178287ff98c38d99", + "restricted": false, + "lfsPointer": false, + "license": "MIT" + }, + { + "path": "metadata/variables.json", + "type": "metadata", + "bytes": 18234, + "hash": "sha256:1483a0d69c817e45db648a55cc1eca6f18f23f99d20ecb72964ffab03b287d25", + "restricted": false, + "lfsPointer": false, + "license": "CC-BY-4.0" + } + ], + "versions": [ + { + "version": "v2.0.0", + "tag": "dataset-baseline", + "createdAt": "2026-04-01T12:30:00.000Z", + "rollbackSnapshotHash": "sha256:dd338f3433b08fda89ab11da7f54f840b5e832be4a47d1833911b1fb4d4c1dd8" + } + ], + "releaseCandidate": { + "version": "v2.1.0", + "visibility": "public", + "parentVersion": "v2.0.0", + "semanticTag": "dataset-minor", + "reservedDoi": "10.5281/zenodo.2002042", + "embargoUntil": "2026-07-01T00:00:00.000Z", + "requiredPipelineIds": [ + "pipeline:reproduce-figures", + "pipeline:privacy-regression" + ], + "manifest": [ + { + "path": "data/raw/cohort.csv", + "restricted": true + }, + { + "path": "notebooks/response-model.ipynb", + "restricted": false + }, + { + "path": "metadata/variables.json", + "restricted": false + } + ], + "exportFiles": [ + "metadata.json", + "LICENSE", + "CITATION.cff", + "manifest.json" + ] + }, + "reproducibilityRuns": [ + { + "pipelineId": "pipeline:reproduce-figures", + "status": "passed", + "completedAt": "2026-05-12T18:10:00.000Z", + "outputHash": "sha256:a97eaf103155fd400855d94c0ea50adfb245d51a6d0218736b176f817d4a1433" + }, + { + 
"pipelineId": "pipeline:privacy-regression", + "status": "passed", + "completedAt": "2026-05-12T18:33:00.000Z", + "outputHash": "sha256:3d0ff9fb5821faa229263f965ae6195d103439812c5f48f93ff8082af7c4f016" + } + ], + "accessPolicies": [ + { + "id": "dua-neurotherapy-2026", + "status": "active", + "steward": "dr.mendez@scibase.ai", + "allowedRoles": ["repository-steward", "privacy-reviewer"], + "expiresAt": "2026-12-31T23:59:59.000Z" + } + ] +} diff --git a/repository-release-embargo-controls/src/repository-release-embargo-controls.js b/repository-release-embargo-controls/src/repository-release-embargo-controls.js new file mode 100644 index 0000000..7510fe9 --- /dev/null +++ b/repository-release-embargo-controls/src/repository-release-embargo-controls.js @@ -0,0 +1,366 @@ +"use strict"; + +const crypto = require("node:crypto"); + +const LARGE_FILE_BYTES = 100 * 1024 * 1024; +const REQUIRED_EXPORT_FILES = [ + "metadata.json", + "LICENSE", + "CITATION.cff", + "manifest.json", + "checksums.sha256", +]; + +function stableStringify(value) { + if (Array.isArray(value)) { + return `[${value.map(stableStringify).join(",")}]`; + } + if (value && typeof value === "object") { + return `{${Object.keys(value) + .sort() + .map((key) => `${JSON.stringify(key)}:${stableStringify(value[key])}`) + .join(",")}}`; + } + return JSON.stringify(value); +} + +function stableHash(value) { + return crypto.createHash("sha256").update(stableStringify(value)).digest("hex"); +} + +function requireFields(object, fields, label) { + const missing = fields.filter((field) => object[field] === undefined || object[field] === null); + if (missing.length > 0) { + throw new Error(`${label} is missing required field(s): ${missing.join(", ")}`); + } +} + +function createFinding(severity, id, title, detail, remediation) { + return { severity, id, title, detail, remediation }; +} + +function normalizeDate(value) { + if (!value) return null; + const parsed = new Date(value); + if (Number.isNaN(parsed.getTime())) { 
+ throw new Error(`Invalid date: ${value}`); + } + return parsed; +} + +function assertBundle(bundle) { + requireFields(bundle, [ + "repository", + "components", + "versions", + "releaseCandidate", + "reproducibilityRuns", + "accessPolicies", + ], "release control bundle"); + requireFields(bundle.repository, ["id", "name", "steward"], "repository"); + requireFields(bundle.releaseCandidate, [ + "version", + "visibility", + "parentVersion", + "manifest", + "exportFiles", + "requiredPipelineIds", + ], "release candidate"); +} + +function buildComponentManifest(components) { + return components.map((component) => { + requireFields(component, ["path", "type", "bytes", "hash"], `component ${component.path || "(unknown)"}`); + return { + path: component.path, + type: component.type, + bytes: component.bytes, + hash: component.hash, + lfsPointer: Boolean(component.lfsPointer), + restricted: Boolean(component.restricted), + license: component.license || "unspecified", + }; + }); +} + +function evaluateComponents(componentManifest) { + const findings = []; + + for (const component of componentManifest) { + if (component.bytes >= LARGE_FILE_BYTES && !component.lfsPointer) { + findings.push(createFinding( + "blocker", + "large-file-lfs-routing", + "Large repository artifact is not routed through LFS", + `${component.path} is ${(component.bytes / (1024 * 1024)).toFixed(1)} MB and has no LFS pointer.`, + "Store the large artifact through Git LFS or an equivalent object-store pointer before release.", + )); + } + + if (!component.hash.startsWith("sha256:")) { + findings.push(createFinding( + "blocker", + "component-hash-format", + "Component hash is not a sha256 digest", + `${component.path} has hash ${component.hash}.`, + "Record every release component as sha256: in the manifest.", + )); + } + } + + return findings; +} + +function evaluateEmbargo(repository, releaseCandidate, accessPolicies, now) { + const findings = []; + const embargoUntil = 
normalizeDate(releaseCandidate.embargoUntil || repository.embargoUntil); + const activeRestrictedComponent = releaseCandidate.manifest.some((entry) => entry.restricted); + const hasActivePolicy = accessPolicies.some((policy) => { + const expiresAt = normalizeDate(policy.expiresAt); + return policy.status === "active" && (!expiresAt || expiresAt > now); + }); + + if (releaseCandidate.visibility === "public" && embargoUntil && embargoUntil > now) { + findings.push(createFinding( + "blocker", + "active-embargo-window", + "Release is public before the embargo expires", + `${releaseCandidate.version} is public but embargoed until ${embargoUntil.toISOString()}.`, + "Keep the release private or move the public release date after the embargo window.", + )); + } + + if (activeRestrictedComponent && !hasActivePolicy) { + findings.push(createFinding( + "blocker", + "restricted-access-policy-missing", + "Restricted artifacts have no active access policy", + "The manifest contains restricted data but no active repository access policy covers the release.", + "Attach an active access policy with steward, allowed roles, and expiry before publishing.", + )); + } + + return findings; +} + +function evaluateCitation(repository, releaseCandidate) { + const findings = []; + const hasDoi = Boolean(releaseCandidate.doi || releaseCandidate.reservedDoi || repository.doi); + + if (!hasDoi) { + findings.push(createFinding( + "blocker", + "doi-or-reservation-missing", + "Release has no DOI or reserved DOI", + "Scientific releases need a stable identifier before export.", + "Reserve or attach a DOI for the release candidate.", + )); + } + + if (!releaseCandidate.semanticTag) { + findings.push(createFinding( + "warning", + "semantic-version-tag-missing", + "Release lacks semantic scientific tag metadata", + "The candidate has a version but no dataset/model/protocol semantic change tag.", + "Add a semantic tag that explains the scientific scope of the version.", + )); + } + + const citation = 
releaseCandidate.citation || repository.citation || {}; + for (const field of ["authors", "affiliations", "funding"]) { + if (!Array.isArray(citation[field]) || citation[field].length === 0) { + findings.push(createFinding( + "warning", + `citation-${field}-missing`, + `Citation ${field} are missing`, + `The release citation has no ${field} entries.`, + `Add ${field} metadata to CITATION.cff before release.`, + )); + } + } + + return findings; +} + +function latestRunByPipeline(reproducibilityRuns) { + const latest = new Map(); + for (const run of reproducibilityRuns) { + const previous = latest.get(run.pipelineId); + if (!previous || new Date(run.completedAt) > new Date(previous.completedAt)) { + latest.set(run.pipelineId, run); + } + } + return latest; +} + +function evaluateReproducibility(reproducibilityRuns, requiredPipelineIds) { + const findings = []; + const latest = latestRunByPipeline(reproducibilityRuns); + + for (const pipelineId of requiredPipelineIds) { + const run = latest.get(pipelineId); + if (!run) { + findings.push(createFinding( + "blocker", + "reproducibility-run-missing", + "Required reproducibility pipeline has not run", + `${pipelineId} has no recorded execution for this release.`, + "Run the required reproducibility pipeline and attach its evidence hash.", + )); + continue; + } + + if (run.status !== "passed") { + findings.push(createFinding( + "blocker", + "reproducibility-run-failed", + "Required reproducibility pipeline did not pass", + `${pipelineId} completed with status ${run.status}.`, + "Fix the pipeline or release inputs, then rerun before publishing.", + )); + } + + if (!run.outputHash || !run.outputHash.startsWith("sha256:")) { + findings.push(createFinding( + "blocker", + "reproducibility-evidence-hash-missing", + "Reproducibility evidence hash is missing", + `${pipelineId} does not include a sha256 output evidence hash.`, + "Attach a deterministic hash of the pipeline outputs.", + )); + } + } + + return findings; +} + +function 
evaluateExportBundle(releaseCandidate, componentManifest) { + const findings = []; + const manifestPaths = new Set(releaseCandidate.manifest.map((entry) => entry.path)); + const componentPaths = new Set(componentManifest.map((component) => component.path)); + const exportedFiles = new Set(releaseCandidate.exportFiles); + + for (const componentPath of componentPaths) { + if (!manifestPaths.has(componentPath)) { + findings.push(createFinding( + "blocker", + "manifest-component-missing", + "Release manifest omits a repository component", + `${componentPath} is present in the repository but absent from the release manifest.`, + "Regenerate the manifest so every released component is included.", + )); + } + } + + for (const requiredFile of REQUIRED_EXPORT_FILES) { + if (!exportedFiles.has(requiredFile)) { + findings.push(createFinding( + "blocker", + "export-bundle-incomplete", + "Release export bundle is missing required metadata", + `${requiredFile} is absent from the export bundle.`, + "Include metadata, license, citation, manifest, and checksum files in every export bundle.", + )); + } + } + + return findings; +} + +function evaluateRollback(versions, releaseCandidate) { + const findings = []; + const parent = versions.find((version) => version.version === releaseCandidate.parentVersion); + + if (!parent) { + findings.push(createFinding( + "blocker", + "parent-version-missing", + "Release has no known rollback parent", + `${releaseCandidate.parentVersion} is not present in the version ledger.`, + "Record the parent version before releasing a new version.", + )); + } else if (!parent.rollbackSnapshotHash || !parent.rollbackSnapshotHash.startsWith("sha256:")) { + findings.push(createFinding( + "blocker", + "rollback-snapshot-missing", + "Rollback snapshot hash is missing", + `${parent.version} cannot be restored deterministically.`, + "Store a sha256 rollback snapshot hash for the parent release.", + )); + } + + return findings; +} + +function buildChecklist(findings) 
{ + const blockers = new Set(findings.filter((finding) => finding.severity === "blocker").map((finding) => finding.id)); + const warnings = new Set(findings.filter((finding) => finding.severity === "warning").map((finding) => finding.id)); + + return [ + ["embargo", "active-embargo-window"], + ["access-policy", "restricted-access-policy-missing"], + ["large-file-routing", "large-file-lfs-routing"], + ["citation", "doi-or-reservation-missing"], + ["reproducibility", "reproducibility-run-failed"], + ["export-bundle", "export-bundle-incomplete"], + ["rollback", "rollback-snapshot-missing"], + ].map(([name, findingId]) => ({ + name, + status: blockers.has(findingId) ? "blocked" : warnings.has(findingId) ? "review" : "passed", + })); +} + +function decideRelease(findings) { + if (findings.some((finding) => finding.severity === "blocker")) { + return "blocked"; + } + if (findings.some((finding) => finding.severity === "warning")) { + return "manual-review"; + } + return "release-ready"; +} + +function analyzeRepositoryReleaseControls(bundle, options = {}) { + assertBundle(bundle); + + const now = normalizeDate(options.now || bundle.now || new Date().toISOString()); + const componentManifest = buildComponentManifest(bundle.components); + const findings = [ + ...evaluateComponents(componentManifest), + ...evaluateEmbargo(bundle.repository, bundle.releaseCandidate, bundle.accessPolicies, now), + ...evaluateCitation(bundle.repository, bundle.releaseCandidate), + ...evaluateReproducibility(bundle.reproducibilityRuns, bundle.releaseCandidate.requiredPipelineIds), + ...evaluateExportBundle(bundle.releaseCandidate, componentManifest), + ...evaluateRollback(bundle.versions, bundle.releaseCandidate), + ]; + const releaseChecklist = buildChecklist(findings); + const releaseDecision = decideRelease(findings); + const auditDigest = stableHash({ + repositoryId: bundle.repository.id, + version: bundle.releaseCandidate.version, + releaseDecision, + findings, + componentManifest, + 
releaseChecklist, + }); + + return { + repositoryId: bundle.repository.id, + repositoryName: bundle.repository.name, + evaluatedAt: now.toISOString(), + releaseVersion: bundle.releaseCandidate.version, + releaseDecision, + findings, + componentManifest, + releaseChecklist, + auditDigest: `sha256:${auditDigest}`, + }; +} + +module.exports = { + REQUIRED_EXPORT_FILES, + analyzeRepositoryReleaseControls, + stableHash, + stableStringify, +}; diff --git a/repository-release-embargo-controls/test.js b/repository-release-embargo-controls/test.js new file mode 100644 index 0000000..0dabf8c --- /dev/null +++ b/repository-release-embargo-controls/test.js @@ -0,0 +1,48 @@ +"use strict"; + +const assert = require("node:assert/strict"); +const sampleBundle = require("./sample-data.json"); +const { + analyzeRepositoryReleaseControls, + stableHash, +} = require("./src/repository-release-embargo-controls"); + +function clone(value) { + return JSON.parse(JSON.stringify(value)); +} + +function byId(result) { + return new Set(result.findings.map((finding) => finding.id)); +} + +const blocked = analyzeRepositoryReleaseControls(sampleBundle); +const blockedIds = byId(blocked); + +assert.equal(blocked.releaseDecision, "blocked"); +assert.match(blocked.auditDigest, /^sha256:[a-f0-9]{64}$/); +assert(blockedIds.has("active-embargo-window")); +assert(blockedIds.has("large-file-lfs-routing")); +assert(blockedIds.has("export-bundle-incomplete")); +assert.equal(blocked.componentManifest.length, 3); +assert.equal(blocked.releaseChecklist.find((item) => item.name === "embargo").status, "blocked"); + +const readyBundle = clone(sampleBundle); +readyBundle.releaseCandidate.visibility = "private"; +readyBundle.releaseCandidate.exportFiles.push("checksums.sha256"); +readyBundle.components[0].lfsPointer = true; + +const ready = analyzeRepositoryReleaseControls(readyBundle); +assert.equal(ready.releaseDecision, "release-ready"); +assert.equal(ready.findings.length, 0); 
+assert.equal(ready.releaseChecklist.every((item) => item.status === "passed"), true); + +const failedPipelineBundle = clone(readyBundle); +failedPipelineBundle.reproducibilityRuns[1].status = "failed"; +const failedPipeline = analyzeRepositoryReleaseControls(failedPipelineBundle); +assert.equal(failedPipeline.releaseDecision, "blocked"); +assert(byId(failedPipeline).has("reproducibility-run-failed")); + +assert.notEqual(blocked.auditDigest, ready.auditDigest); +assert.equal(stableHash({ b: 1, a: 2 }), stableHash({ a: 2, b: 1 })); + +console.log("repository release embargo control tests passed");