diff --git a/execution-environment-drift-governance/README.md b/execution-environment-drift-governance/README.md new file mode 100644 index 0000000..5f9acf5 --- /dev/null +++ b/execution-environment-drift-governance/README.md @@ -0,0 +1,66 @@ +# Execution Environment Drift Governance + +This module is a focused slice for SCIBASE.AI issue #14, **Scientific/Engineering Data & Code Hosting**. It covers the reproducible execution side of data/code hosting: environment fingerprints, dependency drift, artifact-to-runtime readiness, scheduled rerun decisions, retention/export evidence, and reviewer-ready requirement mapping. + +It is intentionally dependency-free and credential-free. The sample data is synthetic. + +## What It Adds + +- Deterministic artifact capsules for datasets, notebooks, JSON supplements, scripts, and media. +- Metadata-aware preview decisions for notebook, tabular, JSON, image, code, and generic artifacts. +- FAIR scoring plus JSON-LD and DataCite payload generation for DOI/export readiness. +- Container/runtime fingerprinting with pinned image checks, dependency version drift, GPU capability changes, and network policy drift. +- Compute trigger planning for scheduled reruns and "reproduce results" workflows. +- Retention/export evidence for regulated artifacts, embargoes, DOI readiness, and audit digests. +- Requirement mapping back to issue #14 for storage, metadata, executable environments, compute triggers, and export governance. + +## Demo + +```bash +npm run demo +``` + +The demo writes: + +```text +demo/execution-drift-governance-packet.json +``` + +That packet includes artifact digests, environment drift decisions, rerun actions, retention/export evidence, and the requirement map. + +A short visual demo is included at: + +```text +demo/execution-drift-governance-demo.mp4 +``` + +## Tests + +```bash +npm test +``` + +The tests cover: + +- Stable hashing regardless of object key order. +- Artifact type detection. 
+- Environment fingerprint determinism. +- Blocking unpinned container images and `latest` dependencies. +- Restricted human-subject artifact gating. +- Safe scheduled rerun decisions. +- Retention/export evidence and requirement mapping. + +## Requirement Map + +| Issue #14 requirement | Evidence in this module | +| --- | --- | +| Scalable storage engine for datasets/code/supplementary files | Artifact capsules include type detection, content digests, preview policy, version metadata, and path organization. | +| Metadata-aware previews and versioning/diffing | Preview policy and stable artifact digests enable deterministic review and version comparison. | +| JSON-LD, DataCite, schema.org, FAIR compliance | Each artifact evaluation emits JSON-LD/DataCite payloads and FAIR score checks with DOI thresholds. | +| Executable environments | Runtime capsules are normalized and fingerprinted; drift checks cover base image, package versions, GPU, and network policy. | +| Sandboxed execution and compute triggers | Rerun plans decide `RUN_NOW`, `SCHEDULED`, `REVIEW_REQUIRED`, or `BLOCKED` using artifacts, environments, cadence, and compute budgets. | +| Export/retention governance | Retention evidence covers regulated data, embargoes, DOI readiness, and immutable evidence digests. | + +## Design Notes + +The module does not run untrusted code. It produces governance decisions that an execution service can consume before enabling notebook/script reruns. That keeps the slice safe and reviewable while still addressing the executable-environment requirements in the bounty. 
/*
 * Patch context that shares this physical line with demo.js, preserved verbatim:
 * diff --git a/execution-environment-drift-governance/demo.js b/execution-environment-drift-governance/demo.js new file mode 100644 index 0000000..c3251b6 --- /dev/null +++ b/execution-environment-drift-governance/demo.js @@ -0,0 +1,21 @@
 */

// Demo entry point: evaluates the synthetic sample project, persists the
// resulting governance packet under ./demo, and prints a short summary.
const fs = require("fs");
const path = require("path");
const { buildGovernancePacket } = require("./src");
const { sampleProject } = require("./src/sample-data");

const demoDirectory = path.join(__dirname, "demo");
const packetPath = path.join(demoDirectory, "execution-drift-governance-packet.json");
const packet = buildGovernancePacket(sampleProject);

// The output directory may not exist on a fresh checkout.
fs.mkdirSync(demoDirectory, { recursive: true });
fs.writeFileSync(packetPath, `${JSON.stringify(packet, null, 2)}\n`);

const { summary } = packet;
const reportLines = [
  "Execution Environment Drift Governance Demo",
  `Project: ${packet.projectId}`,
  `Artifacts: ${summary.artifacts}`,
  `Ready artifacts: ${summary.readyArtifacts}`,
  `Blocked artifacts: ${summary.blockedArtifacts}`,
  `Runnable now: ${summary.runnableNow}`,
  `Review-required reruns: ${summary.reviewReruns}`,
  `Blocked reruns: ${summary.blockedReruns}`,
  `Packet: ${packetPath}`,
];

for (const line of reportLines) {
  console.log(line);
}

/*
 * Patch context that follows demo.js on the same physical line, preserved verbatim:
 * diff --git a/execution-environment-drift-governance/demo/demo-card.html b/execution-environment-drift-governance/demo/demo-card.html new file mode 100644 index 0000000..d7eb6ac --- /dev/null +++ b/execution-environment-drift-governance/demo/demo-card.html @@ -0,0 +1,51 @@ + + + + + Execution Drift Governance Demo + + + +
 */
+

SCIBASE.AI execution drift governance

+

Artifacts: 3, ready: 2, blocked: 1

+

Environment checks: pinned images, dependency drift, GPU, network policy

+

Rerun plans: 1 RUN_NOW, 1 BLOCKED with audit reasons

+

Output: demo/execution-drift-governance-packet.json

+

No credentials, no external services, synthetic data only

+
+ + diff --git a/execution-environment-drift-governance/demo/demo-card.html.png b/execution-environment-drift-governance/demo/demo-card.html.png new file mode 100644 index 0000000..0e5e415 Binary files /dev/null and b/execution-environment-drift-governance/demo/demo-card.html.png differ diff --git a/execution-environment-drift-governance/demo/demo-text.txt b/execution-environment-drift-governance/demo/demo-text.txt new file mode 100644 index 0000000..afb22af --- /dev/null +++ b/execution-environment-drift-governance/demo/demo-text.txt @@ -0,0 +1,7 @@ +SCIBASE.AI execution drift governance + +Artifacts: 3, ready: 2, blocked: 1 +Environment checks: pinned images, dependency drift, GPU, network policy +Rerun plans: 1 RUN_NOW, 1 BLOCKED with audit reasons +Output: demo/execution-drift-governance-packet.json +No credentials, no external services, synthetic data only diff --git a/execution-environment-drift-governance/demo/execution-drift-governance-demo.mp4 b/execution-environment-drift-governance/demo/execution-drift-governance-demo.mp4 new file mode 100644 index 0000000..88fbbf5 Binary files /dev/null and b/execution-environment-drift-governance/demo/execution-drift-governance-demo.mp4 differ diff --git a/execution-environment-drift-governance/demo/execution-drift-governance-packet.json b/execution-environment-drift-governance/demo/execution-drift-governance-packet.json new file mode 100644 index 0000000..9235ed2 --- /dev/null +++ b/execution-environment-drift-governance/demo/execution-drift-governance-packet.json @@ -0,0 +1,483 @@ +{ + "projectId": "scibase-neuro-protein-2026", + "generatedAt": "2026-05-15T12:00:00.000Z", + "summary": { + "artifacts": 3, + "environments": 2, + "rerunTriggers": 2, + "readyArtifacts": 2, + "blockedArtifacts": 1, + "blockedReruns": 1, + "reviewReruns": 0, + "runnableNow": 1 + }, + "artifacts": [ + { + "artifactId": "artifact:raw-assay-csv", + "path": "data/raw/assay-readings.csv", + "type": "dataset/csv", + "digest": 
"298d17e6f0b4170815b55951c21690fd65eb7a8c3dc18b9f5523d6f31c694c4c", + "preview": "sample-table-preview", + "fair": { + "score": 1, + "checks": [ + { + "name": "unique_identifier", + "ok": true + }, + { + "name": "title", + "ok": true + }, + { + "name": "creator", + "ok": true + }, + { + "name": "license", + "ok": true + }, + { + "name": "machine_readable_type", + "ok": true + }, + { + "name": "version", + "ok": true + }, + { + "name": "tags", + "ok": true + }, + { + "name": "variable_metadata", + "ok": true + } + ] + }, + "jsonLd": { + "@context": "https://schema.org", + "@type": "Dataset", + "identifier": "artifact:raw-assay-csv", + "name": "Raw protein folding assay readings", + "license": "CC-BY-4.0", + "creator": [ + { + "@type": "Person", + "identifier": "ORCID:0000-0002-1825-0097" + } + ], + "keywords": [ + "protein-folding", + "assay", + "raw" + ], + "version": "v3", + "encodingFormat": "dataset/csv" + }, + "dataCite": { + "identifiers": [ + { + "identifier": "artifact:raw-assay-csv", + "identifierType": "SCIBASE-ID" + } + ], + "titles": [ + { + "title": "Raw protein folding assay readings" + } + ], + "creators": [ + { + "name": "ORCID:0000-0002-1825-0097" + } + ], + "publisher": "SCIBASE.AI", + "publicationYear": 2026, + "resourceType": { + "resourceTypeGeneral": "Dataset", + "resourceType": "dataset/csv" + }, + "rightsList": [ + { + "rights": "CC-BY-4.0" + } + ] + }, + "status": "READY", + "reasons": [] + }, + { + "artifactId": "artifact:analysis-notebook", + "path": "notebooks/folding-analysis.ipynb", + "type": "code/notebook", + "digest": "173745b46ea44130059ca2c1fd95098de5a3ad74968d221bb5cb5961e093dabf", + "preview": "render-notebook", + "fair": { + "score": 1, + "checks": [ + { + "name": "unique_identifier", + "ok": true + }, + { + "name": "title", + "ok": true + }, + { + "name": "creator", + "ok": true + }, + { + "name": "license", + "ok": true + }, + { + "name": "machine_readable_type", + "ok": true + }, + { + "name": "version", + "ok": true + }, + { 
+ "name": "tags", + "ok": true + }, + { + "name": "variable_metadata", + "ok": true + } + ] + }, + "jsonLd": { + "@context": "https://schema.org", + "@type": "SoftwareSourceCode", + "identifier": "artifact:analysis-notebook", + "name": "Protein folding analysis notebook", + "license": "MIT", + "creator": [ + { + "@type": "Person", + "identifier": "ORCID:0000-0002-1825-0097" + } + ], + "keywords": [ + "notebook", + "reproducibility", + "figure-generation" + ], + "version": "v5", + "encodingFormat": "code/notebook" + }, + "dataCite": { + "identifiers": [ + { + "identifier": "artifact:analysis-notebook", + "identifierType": "SCIBASE-ID" + } + ], + "titles": [ + { + "title": "Protein folding analysis notebook" + } + ], + "creators": [ + { + "name": "ORCID:0000-0002-1825-0097" + } + ], + "publisher": "SCIBASE.AI", + "publicationYear": 2026, + "resourceType": { + "resourceTypeGeneral": "Software", + "resourceType": "code/notebook" + }, + "rightsList": [ + { + "rights": "MIT" + } + ] + }, + "status": "READY", + "reasons": [] + }, + { + "artifactId": "artifact:clinical-supplement", + "path": "supplemental/clinical-cohort.json", + "type": "dataset/json", + "digest": "d6c5eeb47a61a1fc06ad599fbbc47f4b2ca6f7f4b835c2e8df738f493ce2e01d", + "preview": "schema-and-tree-preview", + "fair": { + "score": 0.88, + "checks": [ + { + "name": "unique_identifier", + "ok": true + }, + { + "name": "title", + "ok": true + }, + { + "name": "creator", + "ok": true + }, + { + "name": "license", + "ok": false + }, + { + "name": "machine_readable_type", + "ok": true + }, + { + "name": "version", + "ok": true + }, + { + "name": "tags", + "ok": true + }, + { + "name": "variable_metadata", + "ok": true + } + ] + }, + "jsonLd": { + "@context": "https://schema.org", + "@type": "Dataset", + "identifier": "artifact:clinical-supplement", + "name": "Restricted clinical cohort covariates", + "license": "UNSPECIFIED", + "creator": [ + { + "@type": "Person", + "identifier": "ORCID:0000-0002-1825-0097" + } + 
], + "keywords": [ + "clinical", + "supplemental", + "restricted" + ], + "version": "v1", + "encodingFormat": "dataset/json" + }, + "dataCite": { + "identifiers": [ + { + "identifier": "artifact:clinical-supplement", + "identifierType": "SCIBASE-ID" + } + ], + "titles": [ + { + "title": "Restricted clinical cohort covariates" + } + ], + "creators": [ + { + "name": "ORCID:0000-0002-1825-0097" + } + ], + "publisher": "SCIBASE.AI", + "publicationYear": 2026, + "resourceType": { + "resourceTypeGeneral": "Dataset", + "resourceType": "dataset/json" + }, + "rightsList": [] + }, + "status": "BLOCKED", + "reasons": [ + { + "type": "restricted_artifact_missing_license", + "severity": "block", + "detail": "Human-subject or restricted artifacts need explicit reuse/licensing metadata." + }, + { + "type": "embargo_requires_export_approval", + "severity": "review", + "detail": "Embargo active until 2026-08-01." + } + ] + } + ], + "environments": [ + { + "environmentId": "env:python-analysis", + "baselineDigest": "4712d9ad4a010c7f5323eafe6f3954f049d21782c36eb6953e1dd8b211f1abb8", + "currentDigest": "4712d9ad4a010c7f5323eafe6f3954f049d21782c36eb6953e1dd8b211f1abb8", + "drifted": false, + "findings": [], + "status": "READY" + }, + { + "environmentId": "env:ml-training", + "baselineDigest": "5290755b3c117cd6d2b8dc54ffc4c387bc3415d08f6d765a7e21b936b895b7ea", + "currentDigest": "045d7fda0aed683a2e54ab1133ae7be6bfe65a138d4ca986c4ba2c4d4487a254", + "drifted": true, + "findings": [ + { + "type": "base_image_changed", + "severity": "block", + "baseline": "pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime@sha256:demo-old", + "current": "pytorch/pytorch:2.4.1-cuda12.1-cudnn9-runtime" + }, + { + "type": "base_image_unpinned", + "severity": "block", + "current": "pytorch/pytorch:2.4.1-cuda12.1-cudnn9-runtime" + }, + { + "type": "dependency_changed", + "severity": "review", + "package": "torch", + "baseline": "2.3.0", + "current": "2.4.1" + }, + { + "type": "dependency_changed", + "severity": 
"block", + "package": "transformers", + "baseline": "4.41.0", + "current": "latest" + }, + { + "type": "dependency_unpinned", + "severity": "block", + "package": "transformers", + "current": "latest" + } + ], + "status": "BLOCKED" + } + ], + "reruns": [ + { + "triggerId": "trigger:weekly-figures", + "name": "Weekly figure regeneration", + "action": "RUN_NOW", + "due": true, + "cadence": "weekly", + "inputDigest": "59565e9fcb9896bb0d43f4db51d5b86e9a226b783ba08a9c4df642148f52f607", + "environmentDigest": "4712d9ad4a010c7f5323eafe6f3954f049d21782c36eb6953e1dd8b211f1abb8", + "estimatedCpuMinutes": 12, + "expectedOutputs": [ + "figures/folding-response.svg", + "tables/folding-summary.csv" + ], + "reasons": [] + }, + { + "triggerId": "trigger:model-refresh", + "name": "Quarterly model training refresh", + "action": "BLOCKED", + "due": false, + "cadence": "quarterly", + "inputDigest": "5b7b3d7846b3b82e6845080c7d2e296ebff82b524eb604dcb0c253e72b01c4f0", + "environmentDigest": "045d7fda0aed683a2e54ab1133ae7be6bfe65a138d4ca986c4ba2c4d4487a254", + "estimatedCpuMinutes": 180, + "expectedOutputs": [ + "models/folding-risk.pt", + "reports/model-card.md" + ], + "reasons": [ + { + "type": "artifact_blocked", + "severity": "block", + "artifacts": [ + "artifact:clinical-supplement" + ] + }, + { + "type": "environment_drift_blocked", + "severity": "block", + "findings": [ + { + "type": "base_image_changed", + "severity": "block", + "baseline": "pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime@sha256:demo-old", + "current": "pytorch/pytorch:2.4.1-cuda12.1-cudnn9-runtime" + }, + { + "type": "base_image_unpinned", + "severity": "block", + "current": "pytorch/pytorch:2.4.1-cuda12.1-cudnn9-runtime" + }, + { + "type": "dependency_changed", + "severity": "block", + "package": "transformers", + "baseline": "4.41.0", + "current": "latest" + }, + { + "type": "dependency_unpinned", + "severity": "block", + "package": "transformers", + "current": "latest" + } + ] + }, + { + "type": 
"compute_budget_exceeded", + "severity": "review", + "estimatedCpuMinutes": 180, + "maxCpuMinutes": 120 + } + ] + } + ], + "retentionAndExport": [ + { + "artifactId": "artifact:raw-assay-csv", + "retentionDays": 2555, + "exportEligible": true, + "doiReady": true, + "evidenceDigest": "f4f308ba885a319144644ab062d7595b5c37ae497c70e17e401f9623a67d355e" + }, + { + "artifactId": "artifact:analysis-notebook", + "retentionDays": 2555, + "exportEligible": true, + "doiReady": true, + "evidenceDigest": "59380beeb56d60bc716c471e53ac55a872114fc73da877b00422fd7bfac56595" + }, + { + "artifactId": "artifact:clinical-supplement", + "retentionDays": 3650, + "exportEligible": false, + "doiReady": false, + "evidenceDigest": "b9d62bb50fd7db2ca29e4b837e8a6406705a2dd3ca8f9fed6cdbad27f9079a18" + } + ], + "requirementMap": [ + { + "requirement": "Scalable storage engine for datasets, code, supplementary files", + "evidence": "3 artifact capsules include type detection, content digests, preview policy, and version metadata." + }, + { + "requirement": "Metadata-aware previews and version/diff readiness", + "evidence": "Each artifact receives a deterministic preview policy and digest suitable for version comparison." + }, + { + "requirement": "JSON-LD, DataCite, schema.org, and FAIR compliance", + "evidence": "Artifact evaluations emit JSON-LD/DataCite payloads and FAIR score checks with DOI readiness thresholds." + }, + { + "requirement": "Executable environments and sandboxed reruns", + "evidence": "2 environment capsules are fingerprinted and checked for pinned images, dependencies, GPU, and network policy drift." + }, + { + "requirement": "Built-in compute triggers and scheduled reruns", + "evidence": "2 rerun plans include due-state, input digests, compute budget checks, expected outputs, and block/review decisions." 
+ }, + { + "requirement": "Research artifact export and retention governance", + "evidence": "Retention/export evidence records DOI readiness, embargo checks, regulated retention days, and immutable evidence digests." + } + ] +} diff --git a/execution-environment-drift-governance/package.json b/execution-environment-drift-governance/package.json new file mode 100644 index 0000000..e31d8dd --- /dev/null +++ b/execution-environment-drift-governance/package.json @@ -0,0 +1,15 @@ +{ + "name": "execution-environment-drift-governance", + "version": "1.0.0", + "private": true, + "description": "Executable environment drift and rerun governance for scientific data and code hosting.", + "main": "src/index.js", + "scripts": { + "demo": "node demo.js", + "test": "node test.js" + }, + "engines": { + "node": ">=18" + }, + "license": "MIT" +} diff --git a/execution-environment-drift-governance/src/index.js b/execution-environment-drift-governance/src/index.js new file mode 100644 index 0000000..fa83099 --- /dev/null +++ b/execution-environment-drift-governance/src/index.js @@ -0,0 +1,518 @@ +const crypto = require("crypto"); + +const DAY_MS = 24 * 60 * 60 * 1000; + +function stableStringify(value) { + if (Array.isArray(value)) { + return `[${value.map((item) => stableStringify(item)).join(",")}]`; + } + + if (value && typeof value === "object") { + return `{${Object.keys(value) + .sort() + .map((key) => `${JSON.stringify(key)}:${stableStringify(value[key])}`) + .join(",")}}`; + } + + return JSON.stringify(value); +} + +function sha256(value) { + return crypto.createHash("sha256").update(stableStringify(value)).digest("hex"); +} + +function normalizePackageMap(packages = {}) { + return Object.fromEntries( + Object.entries(packages) + .map(([name, version]) => [name.toLowerCase(), String(version)]) + .sort(([left], [right]) => left.localeCompare(right)), + ); +} + +function normalizeEnvironmentSpec(spec = {}) { + return { + baseImage: spec.baseImage || null, + runtime: 
spec.runtime || null, + runtimeVersion: spec.runtimeVersion || null, + packages: normalizePackageMap(spec.packages), + gpu: Boolean(spec.gpu), + network: spec.network || "blocked", + osPackages: [...(spec.osPackages || [])].sort(), + }; +} + +function fingerprintEnvironment(spec) { + const normalized = normalizeEnvironmentSpec(spec); + return { + digest: sha256(normalized), + normalized, + }; +} + +function hasPinnedContainerDigest(baseImage) { + return typeof baseImage === "string" && baseImage.includes("@sha256:"); +} + +function detectDependencyChanges(baselinePackages, currentPackages) { + const names = new Set([...Object.keys(baselinePackages), ...Object.keys(currentPackages)]); + return [...names].sort().flatMap((name) => { + const before = baselinePackages[name]; + const after = currentPackages[name]; + + if (!before) { + return [{ type: "dependency_added", severity: "review", package: name, current: after }]; + } + + if (!after) { + return [{ type: "dependency_removed", severity: "review", package: name, baseline: before }]; + } + + if (before !== after) { + return [ + { + type: "dependency_changed", + severity: after === "latest" || after === "*" ? "block" : "review", + package: name, + baseline: before, + current: after, + }, + ]; + } + + return []; + }); +} + +function detectEnvironmentDrift(environment) { + const baseline = normalizeEnvironmentSpec(environment.baseline); + const current = normalizeEnvironmentSpec(environment.current); + const findings = []; + + if (baseline.baseImage !== current.baseImage) { + findings.push({ + type: "base_image_changed", + severity: hasPinnedContainerDigest(current.baseImage) ? 
"review" : "block", + baseline: baseline.baseImage, + current: current.baseImage, + }); + } + + if (!hasPinnedContainerDigest(current.baseImage)) { + findings.push({ + type: "base_image_unpinned", + severity: "block", + current: current.baseImage, + }); + } + + if (baseline.runtime !== current.runtime || baseline.runtimeVersion !== current.runtimeVersion) { + findings.push({ + type: "runtime_changed", + severity: "review", + baseline: `${baseline.runtime || "unknown"}@${baseline.runtimeVersion || "unknown"}`, + current: `${current.runtime || "unknown"}@${current.runtimeVersion || "unknown"}`, + }); + } + + findings.push(...detectDependencyChanges(baseline.packages, current.packages)); + + Object.entries(current.packages).forEach(([name, version]) => { + if (version === "latest" || version === "*" || version.startsWith("^") || version.startsWith("~")) { + findings.push({ + type: "dependency_unpinned", + severity: "block", + package: name, + current: version, + }); + } + }); + + if (baseline.gpu !== current.gpu) { + findings.push({ + type: "gpu_capability_changed", + severity: "review", + baseline: baseline.gpu, + current: current.gpu, + }); + } + + if (baseline.network !== current.network) { + findings.push({ + type: "network_policy_changed", + severity: current.network === "blocked" ? "review" : "block", + baseline: baseline.network, + current: current.network, + }); + } + + const baselineFingerprint = fingerprintEnvironment(baseline); + const currentFingerprint = fingerprintEnvironment(current); + + return { + environmentId: environment.id, + baselineDigest: baselineFingerprint.digest, + currentDigest: currentFingerprint.digest, + drifted: baselineFingerprint.digest !== currentFingerprint.digest, + findings, + status: findings.some((finding) => finding.severity === "block") + ? "BLOCKED" + : findings.length > 0 + ? 
"REVIEW" + : "READY", + }; +} + +function detectArtifactType(path = "", explicitType) { + if (explicitType) return explicitType; + const lower = path.toLowerCase(); + if (lower.endsWith(".ipynb")) return "code/notebook"; + if (lower.endsWith(".csv") || lower.endsWith(".tsv")) return "dataset/tabular"; + if (lower.endsWith(".json") || lower.endsWith(".jsonld")) return "dataset/json"; + if (lower.endsWith(".py") || lower.endsWith(".r") || lower.endsWith(".jl")) return "code/script"; + if (lower.match(/\.(png|jpg|jpeg|gif|svg)$/)) return "image"; + return "artifact/generic"; +} + +function scoreFairMetadata(artifact) { + const metadata = artifact.metadata || {}; + const checks = [ + ["unique_identifier", Boolean(artifact.id || metadata.doi || metadata.uuid)], + ["title", Boolean(metadata.title)], + ["creator", Array.isArray(metadata.creators) && metadata.creators.length > 0], + ["license", Boolean(artifact.license)], + ["machine_readable_type", Boolean(detectArtifactType(artifact.path, artifact.type))], + ["version", Boolean(artifact.version)], + ["tags", Array.isArray(artifact.tags) && artifact.tags.length > 0], + ["variable_metadata", Array.isArray(metadata.variables) && metadata.variables.length > 0], + ]; + + const passed = checks.filter(([, ok]) => ok).length; + return { + score: Number((passed / checks.length).toFixed(2)), + checks: checks.map(([name, ok]) => ({ name, ok })), + }; +} + +function classifyPreview(artifact) { + const type = detectArtifactType(artifact.path, artifact.type); + if (type.includes("notebook")) return "render-notebook"; + if (type.includes("csv") || type.includes("tabular")) return "sample-table-preview"; + if (type.includes("json")) return "schema-and-tree-preview"; + if (type.startsWith("image")) return "thumbnail-preview"; + if (type.includes("script")) return "syntax-highlighted-code-preview"; + return "metadata-only-preview"; +} + +function buildJsonLd(artifact) { + const metadata = artifact.metadata || {}; + return { + "@context": 
"https://schema.org", + "@type": artifact.type && artifact.type.startsWith("code") ? "SoftwareSourceCode" : "Dataset", + identifier: artifact.id, + name: metadata.title || artifact.path, + license: artifact.license || "UNSPECIFIED", + creator: (metadata.creators || []).map((creator) => ({ "@type": "Person", identifier: creator })), + keywords: artifact.tags || [], + version: artifact.version || "unversioned", + encodingFormat: detectArtifactType(artifact.path, artifact.type), + }; +} + +function buildDataCite(artifact) { + const metadata = artifact.metadata || {}; + return { + identifiers: [{ identifier: artifact.id, identifierType: "SCIBASE-ID" }], + titles: [{ title: metadata.title || artifact.path }], + creators: (metadata.creators || []).map((creator) => ({ name: creator })), + publisher: "SCIBASE.AI", + publicationYear: new Date().getUTCFullYear(), + resourceType: { + resourceTypeGeneral: artifact.type && artifact.type.startsWith("code") ? "Software" : "Dataset", + resourceType: detectArtifactType(artifact.path, artifact.type), + }, + rightsList: artifact.license ? 
[{ rights: artifact.license }] : [], + }; +} + +function evaluateArtifact(artifact, environmentsById, policy = {}) { + const fair = scoreFairMetadata(artifact); + const requiredEnvironmentIds = artifact.requiredEnvironmentIds || []; + const missingEnvironments = requiredEnvironmentIds.filter((id) => !environmentsById.has(id)); + const reasons = []; + + if (missingEnvironments.length > 0) { + reasons.push({ + type: "missing_execution_environment", + severity: "block", + detail: missingEnvironments.join(", "), + }); + } + + if (artifact.access?.containsHumanSubjects && !artifact.license) { + reasons.push({ + type: "restricted_artifact_missing_license", + severity: "block", + detail: "Human-subject or restricted artifacts need explicit reuse/licensing metadata.", + }); + } + + if (artifact.access?.embargoUntil && policy.embargoedExportRequiresApproval) { + reasons.push({ + type: "embargo_requires_export_approval", + severity: "review", + detail: `Embargo active until ${artifact.access.embargoUntil}.`, + }); + } + + if (fair.score < (policy.minimumFairScoreForDoi || 0.82)) { + reasons.push({ + type: "fair_score_below_doi_threshold", + severity: "review", + detail: `FAIR score ${fair.score} is below DOI threshold.`, + }); + } + + const digest = sha256({ + path: artifact.path, + type: detectArtifactType(artifact.path, artifact.type), + version: artifact.version, + content: artifact.content, + metadata: artifact.metadata, + }); + + return { + artifactId: artifact.id, + path: artifact.path, + type: detectArtifactType(artifact.path, artifact.type), + digest, + preview: classifyPreview(artifact), + fair, + jsonLd: buildJsonLd(artifact), + dataCite: buildDataCite(artifact), + status: reasons.some((reason) => reason.severity === "block") + ? "BLOCKED" + : reasons.length > 0 + ? 
"REVIEW" + : "READY", + reasons, + }; +} + +function cadenceToDays(cadence) { + return { + daily: 1, + weekly: 7, + monthly: 30, + quarterly: 90, + }[cadence] || 7; +} + +function isDue(trigger) { + const lastRunAt = new Date(trigger.lastRunAt || 0).getTime(); + const requestedAt = new Date(trigger.requestedAt || Date.now()).getTime(); + const dueAfter = lastRunAt + cadenceToDays(trigger.cadence) * DAY_MS; + return requestedAt >= dueAfter; +} + +function planRerun(trigger, artifactEvaluations, environmentDrift, environment) { + const artifactStatuses = trigger.artifactIds.map((artifactId) => artifactEvaluations.get(artifactId)); + const missingArtifacts = artifactStatuses.filter(Boolean).length !== trigger.artifactIds.length; + const blockingArtifacts = artifactStatuses.filter((artifact) => artifact?.status === "BLOCKED"); + const reviewArtifacts = artifactStatuses.filter((artifact) => artifact?.status === "REVIEW"); + const estimatedCpuMinutes = trigger.estimatedCpuMinutes || 0; + const maxCpuMinutes = environment.computePolicy?.maxCpuMinutes || Infinity; + const due = isDue(trigger); + const reasons = []; + + if (missingArtifacts) { + reasons.push({ type: "missing_artifact", severity: "block" }); + } + + if (blockingArtifacts.length > 0) { + reasons.push({ + type: "artifact_blocked", + severity: "block", + artifacts: blockingArtifacts.map((artifact) => artifact.artifactId), + }); + } + + if (environmentDrift.status === "BLOCKED") { + reasons.push({ + type: "environment_drift_blocked", + severity: "block", + findings: environmentDrift.findings.filter((finding) => finding.severity === "block"), + }); + } else if (environmentDrift.status === "REVIEW") { + reasons.push({ + type: "environment_drift_review", + severity: "review", + findings: environmentDrift.findings, + }); + } + + if (reviewArtifacts.length > 0) { + reasons.push({ + type: "artifact_review_required", + severity: "review", + artifacts: reviewArtifacts.map((artifact) => artifact.artifactId), + }); + } + 
/**
 * Builds the retention and export evidence record for a single artifact.
 *
 * An artifact counts as regulated when its access block flags human subjects
 * or is classified as "restricted". Export is refused while the evaluation is
 * BLOCKED or whenever any `embargoUntil` value is set (the date itself is not
 * compared against a clock here).
 *
 * @param {object} artifact - Artifact descriptor; reads `id`, `license`, `access`.
 * @param {object} evaluation - Artifact evaluation; reads `status`, `digest`,
 *   `fair.score`, `jsonLd`, `dataCite`.
 * @param {object} [policy={}] - Retention policy; reads `defaultRetentionDays`,
 *   `regulatedRetentionDays`, `minimumFairScoreForDoi`. `null` is treated as `{}`.
 * @returns {{artifactId: *, retentionDays: number, exportEligible: boolean,
 *   doiReady: boolean, evidenceDigest: *}} Evidence record.
 */
function buildRetentionAndExportEvidence(artifact, evaluation, policy = {}) {
  // A default parameter only covers `undefined`; tolerate an explicit null too.
  const rules = policy ?? {};
  const access = artifact.access;
  const regulated = Boolean(access?.containsHumanSubjects) || access?.classification === "restricted";

  // `??` (not `||`) so an explicitly configured 0 is honoured instead of being
  // silently replaced by the fallback value.
  const defaultRetentionDays = rules.defaultRetentionDays ?? 2555;
  const retentionDays = regulated
    ? rules.regulatedRetentionDays ?? defaultRetentionDays
    : defaultRetentionDays;
  const minimumFairScore = rules.minimumFairScoreForDoi ?? 0.82;

  return {
    artifactId: artifact.id,
    retentionDays,
    exportEligible: evaluation.status !== "BLOCKED" && !access?.embargoUntil,
    doiReady: evaluation.fair.score >= minimumFairScore && Boolean(artifact.license),
    evidenceDigest: sha256({
      artifactDigest: evaluation.digest,
      jsonLd: evaluation.jsonLd,
      dataCite: evaluation.dataCite,
      retentionDays,
    }),
  };
}

/**
 * Produces the requirement-to-evidence table for a governance packet.
 *
 * @param {object} packet - Packet exposing `artifacts`, `environments` and
 *   `reruns` arrays; only their lengths are read.
 * @returns {Array<{requirement: string, evidence: string}>} Six mapping rows.
 */
function buildRequirementMap(packet) {
  const artifactCount = packet.artifacts.length;
  const environmentCount = packet.environments.length;
  const rerunCount = packet.reruns.length;

  const rows = [
    [
      "Scalable storage engine for datasets, code, supplementary files",
      `${artifactCount} artifact capsules include type detection, content digests, preview policy, and version metadata.`,
    ],
    [
      "Metadata-aware previews and version/diff readiness",
      "Each artifact receives a deterministic preview policy and digest suitable for version comparison.",
    ],
    [
      "JSON-LD, DataCite, schema.org, and FAIR compliance",
      "Artifact evaluations emit JSON-LD/DataCite payloads and FAIR score checks with DOI readiness thresholds.",
    ],
    [
      "Executable environments and sandboxed reruns",
      `${environmentCount} environment capsules are fingerprinted and checked for pinned images, dependencies, GPU, and network policy drift.`,
    ],
    [
      "Built-in compute triggers and scheduled reruns",
      `${rerunCount} rerun plans include due-state, input digests, compute budget checks, expected outputs, and block/review decisions.`,
    ],
    [
      "Research artifact export and retention governance",
      "Retention/export evidence records DOI readiness, embargo checks, regulated retention days, and immutable evidence digests.",
    ],
  ];

  return rows.map(([requirement, evidence]) => ({ requirement, evidence }));
}

/**
 * Assembles the full governance packet for a project: environment drift
 * evaluations, artifact evaluations, rerun plans, retention/export evidence,
 * a summary of counts, and the requirement map.
 *
 * @param {object} project - Reads `projectId`, `generatedAt`, `retentionPolicy`,
 *   and the `environments`, `artifacts`, `rerunTriggers` arrays. A missing
 *   array is treated as empty.
 * @returns {object} Packet with `projectId`, `generatedAt`, `summary`,
 *   `artifacts`, `environments`, `reruns`, `retentionAndExport`, `requirementMap`.
 */
function buildGovernancePacket(project) {
  // A project that has not declared one of these collections yet is still a
  // valid (empty) input rather than a crash.
  const environments = project.environments ?? [];
  const artifacts = project.artifacts ?? [];
  const rerunTriggers = project.rerunTriggers ?? [];

  const environmentsById = new Map(environments.map((environment) => [environment.id, environment]));
  const environmentEvaluations = environments.map(detectEnvironmentDrift);
  const environmentEvaluationsById = new Map(
    environmentEvaluations.map((evaluation) => [evaluation.environmentId, evaluation]),
  );

  const artifactEvaluations = artifacts.map((artifact) =>
    evaluateArtifact(artifact, environmentsById, project.retentionPolicy),
  );
  const artifactEvaluationsById = new Map(
    artifactEvaluations.map((evaluation) => [evaluation.artifactId, evaluation]),
  );

  const reruns = rerunTriggers.map((trigger) =>
    planRerun(
      trigger,
      artifactEvaluationsById,
      environmentEvaluationsById.get(trigger.environmentId),
      environmentsById.get(trigger.environmentId),
    ),
  );

  // Pair each artifact with its own evaluation by position. `artifactEvaluations`
  // is a 1:1 map of `artifacts`, so this stays correct even when two artifacts
  // share an id (an id-keyed lookup would hand both of them the last evaluation).
  const retentionAndExport = artifacts.map((artifact, index) =>
    buildRetentionAndExportEvidence(artifact, artifactEvaluations[index], project.retentionPolicy),
  );

  const countWhere = (items, field, expected) => items.filter((item) => item[field] === expected).length;

  const packet = {
    projectId: project.projectId,
    generatedAt: project.generatedAt || new Date().toISOString(),
    summary: {
      artifacts: artifacts.length,
      environments: environments.length,
      rerunTriggers: rerunTriggers.length,
      readyArtifacts: countWhere(artifactEvaluations, "status", "READY"),
      blockedArtifacts: countWhere(artifactEvaluations, "status", "BLOCKED"),
      blockedReruns: countWhere(reruns, "action", "BLOCKED"),
      reviewReruns: countWhere(reruns, "action", "REVIEW_REQUIRED"),
      runnableNow: countWhere(reruns, "action", "RUN_NOW"),
    },
    artifacts: artifactEvaluations,
    environments: environmentEvaluations,
    reruns,
    retentionAndExport,
  };

  return {
    ...packet,
    requirementMap: buildRequirementMap(packet),
  };
}
fingerprintEnvironment, + normalizeEnvironmentSpec, + planRerun, + scoreFairMetadata, + sha256, + stableStringify, +}; diff --git a/execution-environment-drift-governance/src/sample-data.js b/execution-environment-drift-governance/src/sample-data.js new file mode 100644 index 0000000..3fecb9b --- /dev/null +++ b/execution-environment-drift-governance/src/sample-data.js @@ -0,0 +1,189 @@ +const sampleProject = { + projectId: "scibase-neuro-protein-2026", + generatedAt: "2026-05-15T12:00:00.000Z", + retentionPolicy: { + defaultRetentionDays: 2555, + regulatedRetentionDays: 3650, + embargoedExportRequiresApproval: true, + minimumFairScoreForDoi: 0.82, + }, + artifacts: [ + { + id: "artifact:raw-assay-csv", + path: "data/raw/assay-readings.csv", + type: "dataset/csv", + version: "v3", + bytes: 1843902, + license: "CC-BY-4.0", + tags: ["protein-folding", "assay", "raw"], + metadata: { + title: "Raw protein folding assay readings", + creators: ["ORCID:0000-0002-1825-0097"], + funder: "NIH-R01-EXAMPLE", + instrument: "plate-reader", + variables: ["sample_id", "timepoint", "fluorescence"], + }, + content: [ + ["sample_id", "timepoint", "fluorescence"], + ["S-001", "0h", 0.12], + ["S-001", "24h", 0.87], + ], + access: { + classification: "controlled", + containsHumanSubjects: false, + embargoUntil: null, + }, + requiredEnvironmentIds: ["env:python-analysis"], + }, + { + id: "artifact:analysis-notebook", + path: "notebooks/folding-analysis.ipynb", + type: "code/notebook", + version: "v5", + bytes: 93712, + license: "MIT", + tags: ["notebook", "reproducibility", "figure-generation"], + metadata: { + title: "Protein folding analysis notebook", + creators: ["ORCID:0000-0002-1825-0097"], + variables: ["fold_change", "confidence_interval"], + }, + content: { + cells: [ + { cell_type: "markdown", source: "# Protein folding analysis" }, + { cell_type: "code", source: "import pandas as pd\nprint('ready')" }, + ], + }, + access: { + classification: "public", + containsHumanSubjects: 
false, + embargoUntil: null, + }, + requiredEnvironmentIds: ["env:python-analysis"], + }, + { + id: "artifact:clinical-supplement", + path: "supplemental/clinical-cohort.json", + type: "dataset/json", + version: "v1", + bytes: 29231, + license: null, + tags: ["clinical", "supplemental", "restricted"], + metadata: { + title: "Restricted clinical cohort covariates", + creators: ["ORCID:0000-0002-1825-0097"], + variables: ["age_bucket", "condition_code", "site"], + }, + content: { + cohort: "redacted-demo-only", + variables: ["age_bucket", "condition_code", "site"], + }, + access: { + classification: "restricted", + containsHumanSubjects: true, + embargoUntil: "2026-08-01", + }, + requiredEnvironmentIds: ["env:python-analysis"], + }, + ], + environments: [ + { + id: "env:python-analysis", + kind: "container", + name: "Python data science capsule", + baseline: { + baseImage: "python:3.12.3-slim@sha256:demo-baseline", + runtime: "python", + runtimeVersion: "3.12.3", + packages: { + numpy: "1.26.4", + pandas: "2.2.2", + scipy: "1.13.0", + }, + gpu: false, + network: "blocked", + osPackages: ["libgomp1=13.2"], + }, + current: { + baseImage: "python:3.12.3-slim@sha256:demo-baseline", + runtime: "python", + runtimeVersion: "3.12.3", + packages: { + numpy: "1.26.4", + pandas: "2.2.2", + scipy: "1.13.0", + }, + gpu: false, + network: "blocked", + osPackages: ["libgomp1=13.2"], + }, + computePolicy: { + maxCpuMinutes: 40, + maxMemoryGb: 8, + allowGpu: false, + }, + }, + { + id: "env:ml-training", + kind: "container", + name: "GPU model training capsule", + baseline: { + baseImage: "pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime@sha256:demo-old", + runtime: "python", + runtimeVersion: "3.11.9", + packages: { + torch: "2.3.0", + transformers: "4.41.0", + numpy: "1.26.4", + }, + gpu: true, + network: "egress-allowlist", + osPackages: ["cuda=12.1"], + }, + current: { + baseImage: "pytorch/pytorch:2.4.1-cuda12.1-cudnn9-runtime", + runtime: "python", + runtimeVersion: "3.11.9", + 
/**
 * Checks that serialization and hashing ignore object key insertion order:
 * two structurally equal objects with shuffled keys must produce the same
 * `stableStringify` text and the same `sha256` value.
 */
function testStableHashing() {
  const shuffled = { b: 2, a: { d: 4, c: 3 } };
  const ordered = { a: { c: 3, d: 4 }, b: 2 };

  // Text form first, then the hash, each compared across both key orders.
  for (const canonicalize of [stableStringify, sha256]) {
    assert.strictEqual(canonicalize(shuffled), canonicalize(ordered));
  }
}
/**
 * Checks that `fingerprintEnvironment` yields the same digest for two specs
 * that differ only in the order of their keys and package entries.
 */
function testEnvironmentFingerprinting() {
  const baseImage = "python:3.12@sha256:abc";

  const imageFirst = fingerprintEnvironment({
    baseImage,
    packages: { pandas: "2.2.2", numpy: "1.26.4" },
  });
  const packagesFirst = fingerprintEnvironment({
    packages: { numpy: "1.26.4", pandas: "2.2.2" },
    baseImage,
  });

  assert.strictEqual(imageFirst.digest, packagesFirst.digest);
}

/**
 * Checks that the sample "env:ml-training" environment is reported as BLOCKED
 * with both an unpinned-base-image and an unpinned-dependency finding.
 */
function testEnvironmentDriftBlocksUnpinnedLatest() {
  const mlEnv = sampleProject.environments.find(({ id }) => id === "env:ml-training");
  const drift = detectEnvironmentDrift(mlEnv);
  const reports = (type) => drift.findings.some((finding) => finding.type === type);

  assert.strictEqual(drift.status, "BLOCKED");
  assert.ok(reports("base_image_unpinned"));
  assert.ok(reports("dependency_unpinned"));
}

/**
 * Checks the sample clinical supplement: its FAIR score is below 1, its
 * evaluation is BLOCKED, and the reasons include the missing-license entry
 * for restricted artifacts.
 */
function testFairScoringAndRestrictedArtifactBlock() {
  const envs = new Map();
  for (const environment of sampleProject.environments) {
    envs.set(environment.id, environment);
  }

  const clinical = sampleProject.artifacts.find(({ id }) => id === "artifact:clinical-supplement");
  const fair = scoreFairMetadata(clinical);
  const evaluation = evaluateArtifact(clinical, envs, sampleProject.retentionPolicy);
  const reasonTypes = evaluation.reasons.map((reason) => reason.type);

  assert.ok(fair.score < 1);
  assert.strictEqual(evaluation.status, "BLOCKED");
  assert.ok(reasonTypes.includes("restricted_artifact_missing_license"));
}
/**
 * Checks the retention/export section and requirement map of the packet built
 * from the sample project: the raw assay is export-eligible and DOI-ready, the
 * clinical supplement is not export-eligible and uses the regulated retention
 * period, and at least six requirement rows are present.
 */
function testRetentionAndRequirementEvidence() {
  const packet = buildGovernancePacket(sampleProject);
  const evidenceFor = (artifactId) =>
    packet.retentionAndExport.find((item) => item.artifactId === artifactId);

  const rawAssay = evidenceFor("artifact:raw-assay-csv");
  const clinical = evidenceFor("artifact:clinical-supplement");
  const { regulatedRetentionDays } = sampleProject.retentionPolicy;

  assert.strictEqual(rawAssay.exportEligible, true);
  assert.strictEqual(rawAssay.doiReady, true);
  assert.strictEqual(clinical.exportEligible, false);
  assert.strictEqual(clinical.retentionDays, regulatedRetentionDays);
  assert.ok(packet.requirementMap.length >= 6);
}