SCIBASE-AI · zhengjynicolas · May 15, 2026
diff --git a/research-assistant-protocol-trace/README.md b/research-assistant-protocol-trace/README.md
@@ -0,0 +1,42 @@
+# Research Assistant Protocol Trace
+
+Self-contained contribution for SCIBASE issue #16, focused on a protocol-trace and reproducibility-readiness slice of the AI-Powered Research Assistant Suite.
+
+The module helps reviewers answer:
+
+- Are manuscript claims aligned with preregistered protocol commitments?
+- Which evidence artifacts and citations support each claim?
+- Is the project reproducible enough to release or review?
+- Which research gaps follow from the manuscript limitations and user interests?
+
+It is deterministic, credential-free, uses synthetic sample data only, and has no dependencies beyond a Node.js runtime.
+
+## Run
+
+```bash
+cd research-assistant-protocol-trace
+npm run check
+npm test
+npm run demo
+```
+
+Demo recording: `docs/protocol-trace-demo.mp4`
+
+## What It Includes
+
+- `src/protocol-trace.js` - core scoring, claim tracing, evidence mapping, reproducibility checks, peer-review recommendations, and research-gap ranking.
+- `sample-data.json` - synthetic fixture covering the manuscript, protocol, evidence artifacts, citations, reproducibility metadata, research corpus, and user context.
+- `test.js` - Node assertion tests for alignment, release decision, reproducibility regression, and validation errors.
+- `demo.js` - CLI reviewer demo that prints the protocol trace packet.
+- `docs/requirement-map.md` - direct mapping to issue #16 requirements.
+
+## Design Notes
+
+This is intentionally not a generic LLM wrapper. It models the assistant as an evidence-first deterministic quality gate that can run before a manuscript is released:
+
+- Primary claims without protocol matches are held for revision.
+- Claims with weak evidence or missing citations become peer-review findings.
+- Reproducibility confidence is computed from concrete artifacts and run history.
+- Gap recommendations are ranked from manuscript limitations, researcher interests, replication scarcity, and topic activity.
+
+The result is a reviewer-ready trace packet with an audit digest, so future backend/API work can persist and compare assistant runs.
diff --git a/research-assistant-protocol-trace/demo.js b/research-assistant-protocol-trace/demo.js
@@ -0,0 +1,38 @@
+"use strict";
+// CLI demo: loads the bundled sample fixture, runs the analysis, and prints the resulting report to stdout.
+const fs = require("node:fs");
+const path = require("node:path");
+const { analyzeResearchAssistantSuite } = require("./src/protocol-trace");
+// Resolve the fixture relative to this file so the demo does not depend on the current working directory.
+const samplePath = path.join(__dirname, "sample-data.json");
+const bundle = JSON.parse(fs.readFileSync(samplePath, "utf8"));
+const report = analyzeResearchAssistantSuite(bundle);
+// Summary header: run id, readiness score, release decision, and reproducibility confidence band/score.
+console.log("Research Assistant Protocol Trace Demo");
+console.log("======================================");
+console.log(`Assistant run: ${report.assistantRunId}`);
+console.log(`Readiness score: ${report.readinessScore}`);
+console.log(`Release decision: ${report.releaseDecision}`);
+console.log(`Reproducibility: ${report.reproducibility.confidenceBand} (${report.reproducibility.confidenceScore}/100)`);
+console.log("");
+// One line per claim trace: claim id, status, matched commitment id (or a fallback label), and alignment score.
+console.log("Claim trace");
+for (const trace of report.claimTraces) {
+  const drift = trace.driftSignals.length ? ` | drift: ${trace.driftSignals.join("; ")}` : ""; // suffix is empty when there are no drift signals
+  console.log(`- ${trace.claimId}: ${trace.status} -> ${trace.matchedCommitmentId || "no protocol match"} (${trace.alignmentScore})${drift}`);
+}
+console.log("");
+// One line per peer-review recommendation: severity, topic, and message.
+console.log("Peer-review recommendations");
+for (const item of report.peerReview.recommendations) {
+  console.log(`- [${item.severity}] ${item.topic}: ${item.message}`);
+}
+console.log("");
+// One line per research gap: topic, opportunity score, and open question.
+console.log("Research opportunities");
+for (const gap of report.researchGaps) {
+  console.log(`- ${gap.topic} (${gap.opportunityScore}): ${gap.openQuestion}`);
+}
+console.log("");
+// Final line: the audit digest carried on the report.
+console.log(`Audit digest: ${report.auditDigest}`);
diff --git a/research-assistant-protocol-trace/docs/protocol-trace-demo.mp4 b/research-assistant-protocol-trace/docs/protocol-trace-demo.mp4
diff --git a/research-assistant-protocol-trace/docs/requirement-map.md b/research-assistant-protocol-trace/docs/requirement-map.md
@@ -0,0 +1,15 @@
+# Requirement Map
+
+This module contributes a focused protocol-trace slice for issue #16, not a second broad assistant-suite demo.
+
+| Issue #16 requirement | Implementation evidence |
+| --- | --- |
+| Auto peer review reports | `generatePeerReviewReport()` emits severity-ranked review recommendations for protocol drift, evidence coverage, reproducibility, and manuscript completeness. |
+| Clarity/methodological/statistical red flags | The peer-review packet flags unregistered primary claims, evidence blockers, missing manuscript sections, and reproducibility gaps. |
+| Claims vs. evidence alignment | `traceClaimsToProtocol()` links manuscript claims to protocol commitments; `buildEvidenceMap()` links claims to evidence artifacts and citations. |
+| Reproducibility checker | `evaluateReproducibility()` scores raw data, analysis command, environment lock, expected output hashes, attempts, and test data. |
+| Links to previous reproducibility attempts | The sample bundle includes one prior attempt (`attempt-001`); the report surfaces the last attempt and next runbook step. |
+| Research gap finder | `findResearchGaps()` ranks research opportunities from user interests, manuscript limitations, activity level, and replication gaps. |
+| Research opportunities feed | The demo prints ranked opportunity topics with their opportunity scores and open questions. |
+| Reviewer-ready output | `analyzeResearchAssistantSuite()` returns claim traces, evidence maps, reproducibility checks, peer-review recommendations, research gaps, readiness score, release decision, and an audit digest. |
+| Local verification | `npm run check`, `npm test`, and `npm run demo` run without external services or credentials. |
diff --git a/research-assistant-protocol-trace/package.json b/research-assistant-protocol-trace/package.json
@@ -0,0 +1,13 @@
+{
+  "name": "research-assistant-protocol-trace",
+  "version": "1.0.0",
+  "description": "Deterministic AI research assistant protocol trace and reproducibility readiness module for SCIBASE issue 16.",
+  "main": "src/protocol-trace.js",
+  "scripts": {
+    "check": "node --check src/protocol-trace.js && node --check demo.js && node --check test.js",
+    "demo": "node demo.js",
+    "test": "node test.js"
+  },
+  "license": "MIT",
+  "private": true
+}
diff --git a/research-assistant-protocol-trace/sample-data.json b/research-assistant-protocol-trace/sample-data.json
@@ -0,0 +1,157 @@
+{
+  "generatedAt": "2026-05-15T00:00:00Z",
+  "manuscript": {
+    "id": "ms-crispr-ad-001",
+    "title": "Single-cell CRISPR perturbation markers for early Alzheimer's risk",
+    "claims": [
+      {
+        "id": "claim-primary-biomarker",
+        "text": "CRISPR perturbation of microglia identifies APOE-regulated single-cell biomarkers associated with early Alzheimer's progression.",
+        "resultType": "primary",
+        "direction": "positive",
+        "population": "microglia single-cell cohort"
+      },
+      {
+        "id": "claim-secondary-model",
+        "text": "A reproducible notebook pipeline ranks biomarker candidates with stable confidence across two validation folds.",
+        "resultType": "secondary",
+        "direction": "positive",
+        "population": "validation folds"
+      },
+      {
+        "id": "claim-exploratory-therapy",
+        "text": "The same evidence suggests a therapeutic intervention path for late-stage disease.",
+        "resultType": "exploratory",
+        "direction": "positive",
+        "population": "late-stage patients"
+      }
+    ],
+    "limitations": [
+      "The study has limited replication across independent Alzheimer's cohorts.",
+      "Therapeutic claims require follow-up experiments outside the discovery dataset."
+    ],
+    "sections": {
+      "ethics": "IRB exempt secondary analysis of deidentified public data.",
+      "dataAvailability": "Synthetic fixture data and checksums are included in the export manifest.",
+      "limitations": "Independent cohort replication remains future work."
+    }
+  },
+  "protocol": {
+    "id": "proto-crispr-ad-001",
+    "commitments": [
+      {
+        "id": "commit-primary-biomarker",
+        "question": "Do APOE-regulated CRISPR perturbations identify microglia biomarkers for early Alzheimer's progression?",
+        "endpoint": "ranked biomarker candidates with confidence intervals",
+        "analysisPlan": "Run the preregistered single-cell notebook and compare top markers across validation folds.",
+        "expectedDirection": "positive",
+        "population": "microglia single-cell cohort"
+      },
+      {
+        "id": "commit-validation",
+        "question": "Are biomarker rankings reproducible across two validation folds?",
+        "endpoint": "stable confidence score in both validation folds",
+        "analysisPlan": "Execute deterministic ranking pipeline and compare artifact hashes.",
+        "expectedDirection": "positive",
+        "population": "validation folds"
+      }
+    ]
+  },
+  "evidenceArtifacts": [
+    {
+      "id": "artifact-notebook",
+      "kind": "notebook",
+      "title": "Single-cell CRISPR biomarker ranking notebook",
+      "description": "Runs preregistered APOE microglia perturbation analysis and produces ranked biomarker candidates.",
+      "findings": "APOE-regulated CRISPR perturbation markers remain stable across validation folds.",
+      "path": "notebooks/crispr-biomarker-ranking.ipynb"
+    },
+    {
+      "id": "artifact-dataset",
+      "kind": "dataset",
+      "title": "Microglia single-cell cohort manifest",
+      "description": "Deidentified single-cell input matrix with fold assignment and checksum metadata.",
+      "findings": "Dataset supports early Alzheimer's progression endpoint and validation fold analysis.",
+      "path": "data/microglia-cohort-manifest.json"
+    },
+    {
+      "id": "artifact-results",
+      "kind": "result",
+      "title": "Biomarker confidence table",
+      "description": "Confidence intervals and fold stability for ranked biomarker candidates.",
+      "findings": "Top biomarker confidence remains stable across two validation folds.",
+      "path": "results/biomarker-confidence.csv"
+    }
+  ],
+  "citations": [
+    {
+      "id": "cite-apoe-microglia",
+      "title": "APOE regulation in microglia and Alzheimer's progression",
+      "abstract": "Review of APOE-regulated microglia pathways in Alzheimer's disease.",
+      "keywords": ["APOE", "microglia", "Alzheimer's", "single-cell"],
+      "doi": "10.0000/scibase.apoe.microglia"
+    },
+    {
+      "id": "cite-crispr-screen",
+      "title": "CRISPR perturbation screens for biomarker discovery",
+      "abstract": "Methods for CRISPR perturbation and biomarker ranking in single-cell datasets.",
+      "keywords": ["CRISPR", "biomarker", "single-cell"],
+      "url": "https://example.org/crispr-screen"
+    }
+  ],
+  "reproducibility": {
+    "rawDataAvailable": true,
+    "analysisCommand": "node scripts/run-biomarker-ranking.js --fixture data/microglia-cohort-manifest.json",
+    "lockfile": "package-lock.json",
+    "testDataAvailable": true,
+    "expectedOutputs": [
+      {
+        "path": "results/biomarker-confidence.csv",
+        "sha256": "f0b094bc2bd4d6ddc71b0b172d21b7fd0ff7ea4bece43e7ea2ed3e8f4c95d7f6"
+      },
+      {
+        "path": "results/reproducibility-summary.json",
+        "sha256": "92e9a7fe1e539a906cbb1b8be838cf6f20f3fbdcfa4d061b2e94817adbb3f2b4"
+      }
+    ],
+    "attempts": [
+      {
+        "id": "attempt-001",
+        "status": "passed",
+        "executedAt": "2026-05-14T19:00:00Z",
+        "runner": "local-sandbox"
+      }
+    ]
+  },
+  "researchCorpus": {
+    "topicClusters": [
+      {
+        "topic": "CRISPR microglia Alzheimer's replication cohorts",
+        "openQuestion": "Which independent cohorts can replicate CRISPR-derived microglia biomarkers?",
+        "keywords": ["CRISPR", "microglia", "Alzheimer's", "replication"],
+        "paperCount": 21,
+        "replicationCount": 1
+      },
+      {
+        "topic": "Late-stage Alzheimer's therapeutic intervention studies",
+        "openQuestion": "Which perturbation-derived biomarkers translate into late-stage therapeutic hypotheses?",
+        "keywords": ["Alzheimer's", "therapy", "biomarkers"],
+        "paperCount": 14,
+        "replicationCount": 5
+      },
+      {
+        "topic": "Single-cell negative results in neurodegeneration",
+        "openQuestion": "Where do negative single-cell CRISPR results prevent overclaiming in neurodegeneration?",
+        "keywords": ["single-cell", "negative results", "neurodegeneration"],
+        "paperCount": 8,
+        "replicationCount": 0
+      }
+    ]
+  },
+  "userContext": {
+    "interests": [
+      "single-cell CRISPR biomarker replication",
+      "Alzheimer's microglia reproducibility"
+    ]
+  }
+}