SCIBASE-AI · kiheon0709 · May 15, 2026
diff --git a/research-assistant-evidence-grounding/README.md b/research-assistant-evidence-grounding/README.md
@@ -0,0 +1,62 @@
+# Research Assistant Evidence Grounding
+
+Self-contained MVP module for issue #16, **AI-Powered Research Assistant Suite**. It focuses on evidence-grounded assistant behavior: claims-vs-evidence mapping, pre-submission peer review, reproducibility readiness, and research gap prioritization.
+
+## What It Covers
+
+- Auto peer-review reports grounded in manuscript sections, domain templates, and evidence alignment.
+- Claims-vs-evidence matrix for citations, datasets, protocols, statistical analyses, and invalid/missing references.
+- Reproducibility checker for environment lockfiles, raw data checksums, pipeline steps, reported output consistency, and prior attempt links.
+- Research gap finder that ranks unresolved questions, low-replication clusters, negative signals, and user/lab fit.
+- Aggregated assistant brief with release status, blockers, top gap, audit hashes, and reviewer-ready signals.
+
+## Run
+
+```bash
+npm run check
+```
+
+That runs:
+
+```bash
+npm test
+npm run demo
+```
+
+Expected demo shape (abridged — the CLI also prints `weakClaims`, `peerReviewFindings`, `gapCount`, and `auditHash`):
+
+```json
+{
+  "projectId": "project-ai-biomarker-002",
+  "status": "needs_researcher_attention",
+  "evidenceCoverage": 0.667,
+  "peerReviewRecommendation": "revise_before_release",
+  "reproducibilityConfidence": 100,
+  "topResearchGap": "gap-spatial-microglia"
+}
+```
+
+## Requirement Map
+
+| Issue #16 requirement | Implementation evidence |
+| --- | --- |
+| Auto peer review reports with clarity, methodology, missing citations, and claims-vs-evidence alignment | `generatePeerReviewReport()` applies domain templates and emits severity-tagged findings for clarity, methodology, and weak claims. |
+| Adaptive templates per domain | `DOMAIN_TEMPLATES` maps molecular biology, clinical trials, quantum physics, and generic reviewer lenses/risks. |
+| Researcher feedback before release/internal review | `buildAssistantBrief()` combines evidence coverage, peer-review recommendation, blockers, and readiness status. |
+| Reproducibility checker for code/notebooks, dependencies, raw data, and reported results | `runReproducibilityCheck()` scores environment lockfiles, raw data checksums, pipeline steps, reported outputs, and prior attempts. |
+| Reproducibility confidence score and prior attempt links | The reproducibility result includes `confidenceScore`, `status`, `blockers`, and `attemptLinks`. |
+| Research gap finder for under-studied intersections, unresolved questions, low replication, negative results, and user fit | `findResearchGaps()` ranks corpus opportunities by unresolved questions, replication count, negative signals, interests, and lab capabilities. |
+| Project-aware AI research assistant output | `buildEvidenceMap()` and `buildAssistantBrief()` generate deterministic audit hashes and reviewer-ready assistant summaries. |
+
+## Files
+
+- `src/index.js` - assistant rules and exported functions.
+- `src/cli.js` - reviewer demo command.
+- `sample/assistant-fixture.json` - manuscript, evidence library, reproducibility, and corpus fixture.
+- `test/assistant.test.js` - regression tests for normalization, evidence mapping, peer review, reproducibility, gap ranking, and brief aggregation.
+- `docs/demo.svg` - visual walkthrough for PR review.
+- `docs/demo.mp4` - short generated video walkthrough for maintainers who prefer an inline demo artifact.
+
+## Notes
+
+This module is dependency-free and credential-free. It is designed as a deterministic foundation for a future LLM provider adapter, sandbox execution runner, citation index, and UI workflow.
diff --git a/research-assistant-evidence-grounding/docs/demo.mp4 b/research-assistant-evidence-grounding/docs/demo.mp4
diff --git a/research-assistant-evidence-grounding/docs/demo.svg b/research-assistant-evidence-grounding/docs/demo.svg
diff --git a/research-assistant-evidence-grounding/package.json b/research-assistant-evidence-grounding/package.json
@@ -0,0 +1,12 @@
+{
+  "name": "research-assistant-evidence-grounding",
+  "version": "0.1.0",
+  "private": true,
+  "type": "module",
+  "description": "Dependency-free evidence-grounding assistant module for SCIBASE issue #16.",
+  "scripts": {
+    "test": "node --test test/*.test.js",
+    "demo": "node src/cli.js",
+    "check": "npm test && npm run demo"
+  }
+}
diff --git a/research-assistant-evidence-grounding/sample/assistant-fixture.json b/research-assistant-evidence-grounding/sample/assistant-fixture.json
@@ -0,0 +1,92 @@
+{
+  "projectId": "project-ai-biomarker-002",
+  "domain": "molecular_biology",
+  "researcherInterests": ["single-cell", "alzheimers", "biomarkers"],
+  "labCapabilities": ["single-cell-rna-seq", "mouse-model", "spatial-transcriptomics"],
+  "manuscript": {
+    "title": "Single-cell inflammatory biomarker panel for early Alzheimer's progression",
+    "abstract": "We present a single-cell RNA sequencing analysis of inflammatory signatures associated with early Alzheimer's progression. The study combines a curated cohort, reproducible processing workflow, and pathway enrichment review to prioritize candidate biomarkers for follow-up validation.",
+    "sections": [
+      { "name": "Methods", "text": "Batch-effect controls, differential expression thresholds, and pathway enrichment strategy are described with protocol references." },
+      { "name": "Results", "text": "A three-gene inflammatory panel stratifies early-stage samples and aligns with microglial activation literature." }
+    ],
+    "claims": [
+      {
+        "id": "claim-panel-accuracy",
+        "text": "The three-gene inflammatory panel separates early-stage Alzheimer's samples from controls.",
+        "importance": "high",
+        "expectedEvidence": ["dataset", "statistical-analysis", "protocol"],
+        "artifacts": ["dataset-cohort", "analysis-differential-expression", "protocol-processing"],
+        "citations": ["citation-microglia-review"]
+      },
+      {
+        "id": "claim-pathway-novelty",
+        "text": "The pathway intersection is under-studied in spatial transcriptomics replication cohorts.",
+        "importance": "medium",
+        "expectedEvidence": ["literature-scan"],
+        "artifacts": ["gap-scan-spatial"],
+        "citations": ["citation-negative-results"]
+      },
+      {
+        "id": "claim-clinical-readiness",
+        "text": "The panel is ready for clinical deployment.",
+        "importance": "high",
+        "expectedEvidence": ["clinical-validation", "ethics-approval"],
+        "artifacts": ["analysis-differential-expression"],
+        "citations": []
+      }
+    ]
+  },
+  "evidenceLibrary": [
+    { "id": "dataset-cohort", "type": "dataset", "title": "Curated single-cell cohort", "year": 2026, "checksum": "sha256:cohort", "reproducible": true },
+    { "id": "analysis-differential-expression", "type": "statistical-analysis", "title": "Differential expression notebook", "year": 2026, "checksum": "sha256:analysis", "reproducible": true },
+    { "id": "protocol-processing", "type": "protocol", "title": "Cell filtering and integration protocol", "year": 2025, "peerReviewed": true },
+    { "id": "citation-microglia-review", "type": "literature-scan", "title": "Microglial activation review", "year": 2023, "peerReviewed": true },
+    { "id": "gap-scan-spatial", "type": "literature-scan", "title": "Spatial transcriptomics gap scan", "year": 2026, "reproducible": true },
+    { "id": "citation-negative-results", "type": "literature-scan", "title": "Negative replication signals in neuroinflammation", "year": 2024, "peerReviewed": true }
+  ],
+  "reproducibility": {
+    "environment": { "type": "docker", "lockfile": "renv.lock" },
+    "rawData": { "available": true, "checksum": "sha256:raw" },
+    "pipelineSteps": [
+      { "id": "ingest", "command": "Rscript scripts/ingest.R", "input": "raw/", "output": "derived/cell-matrix.rds" },
+      { "id": "model", "command": "Rscript scripts/model.R", "input": "derived/cell-matrix.rds", "output": "results/panel.json" }
+    ],
+    "reportedResults": [
+      { "metric": "panel_auc", "expected": 0.88, "observed": 0.88 },
+      { "metric": "signature_count", "expected": 3, "observed": 3 }
+    ],
+    "previousAttempts": [
+      { "status": "passed", "url": "https://scibase.example/repro/project-ai-biomarker-002/attempt-1" }
+    ]
+  },
+  "corpus": [
+    {
+      "id": "gap-spatial-microglia",
+      "title": "Spatial replication of microglial inflammatory biomarkers",
+      "tags": ["single-cell", "alzheimers", "spatial-transcriptomics"],
+      "requiredCapabilities": ["spatial-transcriptomics"],
+      "replicationCount": 0,
+      "unresolvedQuestions": ["Do spatial niches preserve the single-cell inflammatory signature?", "Which cell-cell interactions explain false positives?"],
+      "negativeSignals": ["bulk RNA-seq replication was inconsistent"]
+    },
+    {
+      "id": "gap-mouse-model-transfer",
+      "title": "Mouse model transferability of human inflammatory panel",
+      "tags": ["alzheimers", "mouse-model"],
+      "requiredCapabilities": ["mouse-model"],
+      "replicationCount": 1,
+      "unresolvedQuestions": ["Which model captures early-stage microglial state?"],
+      "negativeSignals": []
+    },
+    {
+      "id": "well-covered-proteomics",
+      "title": "Proteomics validation of established amyloid markers",
+      "tags": ["proteomics"],
+      "requiredCapabilities": ["mass-spec"],
+      "replicationCount": 6,
+      "unresolvedQuestions": [],
+      "negativeSignals": []
+    }
+  ]
+}
diff --git a/research-assistant-evidence-grounding/src/cli.js b/research-assistant-evidence-grounding/src/cli.js
@@ -0,0 +1,25 @@
+#!/usr/bin/env node
+import fs from 'node:fs';
+import path from 'node:path';
+import { buildAssistantBrief, buildEvidenceMap, generatePeerReviewReport, runReproducibilityCheck, findResearchGaps } from './index.js';
+
+const fixturePath = process.argv[2] || path.join(import.meta.dirname, '..', 'sample', 'assistant-fixture.json');
+const project = JSON.parse(fs.readFileSync(fixturePath, 'utf8'));
+const evidenceMap = buildEvidenceMap(project);
+const peerReview = generatePeerReviewReport(project, evidenceMap);
+const reproducibility = runReproducibilityCheck(project);
+const gaps = findResearchGaps(project);
+const brief = buildAssistantBrief(project);
+
+console.log(JSON.stringify({
+  projectId: brief.projectId,
+  status: brief.status,
+  evidenceCoverage: brief.evidenceCoverage,
+  peerReviewRecommendation: brief.peerReviewRecommendation,
+  reproducibilityConfidence: brief.reproducibilityConfidence,
+  topResearchGap: brief.topResearchGap?.id,
+  weakClaims: evidenceMap.weakClaims.map((claim) => claim.claimId),
+  peerReviewFindings: peerReview.findings.length,
+  gapCount: gaps.opportunities.length,
+  auditHash: brief.auditHash
+}, null, 2));