diff --git a/research-assistant-protocol-trace/README.md b/research-assistant-protocol-trace/README.md new file mode 100644 index 0000000..deddafa --- /dev/null +++ b/research-assistant-protocol-trace/README.md @@ -0,0 +1,42 @@ +# Research Assistant Protocol Trace + +Self-contained contribution for SCIBASE issue #16, focused on a protocol-trace and reproducibility-readiness slice of the AI-Powered Research Assistant Suite. + +The module helps reviewers answer: + +- Are manuscript claims aligned with preregistered protocol commitments? +- Which evidence artifacts and citations support each claim? +- Is the project reproducible enough to release or review? +- Which research gaps follow from the manuscript limitations and user interests? + +It is deterministic, dependency-free, credential-free, and uses synthetic sample data only. + +## Run + +```bash +cd research-assistant-protocol-trace +npm run check +npm test +npm run demo +``` + +Demo recording: `docs/protocol-trace-demo.mp4` + +## What It Includes + +- `src/protocol-trace.js` - core scoring, claim tracing, evidence mapping, reproducibility checks, peer-review recommendations, and research-gap ranking. +- `sample-data.json` - synthetic manuscript, protocol, evidence, citation, reproducibility, and corpus fixture. +- `test.js` - Node assertion tests for alignment, release decision, reproducibility regression, and validation errors. +- `demo.js` - CLI reviewer demo that prints the protocol trace packet. +- `docs/requirement-map.md` - direct mapping to issue #16 requirements. + +## Design Notes + +This is intentionally not a generic LLM wrapper. It models the assistant as an evidence-first deterministic quality gate that can run before a manuscript is released: + +- Primary claims without protocol matches are held for revision. +- Claims with weak evidence or missing citations become peer-review findings. +- Reproducibility confidence is computed from concrete artifacts and run history. 
"use strict";

// CLI reviewer demo: loads the synthetic sample bundle that sits next to this
// script, runs the protocol-trace analysis, and prints the resulting packet.

const fs = require("node:fs");
const path = require("node:path");
const { analyzeResearchAssistantSuite } = require("./src/protocol-trace");

const samplePath = path.join(__dirname, "sample-data.json");
const bundle = JSON.parse(fs.readFileSync(samplePath, "utf8"));
const report = analyzeResearchAssistantSuite(bundle);

/**
 * Print a section heading followed by one line per entry and a blank separator.
 *
 * @param {string} heading - Section title.
 * @param {Array<object>} entries - Items to print.
 * @param {(entry: object) => string} formatEntry - Renders one item as a line.
 */
function printSection(heading, entries, formatEntry) {
  console.log(heading);
  for (const entry of entries) {
    console.log(formatEntry(entry));
  }
  console.log("");
}

console.log("Research Assistant Protocol Trace Demo");
console.log("======================================");
console.log(`Assistant run: ${report.assistantRunId}`);
console.log(`Readiness score: ${report.readinessScore}`);
console.log(`Release decision: ${report.releaseDecision}`);
console.log(`Reproducibility: ${report.reproducibility.confidenceBand} (${report.reproducibility.confidenceScore}/100)`);
console.log("");

printSection("Claim trace", report.claimTraces, (trace) => {
  const drift = trace.driftSignals.length ? ` | drift: ${trace.driftSignals.join("; ")}` : "";
  return `- ${trace.claimId}: ${trace.status} -> ${trace.matchedCommitmentId || "no protocol match"} (${trace.alignmentScore})${drift}`;
});

printSection(
  "Peer-review recommendations",
  report.peerReview.recommendations,
  (item) => `- [${item.severity}] ${item.topic}: ${item.message}`
);

printSection("Research opportunities", report.researchGaps, (gap) => {
  // The requirement map promises the rationale alongside each opportunity;
  // each gap carries it in `why`, so print it when it is non-empty.
  const rationale = gap.why?.length ? ` | why: ${gap.why.join("; ")}` : "";
  return `- ${gap.topic} (${gap.opportunityScore}): ${gap.openQuestion}${rationale}`;
});

console.log(`Audit digest: ${report.auditDigest}`);
| +| Reproducibility checker | `evaluateReproducibility()` scores raw data, analysis command, environment lock, expected output hashes, attempts, and test data. | +| Links to previous reproducibility attempts | The sample bundle includes prior attempts; the report surfaces the last attempt and next runbook step. | +| Research gap finder | `findResearchGaps()` ranks research opportunities from user interests, manuscript limitations, activity level, and replication gaps. | +| Research opportunities feed | The demo prints ranked opportunity topics with rationale and open questions. | +| Reviewer-ready output | `analyzeResearchAssistantSuite()` returns claim traces, evidence maps, reproducibility checks, peer-review recommendations, research gaps, readiness score, release decision, and an audit digest. | +| Local verification | `npm run check`, `npm test`, and `npm run demo` run without external services or credentials. | diff --git a/research-assistant-protocol-trace/package.json b/research-assistant-protocol-trace/package.json new file mode 100644 index 0000000..3756d51 --- /dev/null +++ b/research-assistant-protocol-trace/package.json @@ -0,0 +1,13 @@ +{ + "name": "research-assistant-protocol-trace", + "version": "1.0.0", + "description": "Deterministic AI research assistant protocol trace and reproducibility readiness module for SCIBASE issue 16.", + "main": "src/protocol-trace.js", + "scripts": { + "check": "node --check src/protocol-trace.js && node --check demo.js && node --check test.js", + "demo": "node demo.js", + "test": "node test.js" + }, + "license": "MIT", + "private": true +} diff --git a/research-assistant-protocol-trace/sample-data.json b/research-assistant-protocol-trace/sample-data.json new file mode 100644 index 0000000..247f484 --- /dev/null +++ b/research-assistant-protocol-trace/sample-data.json @@ -0,0 +1,157 @@ +{ + "generatedAt": "2026-05-15T00:00:00Z", + "manuscript": { + "id": "ms-crispr-ad-001", + "title": "Single-cell CRISPR perturbation 
markers for early Alzheimer's risk", + "claims": [ + { + "id": "claim-primary-biomarker", + "text": "CRISPR perturbation of microglia identifies APOE-regulated single-cell biomarkers associated with early Alzheimer's progression.", + "resultType": "primary", + "direction": "positive", + "population": "microglia single-cell cohort" + }, + { + "id": "claim-secondary-model", + "text": "A reproducible notebook pipeline ranks biomarker candidates with stable confidence across two validation folds.", + "resultType": "secondary", + "direction": "positive", + "population": "validation folds" + }, + { + "id": "claim-exploratory-therapy", + "text": "The same evidence suggests a therapeutic intervention path for late-stage disease.", + "resultType": "exploratory", + "direction": "positive", + "population": "late-stage patients" + } + ], + "limitations": [ + "The study has limited replication across independent Alzheimer's cohorts.", + "Therapeutic claims require follow-up experiments outside the discovery dataset." + ], + "sections": { + "ethics": "IRB exempt secondary analysis of deidentified public data.", + "dataAvailability": "Synthetic fixture data and checksums are included in the export manifest.", + "limitations": "Independent cohort replication remains future work." 
+ } + }, + "protocol": { + "id": "proto-crispr-ad-001", + "commitments": [ + { + "id": "commit-primary-biomarker", + "question": "Do APOE-regulated CRISPR perturbations identify microglia biomarkers for early Alzheimer's progression?", + "endpoint": "ranked biomarker candidates with confidence intervals", + "analysisPlan": "Run the preregistered single-cell notebook and compare top markers across validation folds.", + "expectedDirection": "positive", + "population": "microglia single-cell cohort" + }, + { + "id": "commit-validation", + "question": "Are biomarker rankings reproducible across two validation folds?", + "endpoint": "stable confidence score in both validation folds", + "analysisPlan": "Execute deterministic ranking pipeline and compare artifact hashes.", + "expectedDirection": "positive", + "population": "validation folds" + } + ] + }, + "evidenceArtifacts": [ + { + "id": "artifact-notebook", + "kind": "notebook", + "title": "Single-cell CRISPR biomarker ranking notebook", + "description": "Runs preregistered APOE microglia perturbation analysis and produces ranked biomarker candidates.", + "findings": "APOE-regulated CRISPR perturbation markers remain stable across validation folds.", + "path": "notebooks/crispr-biomarker-ranking.ipynb" + }, + { + "id": "artifact-dataset", + "kind": "dataset", + "title": "Microglia single-cell cohort manifest", + "description": "Deidentified single-cell input matrix with fold assignment and checksum metadata.", + "findings": "Dataset supports early Alzheimer's progression endpoint and validation fold analysis.", + "path": "data/microglia-cohort-manifest.json" + }, + { + "id": "artifact-results", + "kind": "result", + "title": "Biomarker confidence table", + "description": "Confidence intervals and fold stability for ranked biomarker candidates.", + "findings": "Top biomarker confidence remains stable across two validation folds.", + "path": "results/biomarker-confidence.csv" + } + ], + "citations": [ + { + "id": 
"cite-apoe-microglia", + "title": "APOE regulation in microglia and Alzheimer's progression", + "abstract": "Review of APOE-regulated microglia pathways in Alzheimer's disease.", + "keywords": ["APOE", "microglia", "Alzheimer's", "single-cell"], + "doi": "10.0000/scibase.apoe.microglia" + }, + { + "id": "cite-crispr-screen", + "title": "CRISPR perturbation screens for biomarker discovery", + "abstract": "Methods for CRISPR perturbation and biomarker ranking in single-cell datasets.", + "keywords": ["CRISPR", "biomarker", "single-cell"], + "url": "https://example.org/crispr-screen" + } + ], + "reproducibility": { + "rawDataAvailable": true, + "analysisCommand": "node scripts/run-biomarker-ranking.js --fixture data/microglia-cohort-manifest.json", + "lockfile": "package-lock.json", + "testDataAvailable": true, + "expectedOutputs": [ + { + "path": "results/biomarker-confidence.csv", + "sha256": "f0b094bc2bd4d6ddc71b0b172d21b7fd0ff7ea4bece43e7ea2ed3e8f4c95d7f6" + }, + { + "path": "results/reproducibility-summary.json", + "sha256": "92e9a7fe1e539a906cbb1b8be838cf6f20f3fbdcfa4d061b2e94817adbb3f2b4" + } + ], + "attempts": [ + { + "id": "attempt-001", + "status": "passed", + "executedAt": "2026-05-14T19:00:00Z", + "runner": "local-sandbox" + } + ] + }, + "researchCorpus": { + "topicClusters": [ + { + "topic": "CRISPR microglia Alzheimer's replication cohorts", + "openQuestion": "Which independent cohorts can replicate CRISPR-derived microglia biomarkers?", + "keywords": ["CRISPR", "microglia", "Alzheimer's", "replication"], + "paperCount": 21, + "replicationCount": 1 + }, + { + "topic": "Late-stage Alzheimer's therapeutic intervention studies", + "openQuestion": "Which perturbation-derived biomarkers translate into late-stage therapeutic hypotheses?", + "keywords": ["Alzheimer's", "therapy", "biomarkers"], + "paperCount": 14, + "replicationCount": 5 + }, + { + "topic": "Single-cell negative results in neurodegeneration", + "openQuestion": "Where do negative single-cell 
/**
 * Default contribution of each review dimension to the readiness score.
 * The five weights add up to 100, so a fully satisfied review scores 100.
 */
const DEFAULT_REVIEW_WEIGHTS = Object.freeze({
  claimProtocolAlignment: 30,
  evidenceCoverage: 25,
  reproducibility: 25,
  citationReadiness: 10,
  gapNovelty: 10,
});

// Built once at module load; tokenize() is called for every claim/candidate
// pair, so rebuilding this set per call was pure overhead.
const STOP_WORDS = new Set([
  "a",
  "an",
  "and",
  "are",
  "as",
  "at",
  "be",
  "by",
  "for",
  "from",
  "in",
  "into",
  "is",
  "of",
  "on",
  "or",
  "our",
  "that",
  "the",
  "this",
  "to",
  "with",
]);

/**
 * Compute a SHA-256 hex digest of a JSON-compatible value that does not depend
 * on object key order.
 *
 * @param {*} value - Any JSON-serializable value.
 * @returns {string} 64-character lowercase hex digest.
 */
function stableHash(value) {
  // JSON.stringify returns undefined for undefined/functions, which
  // hash.update() rejects; hash those as JSON null, matching how JSON already
  // serializes undefined array elements.
  const serialized = JSON.stringify(canonicalize(value)) ?? "null";
  return crypto.createHash("sha256").update(serialized).digest("hex");
}

/**
 * Recursively rebuild a value with object keys in sorted order so that
 * JSON.stringify produces the same text regardless of insertion order.
 *
 * @param {*} value - Value to canonicalize.
 * @returns {*} A copy with sorted keys; primitives are returned unchanged.
 */
function canonicalize(value) {
  if (Array.isArray(value)) {
    return value.map(canonicalize);
  }
  if (value && typeof value === "object") {
    // Object.fromEntries defines own data properties, so a key literally named
    // "__proto__" (which JSON.parse can produce) is kept in the output instead
    // of being swallowed by the prototype setter as `result[key] = ...` was.
    return Object.fromEntries(
      Object.keys(value)
        .sort()
        .map((key) => [key, canonicalize(value[key])])
    );
  }
  return value;
}

/**
 * Lowercase a value, replace everything except a-z, 0-9, whitespace and "-"
 * with spaces, collapse whitespace runs, and trim.
 *
 * @param {*} value - Text to normalize; falsy values become "".
 * @returns {string} Normalized text.
 */
function normalizeText(value) {
  return String(value || "")
    .toLowerCase()
    .replace(/[^a-z0-9\s-]/g, " ")
    .replace(/\s+/g, " ")
    .trim();
}

/**
 * Apply the first matching suffix rule to a single token (light stemming).
 * Rule order matters: "ies" and "ions" must be tested before the bare "s".
 *
 * @param {string} token - Normalized token.
 * @returns {string} Stemmed token.
 */
function stemToken(token) {
  if (token.endsWith("ies") && token.length > 5) return `${token.slice(0, -3)}y`;
  if (token.endsWith("ions") && token.length > 6) return token.slice(0, -1);
  if (token.endsWith("ing") && token.length > 6) return token.slice(0, -3);
  if (token.endsWith("ed") && token.length > 5) return token.slice(0, -2);
  if (token.endsWith("s") && token.length > 4) return token.slice(0, -1);
  return token;
}

/**
 * Split text into stemmed tokens, dropping stop words and tokens of two
 * characters or fewer.
 *
 * @param {*} value - Text to tokenize.
 * @returns {string[]} Tokens in order of appearance (duplicates kept).
 */
function tokenize(value) {
  return normalizeText(value)
    .split(" ")
    .map(stemToken)
    .filter((token) => token.length > 2 && !STOP_WORDS.has(token));
}
// Default cut-offs used by classifyRisk().
const DEFAULT_RISK_THRESHOLDS = Object.freeze({ low: 0.75, medium: 0.45 });

// Overlap-score cut-offs used when tracing claims to protocol commitments.
const PROTOCOL_MATCH = Object.freeze({
  aligned: 0.42, // at or above: claim may be reported as "aligned"
  linked: 0.25, // at or above: a commitment is considered matched at all
  primarySupport: 0.35, // below: a primary claim is flagged as weakly supported
});

/**
 * Share of distinct tokens the two texts have in common, relative to the
 * larger of the two token sets.
 *
 * @param {*} left - First text.
 * @param {*} right - Second text.
 * @returns {number} Value in [0, 1]; 0 when either side has no tokens.
 */
function overlapScore(left, right) {
  const leftTokens = new Set(tokenize(left));
  const rightTokens = new Set(tokenize(right));

  if (leftTokens.size === 0 || rightTokens.size === 0) {
    return 0;
  }

  let shared = 0;
  for (const token of leftTokens) {
    if (rightTokens.has(token)) {
      shared += 1;
    }
  }

  return shared / Math.max(leftTokens.size, rightTokens.size);
}

/**
 * Map a score to a risk band: higher scores mean lower risk.
 *
 * @param {number} score - Score to classify.
 * @param {{low?: number, medium?: number}} [thresholds] - Minimum score for
 *   the "low" and "medium" bands. Missing entries fall back to the defaults
 *   (0.75 / 0.45) instead of comparing against undefined.
 * @returns {"low"|"medium"|"high"} Risk band.
 */
function classifyRisk(score, thresholds = DEFAULT_RISK_THRESHOLDS) {
  const low = thresholds?.low ?? DEFAULT_RISK_THRESHOLDS.low;
  const medium = thresholds?.medium ?? DEFAULT_RISK_THRESHOLDS.medium;

  if (score >= low) return "low";
  if (score >= medium) return "medium";
  return "high";
}

/**
 * Find the candidate whose selected text overlaps most with `item`.
 * On ties the earliest candidate wins.
 *
 * @param {*} item - Text to match.
 * @param {Array<*>} candidates - Candidates to score.
 * @param {(candidate: *) => *} textSelector - Extracts the text to compare.
 * @returns {{candidate: *, score: number}} Best candidate and its score, or
 *   `{ candidate: null, score: 0 }` when there are no candidates.
 */
function bestMatch(item, candidates, textSelector) {
  let best = null;
  for (const candidate of candidates) {
    const score = overlapScore(item, textSelector(candidate));
    if (!best || score > best.score) {
      best = { candidate, score };
    }
  }
  return best || { candidate: null, score: 0 };
}

/**
 * Text of a protocol commitment used for matching: question, endpoint and
 * analysis plan, skipping whichever are absent.
 *
 * @param {object} commitment - Protocol commitment.
 * @returns {string} Space-joined text.
 */
function describeCommitment(commitment) {
  return [commitment.question, commitment.endpoint, commitment.analysisPlan]
    .filter(Boolean)
    .join(" ");
}

/**
 * Link every manuscript claim to its best-matching protocol commitment and
 * report drift between the two.
 *
 * @param {object} bundle - Bundle with `manuscript.claims` and
 *   `protocol.commitments` (either may be absent and is then treated as empty).
 * @returns {Array<{claimId: *, claim: *, matchedCommitmentId: *, alignmentScore: number, status: string, driftSignals: string[]}>}
 *   One trace per claim, in claim order. `status` is "aligned",
 *   "review-needed" or "unregistered".
 */
function traceClaimsToProtocol(bundle) {
  const claims = bundle.manuscript.claims || [];
  const commitments = bundle.protocol.commitments || [];

  return claims.map((claim) => {
    const { candidate, score } = bestMatch(claim.text, commitments, describeCommitment);
    const hasProtocolMatch = score >= PROTOCOL_MATCH.linked;
    const driftSignals = [];

    if (claim.resultType === "primary" && score < PROTOCOL_MATCH.primarySupport) {
      driftSignals.push("primary claim has weak preregistered support");
    }

    // Direction and population are only compared once a commitment is linked;
    // comparing against an unrelated commitment would produce noise.
    if (hasProtocolMatch) {
      if (claim.direction && candidate?.expectedDirection && claim.direction !== candidate.expectedDirection) {
        driftSignals.push("claim direction differs from protocol expectation");
      }
      if (
        claim.population &&
        candidate?.population &&
        normalizeText(claim.population) !== normalizeText(candidate.population)
      ) {
        driftSignals.push("claim population differs from protocol population");
      }
    }

    let status;
    if (!hasProtocolMatch) {
      status = "unregistered";
    } else if (score >= PROTOCOL_MATCH.aligned && driftSignals.length === 0) {
      status = "aligned";
    } else {
      status = "review-needed";
    }

    return {
      claimId: claim.id,
      claim: claim.text,
      matchedCommitmentId: hasProtocolMatch ? candidate?.id || null : null,
      alignmentScore: round(score),
      status,
      driftSignals,
    };
  });
}
/**
 * For each claim trace, collect the evidence artifacts and citations whose
 * text overlaps with the claim, score the coverage, and list blockers.
 *
 * @param {object} bundle - Bundle with `manuscript.claims` and optional
 *   `evidenceArtifacts` / `citations` arrays.
 * @param {Array<{claimId: *}>} claimTraces - Traces whose `claimId` refers to
 *   a claim in `bundle.manuscript.claims`.
 * @returns {Array<{claimId: *, evidenceScore: number, risk: string, supportingArtifacts: object[], supportingCitations: object[], blockers: string[]}>}
 *   One entry per trace, in trace order. At most three artifacts and three
 *   citations are listed per claim; `evidenceScore` counts all matches.
 * @throws {TypeError} When a trace references a claim id that is not in the
 *   manuscript.
 */
function buildEvidenceMap(bundle, claimTraces) {
  const claimsById = new Map((bundle.manuscript.claims || []).map((claim) => [claim.id, claim]));

  // Artifact and citation descriptors do not depend on the claim, so build
  // them (including the integrity hash) once instead of once per claim.
  const artifactIndex = (bundle.evidenceArtifacts || []).map((artifact) => ({
    artifactId: artifact.id,
    kind: artifact.kind,
    integrityHash: artifact.integrityHash || stableHash(artifact),
    text: [artifact.title, artifact.description, artifact.findings].filter(Boolean).join(" "),
  }));
  const citationIndex = (bundle.citations || []).map((citation) => ({
    citationId: citation.id,
    title: citation.title,
    styleReady: Boolean(citation.doi || citation.url),
    text: [citation.title, citation.abstract, citation.keywords?.join(" ")].filter(Boolean).join(" "),
  }));

  const byScoreDescending = (left, right) => right.score - left.score;

  return claimTraces.map((trace) => {
    const claim = claimsById.get(trace.claimId);
    if (!claim) {
      throw new TypeError(`claim trace references unknown claim id: ${trace.claimId}`);
    }

    const supportingArtifacts = artifactIndex
      .map(({ artifactId, kind, integrityHash, text }) => ({
        artifactId,
        kind,
        integrityHash,
        score: overlapScore(claim.text, text),
      }))
      .filter((artifact) => artifact.score >= 0.2)
      .sort(byScoreDescending);

    const supportingCitations = citationIndex
      .map(({ citationId, title, styleReady, text }) => ({
        citationId,
        title,
        styleReady,
        score: overlapScore(claim.text, text),
      }))
      .filter((citation) => citation.score >= 0.18)
      .sort(byScoreDescending);

    // Each artifact contributes 0.28 and each citation 0.18, capped at 1.
    const evidenceScore = Math.min(1, supportingArtifacts.length * 0.28 + supportingCitations.length * 0.18);

    const blockers = [];
    if (supportingArtifacts.length === 0) blockers.push("no evidence artifact linked to claim");
    if (supportingCitations.length === 0) blockers.push("no supporting citation linked to claim");
    if (claim.resultType === "primary" && supportingArtifacts.length < 2) {
      blockers.push("primary claim should have at least two supporting artifacts");
    }

    return {
      claimId: trace.claimId,
      evidenceScore: round(evidenceScore),
      risk: classifyRisk(evidenceScore),
      supportingArtifacts: supportingArtifacts.slice(0, 3),
      supportingCitations: supportingCitations.slice(0, 3),
      blockers,
    };
  });
}
/**
 * Score how reproducible a project is from the artifacts declared in
 * `bundle.reproducibility`.
 *
 * Six weighted checks (weights add up to 100) are evaluated; the confidence
 * score is the sum of the weights of the checks that pass.
 *
 * @param {object} bundle - Bundle whose optional `reproducibility` object may
 *   declare `rawDataAvailable`, `analysisCommand`, `containerImage` /
 *   `condaEnvironment` / `lockfile`, `expectedOutputs`, `attempts` and
 *   `testDataAvailable`.
 * @returns {{confidenceScore: number, confidenceBand: "strong"|"moderate"|"weak", checks: object[], failed: string[], lastAttempt: (object|null), nextRunbookStep: string}}
 *   `failed` lists the ids of failing checks in check order; `lastAttempt` is
 *   the final entry of `attempts`, or null when there is none.
 */
function evaluateReproducibility(bundle) {
  const pipeline = bundle.reproducibility || {};
  // Anything that is not an array (missing, string, object) counts as "none
  // declared" so it cannot leak into the report or pass a check by accident.
  const expectedOutputs = Array.isArray(pipeline.expectedOutputs) ? pipeline.expectedOutputs : [];
  const attempts = Array.isArray(pipeline.attempts) ? pipeline.attempts : [];

  const checks = [
    {
      id: "raw-data",
      label: "Raw data is present or linked",
      passed: Boolean(pipeline.rawDataAvailable),
      weight: 18,
    },
    {
      id: "clean-pipeline",
      label: "Clean analysis pipeline is declared",
      passed: Boolean(pipeline.analysisCommand),
      weight: 20,
    },
    {
      id: "environment",
      label: "Execution environment is pinned",
      passed: Boolean(pipeline.containerImage || pipeline.condaEnvironment || pipeline.lockfile),
      weight: 18,
    },
    {
      id: "expected-output",
      label: "Expected outputs are declared with checksums",
      // every() is vacuously true on an empty array, so require at least one
      // declared output before checking that each has a path and a checksum.
      passed:
        expectedOutputs.length > 0 &&
        expectedOutputs.every((item) => Boolean(item?.path && item?.sha256)),
      weight: 18,
    },
    {
      id: "attempt-history",
      label: "Previous reproducibility attempts are tracked",
      passed: attempts.length > 0,
      weight: 12,
    },
    {
      id: "test-set",
      label: "Validation or test dataset is available",
      passed: Boolean(pipeline.testDataAvailable),
      weight: 14,
    },
  ];

  let score = 0;
  const failed = [];
  for (const check of checks) {
    if (check.passed) {
      score += check.weight;
    } else {
      failed.push(check.id);
    }
  }

  let confidenceBand = "weak";
  if (score >= 80) {
    confidenceBand = "strong";
  } else if (score >= 55) {
    confidenceBand = "moderate";
  }

  return {
    confidenceScore: score,
    confidenceBand,
    checks,
    failed,
    lastAttempt: attempts[attempts.length - 1] || null,
    nextRunbookStep: failed.length === 0
      ? "Run the declared analysis command in a sandbox and attach output hashes."
      : `Resolve reproducibility gap: ${failed[0]}.`,
  };
}
// Sort key for recommendation severities: most severe first.
const SEVERITY_RANK = Object.freeze({ high: 0, medium: 1, low: 2 });

// Manuscript sections that must be present before release.
const REQUIRED_SECTIONS = Object.freeze(["ethics", "dataAvailability", "limitations"]);

/**
 * Build the peer-review packet: one recommendation per protocol, evidence,
 * reproducibility or completeness finding, ranked by severity.
 *
 * @param {object} bundle - Bundle with `manuscript.id` and optional
 *   `manuscript.sections`.
 * @param {Array<{claimId: *, status: string, driftSignals: string[]}>} claimTraces
 *   Claim traces to review.
 * @param {Array<{claimId: *, risk: string, blockers: string[]}>} evidenceMap
 *   Evidence entries to review.
 * @param {{confidenceScore: number, nextRunbookStep: string}} reproducibility
 *   Reproducibility evaluation.
 * @returns {{manuscriptId: *, recommendationCount: number, recommendations: Array<{severity: string, topic: string, message: string}>, releaseReadiness: "hold"|"conditional"|"ready"}}
 *   Recommendations are ordered high, medium, low; within one severity they
 *   keep the order in which the findings were produced. `releaseReadiness` is
 *   "hold" with any high finding, "conditional" with any medium finding,
 *   otherwise "ready".
 */
function generatePeerReviewReport(bundle, claimTraces, evidenceMap, reproducibility) {
  const recommendations = [];
  const recommend = (severity, topic, message) => {
    recommendations.push({ severity, topic, message });
  };

  for (const trace of claimTraces) {
    if (trace.status === "unregistered") {
      recommend("high", "Protocol drift", `Claim ${trace.claimId} is not linked to a preregistered protocol commitment.`);
    } else if (trace.driftSignals.length > 0) {
      recommend("medium", "Protocol consistency", `Claim ${trace.claimId} needs review: ${trace.driftSignals.join("; ")}.`);
    }
  }

  for (const evidence of evidenceMap) {
    if (evidence.blockers.length > 0) {
      recommend(
        evidence.risk === "high" ? "high" : "medium",
        "Evidence coverage",
        `Claim ${evidence.claimId}: ${evidence.blockers.join("; ")}.`
      );
    }
  }

  if (reproducibility.confidenceScore < 80) {
    recommend(
      reproducibility.confidenceScore < 55 ? "high" : "medium",
      "Reproducibility",
      reproducibility.nextRunbookStep
    );
  }

  const sections = bundle.manuscript.sections;
  for (const section of REQUIRED_SECTIONS) {
    const content = sections?.[section];
    // A section containing only whitespace is as good as missing.
    const isMissing = typeof content === "string" ? content.trim() === "" : !content;
    if (isMissing) {
      recommend(
        section === "ethics" ? "medium" : "low",
        "Manuscript completeness",
        `Add or verify the ${section} section before release.`
      );
    }
  }

  // Array.prototype.sort is stable, so findings of equal severity keep their
  // generation order.
  recommendations.sort((left, right) => SEVERITY_RANK[left.severity] - SEVERITY_RANK[right.severity]);

  const severities = new Set(recommendations.map((item) => item.severity));
  let releaseReadiness = "ready";
  if (severities.has("high")) {
    releaseReadiness = "hold";
  } else if (severities.has("medium")) {
    releaseReadiness = "conditional";
  }

  return {
    manuscriptId: bundle.manuscript.id,
    recommendationCount: recommendations.length,
    recommendations,
    releaseReadiness,
  };
}
/**
 * Coerce a corpus count to a non-negative number.
 * Missing, non-numeric or negative values are treated as 0.
 *
 * @param {*} value - Raw count from the corpus fixture.
 * @returns {number} Non-negative finite count.
 */
function toCount(value) {
  const count = Number(value);
  return Number.isFinite(count) && count > 0 ? count : 0;
}

/**
 * Rank research opportunities from the topic clusters in the corpus.
 *
 * Each cluster is scored as
 * `0.35 * interest + 0.25 * limitation + 0.25 * replicationGap + 0.15 * activity`,
 * where interest/limitation are the best text overlap with any user interest /
 * manuscript limitation, replicationGap is 1 when no replication is recorded
 * (otherwise `1 - replicationCount / paperCount`, floored at 0), and activity
 * is `paperCount / 50` capped at 1.
 *
 * @param {object} bundle - Bundle with `manuscript.limitations` and optional
 *   `userContext.interests` / `researchCorpus.topicClusters`.
 * @returns {Array<{topic: *, openQuestion: *, opportunityScore: number, why: string[]}>}
 *   Up to five clusters scoring at least 0.2, highest score first.
 */
function findResearchGaps(bundle) {
  const interests = bundle.userContext?.interests || [];
  const clusters = bundle.researchCorpus?.topicClusters || [];
  const limitations = bundle.manuscript.limitations || [];

  // Best overlap of any of `texts` with `target`; 0 when `texts` is empty.
  const bestOverlap = (texts, target) =>
    texts.reduce((best, text) => Math.max(best, overlapScore(text, target)), 0);

  return clusters
    .map((cluster) => {
      // A cluster without recorded counts used to yield a NaN score and was
      // silently filtered out; treat unknown counts as zero instead.
      const paperCount = toCount(cluster.paperCount);
      const replicationCount = toCount(cluster.replicationCount);

      const interestScore = bestOverlap(interests, [cluster.topic, cluster.keywords?.join(" ")].join(" "));
      const limitationScore = bestOverlap(limitations, [cluster.topic, cluster.openQuestion].join(" "));
      const replicationGap = replicationCount === 0
        ? 1
        : Math.max(0, 1 - replicationCount / Math.max(paperCount, 1));
      const activityScore = Math.min(1, paperCount / 50);
      const opportunityScore =
        0.35 * interestScore + 0.25 * limitationScore + 0.25 * replicationGap + 0.15 * activityScore;

      const why = [];
      if (interestScore >= 0.25) why.push("matches researcher interests");
      if (limitationScore >= 0.2) why.push("connects to manuscript limitations");
      if (replicationGap >= 0.8) why.push("low replication coverage");
      if (activityScore >= 0.5) why.push("active research area");

      return {
        topic: cluster.topic,
        openQuestion: cluster.openQuestion,
        opportunityScore: round(opportunityScore),
        why,
      };
    })
    .filter((item) => item.opportunityScore >= 0.2)
    .sort((left, right) => right.opportunityScore - left.opportunityScore)
    .slice(0, 5);
}

/**
 * Combine the review dimensions into a single 0-100 readiness score.
 *
 * @param {Array<{status: string}>} claimTraces - "aligned" counts 1,
 *   "review-needed" 0.55, anything else 0; averaged over all traces.
 * @param {Array<{evidenceScore: number, supportingCitations: Array<{styleReady: boolean}>}>} evidenceMap
 *   Averaged evidence score, plus the share of claims with at least one
 *   style-ready citation.
 * @param {{confidenceScore: number}} reproducibility - Score out of 100.
 * @param {Array<{opportunityScore: number}>} gaps - Ranked gaps; only the
 *   first one contributes (its score + 0.2, capped at 1; 0.2 when empty).
 * @param {object} [weights] - Per-dimension weights. Dimensions left out fall
 *   back to DEFAULT_REVIEW_WEIGHTS instead of producing NaN.
 * @returns {number} Rounded weighted score.
 */
function computeReadinessScore(claimTraces, evidenceMap, reproducibility, gaps, weights = DEFAULT_REVIEW_WEIGHTS) {
  const effectiveWeights = { ...DEFAULT_REVIEW_WEIGHTS, ...weights };

  // Mean of `valueOf(item)` over `items`; 0 for an empty list.
  const average = (items, valueOf) =>
    items.length === 0 ? 0 : items.reduce((sum, item) => sum + valueOf(item), 0) / items.length;

  const alignment = average(claimTraces, (trace) => {
    if (trace.status === "aligned") return 1;
    if (trace.status === "review-needed") return 0.55;
    return 0;
  });
  const evidence = average(evidenceMap, (item) => item.evidenceScore);
  const reproducibilityScore = reproducibility.confidenceScore / 100;
  const citationReadiness = average(evidenceMap, (item) =>
    item.supportingCitations.some((citation) => citation.styleReady) ? 1 : 0
  );
  const gapNovelty = gaps.length > 0 ? Math.min(1, gaps[0].opportunityScore + 0.2) : 0.2;

  const score =
    alignment * effectiveWeights.claimProtocolAlignment +
    evidence * effectiveWeights.evidenceCoverage +
    reproducibilityScore * effectiveWeights.reproducibility +
    citationReadiness * effectiveWeights.citationReadiness +
    gapNovelty * effectiveWeights.gapNovelty;

  return Math.round(score);
}
/**
 * Run the full protocol-trace analysis on a bundle and assemble the
 * reviewer-ready packet.
 *
 * The bundle is validated first; the individual analyses then run in a fixed
 * order and their results are combined into a readiness score, a release
 * decision and an audit digest.
 *
 * @param {object} bundle - Research bundle (manuscript, protocol, evidence
 *   artifacts, reproducibility data and optional corpus/user context).
 * @returns {{assistantRunId: string, readinessScore: number, releaseDecision: string, claimTraces: object[], evidenceMap: object[], reproducibility: object, peerReview: object, researchGaps: object[], auditDigest: string}}
 *   `releaseDecision` is "ready-for-release", "needs-targeted-review" or
 *   "hold-for-revision". `auditDigest` hashes the claim traces, evidence map,
 *   reproducibility result and research gaps.
 * @throws {Error} When `validateBundle` rejects the bundle.
 */
function analyzeResearchAssistantSuite(bundle) {
  validateBundle(bundle);

  const claimTraces = traceClaimsToProtocol(bundle);
  const evidenceMap = buildEvidenceMap(bundle, claimTraces);
  const reproducibility = evaluateReproducibility(bundle);
  const researchGaps = findResearchGaps(bundle);
  const peerReview = generatePeerReviewReport(bundle, claimTraces, evidenceMap, reproducibility);
  const readinessScore = computeReadinessScore(claimTraces, evidenceMap, reproducibility, researchGaps);

  // The run id is derived from the identifying fields only, so the same
  // manuscript/protocol/timestamp triple always yields the same id.
  const runSeed = {
    manuscript: bundle.manuscript.id,
    protocol: bundle.protocol.id,
    generatedAt: bundle.generatedAt || "deterministic",
  };
  const assistantRunId = `rar-${stableHash(runSeed).slice(0, 12)}`;

  const reviewState = peerReview.releaseReadiness;
  let releaseDecision = "hold-for-revision";
  if (readinessScore >= 82 && reviewState === "ready") {
    releaseDecision = "ready-for-release";
  } else if (readinessScore >= 65 && reviewState !== "hold") {
    releaseDecision = "needs-targeted-review";
  }

  const auditDigest = stableHash({ claimTraces, evidenceMap, reproducibility, researchGaps });

  return {
    assistantRunId,
    readinessScore,
    releaseDecision,
    claimTraces,
    evidenceMap,
    reproducibility,
    peerReview,
    researchGaps,
    auditDigest,
  };
}
/**
 * Throw unless `value` is an array.
 *
 * @param {*} value - Value to check.
 * @param {string} label - Dotted field path used in the error message.
 * @throws {Error} `"<label> must be an array"` when the check fails.
 */
function requireArray(value, label) {
  if (!Array.isArray(value)) {
    throw new Error(`${label} must be an array`);
  }
}

/**
 * Check that a bundle has the fields the analysis reads before any of it
 * runs, so shape problems surface as one clear message.
 *
 * Required: `manuscript`, `protocol`, `evidenceArtifacts`, `reproducibility`,
 * with `manuscript.claims`, `protocol.commitments` and `evidenceArtifacts`
 * being arrays. Optional collections (`citations`, `manuscript.limitations`,
 * `researchCorpus.topicClusters`, `userContext.interests`) may be absent, but
 * when provided they must be arrays because the analysis iterates them.
 *
 * @param {object} bundle - Bundle to validate.
 * @returns {void}
 * @throws {Error} Describing the first missing or mistyped field.
 */
function validateBundle(bundle) {
  for (const key of ["manuscript", "protocol", "evidenceArtifacts", "reproducibility"]) {
    if (!bundle || !bundle[key]) {
      throw new Error(`missing required bundle field: ${key}`);
    }
  }

  requireArray(bundle.manuscript.claims, "manuscript.claims");
  requireArray(bundle.protocol.commitments, "protocol.commitments");
  requireArray(bundle.evidenceArtifacts, "evidenceArtifacts");

  // Falsy optional values are replaced by [] downstream, so only truthy
  // values of the wrong type need to be rejected here.
  const optionalCollections = [
    ["citations", bundle.citations],
    ["manuscript.limitations", bundle.manuscript.limitations],
    ["researchCorpus.topicClusters", bundle.researchCorpus?.topicClusters],
    ["userContext.interests", bundle.userContext?.interests],
  ];
  for (const [label, value] of optionalCollections) {
    if (value) {
      requireArray(value, label);
    }
  }
}

/**
 * Round a number to three decimal places.
 *
 * @param {number} value - Number to round.
 * @returns {number} Rounded number.
 */
function round(value) {
  return Math.round(value * 1000) / 1000;
}
"use strict";

// Node assertion tests for the protocol-trace module. Each check runs against
// the synthetic sample bundle (or a modified deep copy of it) and throws on
// the first failed assertion.

const assert = require("node:assert/strict");
const fs = require("node:fs");
const path = require("node:path");
const {
  analyzeResearchAssistantSuite,
  evaluateReproducibility,
  overlapScore,
  traceClaimsToProtocol,
} = require("./src/protocol-trace");

const samplePath = path.join(__dirname, "sample-data.json");
const sample = JSON.parse(fs.readFileSync(samplePath, "utf8"));

// Deep copy via a JSON round-trip so a test can mutate its own bundle.
const clone = (value) => JSON.parse(JSON.stringify(value));

// Token overlap between two related phrases is meaningful.
function checkOverlapScore() {
  const score = overlapScore(
    "CRISPR microglia biomarkers for Alzheimer's progression",
    "APOE microglia CRISPR biomarker progression"
  );
  assert.ok(score > 0.45, `expected meaningful overlap, got ${score}`);
}

// The two preregistered claims align; the exploratory one is unregistered.
function checkClaimAlignment() {
  const traces = traceClaimsToProtocol(sample);
  assert.equal(traces.length, 3);
  assert.equal(traces[0].status, "aligned");
  assert.equal(traces[1].status, "aligned");
  assert.equal(traces[2].status, "unregistered");
  assert.equal(traces[2].matchedCommitmentId, null);
}

// End-to-end packet: release decision, score range, and digest sensitivity.
function checkReleaseDecision() {
  const report = analyzeResearchAssistantSuite(sample);
  assert.equal(report.releaseDecision, "hold-for-revision");
  assert.ok(report.readinessScore >= 60 && report.readinessScore <= 85);
  assert.equal(report.reproducibility.confidenceBand, "strong");
  assert.ok(report.peerReview.recommendations.some((item) => item.topic === "Protocol drift"));
  assert.ok(report.researchGaps[0].topic.includes("CRISPR microglia"));
  assert.match(report.auditDigest, /^[a-f0-9]{64}$/);

  const changed = clone(sample);
  changed.manuscript.claims[0].text = "A changed biomarker claim should alter the audit digest";
  assert.notEqual(analyzeResearchAssistantSuite(changed).auditDigest, report.auditDigest);
}

// Removing raw data, checksums and attempt history drops confidence to weak.
function checkReproducibilityRegression() {
  const weak = clone(sample);
  weak.reproducibility.rawDataAvailable = false;
  weak.reproducibility.expectedOutputs = [{ path: "results/out.csv" }];
  weak.reproducibility.attempts = [];

  const reproducibility = evaluateReproducibility(weak);
  assert.equal(reproducibility.confidenceBand, "weak");
  for (const failedCheck of ["raw-data", "expected-output", "attempt-history"]) {
    assert.ok(reproducibility.failed.includes(failedCheck));
  }
}

// A bundle without a protocol is rejected up front.
function checkValidationError() {
  const missing = clone(sample);
  delete missing.protocol;
  assert.throws(() => analyzeResearchAssistantSuite(missing), /missing required bundle field: protocol/);
}

checkOverlapScore();
checkClaimAlignment();
checkReleaseDecision();
checkReproducibilityRegression();
checkValidationError();

console.log("research-assistant-protocol-trace tests passed");