diff --git a/citation-provenance-readiness/README.md b/citation-provenance-readiness/README.md new file mode 100644 index 0000000..eb4b3d6 --- /dev/null +++ b/citation-provenance-readiness/README.md @@ -0,0 +1,42 @@ +# Citation Provenance Readiness + +This module is a focused slice for SCIBASE issue `#13`, AI-Assisted Research Tools. + +It does not call external AI services or citation APIs. Instead, it gives reviewers a deterministic readiness gate for manuscript claims before a paper leaves the workspace: + +- extracts evidence-backed summary modes for a manuscript packet +- checks whether research claims have citation keys, bibliography records, and local evidence artifacts +- flags citation, statistics, and compliance gaps +- ranks candidate citations with transparent overlap, DOI, recency, and open-access signals +- emits APA and Nature-style insertion plans for reviewer action +- produces a stable digest for audit packets + +## Local Verification + +```bash +cd citation-provenance-readiness +npm run check +npm test +npm run demo +npm run render-demo +``` + +The demo is based on synthetic sample data in `examples/project.json`. + +## Demo Output + +```text +Project: Protocol-Guided Perturbation Screening +Status: hold +Readiness score: 22 +Claims reviewed: 3 +Missing bibliography keys: missing2026 +Top citation insertion: nguyen2025 +Top reviewer action: Add at least one source-backed citation or mark the claim as internal preliminary evidence. +``` + +## Why This Slice Is Distinct + +Existing issue `#13` submissions mostly cover broad AI summarizer, peer-review, and citation demos. This module targets the provenance boundary: whether a concrete manuscript packet has traceable sources, evidence artifacts, statistical context, citation insertions, and compliance statements before an AI-assisted research tool recommends release. + +The implementation is dependency-free, credential-free, and uses synthetic data only. 
diff --git a/citation-provenance-readiness/docs/demo.mp4 b/citation-provenance-readiness/docs/demo.mp4 new file mode 100644 index 0000000..2a66c00 Binary files /dev/null and b/citation-provenance-readiness/docs/demo.mp4 differ diff --git a/citation-provenance-readiness/docs/demo.svg b/citation-provenance-readiness/docs/demo.svg new file mode 100644 index 0000000..0b1cf16 --- /dev/null +++ b/citation-provenance-readiness/docs/demo.svg @@ -0,0 +1,21 @@ + + + + + Citation provenance readiness + Protocol-Guided Perturbation Screening + + Status + hold + + Score + 22/100 + + Recommended citation + @nguyen2025 + Top reviewer action + Add at least one source-backed citation or mark the claim as internal preliminary evidence. + Missing bibliography + missing2026 + Digest 12a467b7c532f4c9b6e6c8a3 + diff --git a/citation-provenance-readiness/examples/project.json b/citation-provenance-readiness/examples/project.json new file mode 100644 index 0000000..066a0fb --- /dev/null +++ b/citation-provenance-readiness/examples/project.json @@ -0,0 +1,123 @@ +{ + "id": "scibase-protocol-screening-demo", + "title": "Protocol-Guided Perturbation Screening", + "manuscript": { + "sections": [ + { + "id": "abstract", + "title": "Abstract", + "text": "We describe a CRISPR perturbation screen for MAPK stress response signatures and a reproducible notebook workflow for follow-up analysis." + }, + { + "id": "methods", + "title": "Methods", + "text": "The analysis pipeline uses notebooks, a small synthetic dataset, and a protocol checklist to compare perturbation response clusters." + }, + { + "id": "results", + "title": "Results", + "text": "CRISPR perturbation reduced MAPK activation in the treated cohort. The workflow is fully reproducible across all reviewer machines. A secondary endpoint reached p < 0.049 in a small exploratory subset." 
+ }, + { + "id": "limitations", + "title": "Limitations", + "text": "The dataset is synthetic for this demo and should be replaced with controlled-access evidence before external review." + } + ] + }, + "claims": [ + { + "id": "c1", + "sectionId": "results", + "text": "CRISPR perturbation reduced MAPK activation in the treated cohort.", + "citationKeys": ["doe2024"], + "evidenceIds": ["e1"], + "tags": ["CRISPR", "MAPK", "single-cell"] + }, + { + "id": "c2", + "sectionId": "results", + "text": "The workflow is fully reproducible across all reviewer machines.", + "citationKeys": [], + "evidenceIds": ["e2"], + "tags": ["reproducibility", "notebook", "workflow"] + }, + { + "id": "c3", + "sectionId": "results", + "text": "A secondary endpoint reached p < 0.049 in a small exploratory subset.", + "citationKeys": ["missing2026"], + "evidenceIds": [], + "tags": ["statistics", "exploratory", "endpoint"] + } + ], + "bibliography": [ + { + "key": "doe2024", + "title": "Single-cell perturbation screens for pathway response mapping", + "authors": ["Doe, J.", "Ibrahim, S."], + "year": 2024, + "venue": "Open Molecular Systems", + "doi": "10.5555/omols.2024.1001", + "url": "https://example.org/omols-2024-1001" + } + ], + "candidateCitations": [ + { + "key": "nguyen2025", + "title": "Reproducible notebook capsules for computational biology review", + "authors": ["Nguyen, A.", "Patel, R."], + "year": 2025, + "venue": "Journal of Open Reproducibility", + "doi": "10.5555/jor.2025.021", + "url": "https://example.org/jor-2025-021", + "tags": ["reproducibility", "notebook", "workflow", "review"], + "openAccess": true + }, + { + "key": "chen2023", + "title": "Confidence interval reporting for exploratory omics endpoints", + "authors": ["Chen, L.", "Morrison, T."], + "year": 2023, + "venue": "Statistical Methods in Biology", + "doi": "10.5555/smb.2023.44", + "url": "https://example.org/smb-2023-44", + "tags": ["statistics", "exploratory", "confidence interval", "endpoint"], + "openAccess": 
true + }, + { + "key": "alvarez2022", + "title": "MAPK pathway annotations in single-cell stress models", + "authors": ["Alvarez, M."], + "year": 2022, + "venue": "Cell Systems Notes", + "doi": "", + "url": "https://example.org/cell-systems-notes-mapk", + "tags": ["MAPK", "single-cell", "stress response"], + "openAccess": false + } + ], + "evidenceArtifacts": [ + { + "id": "e1", + "label": "Synthetic perturbation response table", + "type": "dataset", + "checksum": "sha256:2b5cf2c7b8a6d0e5e01b6a5dce0afac0", + "access": "reviewer-visible", + "supports": ["c1"] + }, + { + "id": "e2", + "label": "Notebook replay manifest", + "type": "notebook-manifest", + "checksum": "sha256:7c8e4b1b326b2a4d7efbf2a79d1bb0a5", + "access": "reviewer-visible", + "supports": ["c2"] + } + ], + "compliance": { + "dataAvailability": false, + "ethicsStatement": true, + "codeAvailability": false + } +} diff --git a/citation-provenance-readiness/package.json b/citation-provenance-readiness/package.json new file mode 100644 index 0000000..c4af289 --- /dev/null +++ b/citation-provenance-readiness/package.json @@ -0,0 +1,16 @@ +{ + "name": "citation-provenance-readiness", + "version": "1.0.0", + "description": "Dependency-free citation provenance readiness module for SCIBASE AI-assisted research tools.", + "type": "module", + "scripts": { + "check": "node --check src/citation-provenance-readiness.mjs && node --check tests/citation-provenance-readiness.test.mjs && node --check scripts/render-demo.mjs", + "test": "node --test tests/citation-provenance-readiness.test.mjs", + "demo": "node src/citation-provenance-readiness.mjs examples/project.json", + "render-demo": "node scripts/render-demo.mjs" + }, + "engines": { + "node": ">=20" + }, + "license": "MIT" +} diff --git a/citation-provenance-readiness/requirement-map.md b/citation-provenance-readiness/requirement-map.md new file mode 100644 index 0000000..28119f8 --- /dev/null +++ b/citation-provenance-readiness/requirement-map.md @@ -0,0 +1,34 @@ +# 
Requirement Map + +Issue `#13` asks for AI-assisted research tools at MVP level. This module covers a narrow, reviewer-ready slice of that workflow. + +## AI Paper Summarizer + +- Abstract-style summary: `buildSummaryModes().abstract` +- Executive summary: `buildSummaryModes().executive` +- Layperson explanation: `buildSummaryModes().layperson` +- Key findings and next actions: CLI output and `diagnostics[].action` + +## AI Peer Review Aid + +- Missing citation and bibliography checks: `buildClaimDiagnostics` +- Statistical context checks for p-value claims without confidence intervals: `buildClaimDiagnostics` +- Data/code/ethics availability checks: `buildProjectDiagnostics` +- Reviewer-ready actions: `diagnostics[].action` +- Stable audit digest: `digest` + +## AI Citation Tool + +- Citation candidate ranking: `rankCandidateCitations` +- Transparent reasons for each recommendation: `recommendedCitations[].reasons` +- APA and Nature formatting: `formatCitation` +- One-click style insertion text: `citationInsertions[].insertionText` +- Similar-source style tags: candidate and claim tag overlap scoring + +## Safety And Reviewability + +- No external API calls +- No live credentials, private data, payment data, or user identity data +- Synthetic demo data only +- Deterministic test coverage in `tests/citation-provenance-readiness.test.mjs` +- Demo artifact generated by `npm run render-demo` diff --git a/citation-provenance-readiness/scripts/render-demo.mjs b/citation-provenance-readiness/scripts/render-demo.mjs new file mode 100644 index 0000000..c251463 --- /dev/null +++ b/citation-provenance-readiness/scripts/render-demo.mjs @@ -0,0 +1,46 @@ +import { mkdirSync, writeFileSync } from "node:fs"; +import { dirname, join } from "node:path"; +import { fileURLToPath } from "node:url"; +import project from "../examples/project.json" with { type: "json" }; +import { buildCitationProvenanceReadiness } from "../src/citation-provenance-readiness.mjs"; + +const root = 
dirname(dirname(fileURLToPath(import.meta.url))); +const docsDir = join(root, "docs"); +mkdirSync(docsDir, { recursive: true }); + +const report = buildCitationProvenanceReadiness(project); +const topInsertion = report.citationInsertions[0]; +const topDiagnostic = report.diagnostics[0]; +const svg = ` + + + + Citation provenance readiness + ${escapeXml(report.title)} + + Status + ${escapeXml(report.readinessStatus)} + + Score + ${report.readinessScore}/100 + + Recommended citation + @${escapeXml(topInsertion.recommendedCitations[0].key)} + Top reviewer action + ${escapeXml(topDiagnostic.action)} + Missing bibliography + ${escapeXml(report.missingBibliography.join(", ") || "none")} + Digest ${report.digest.slice(0, 24)} + +`; + +writeFileSync(join(docsDir, "demo.svg"), svg); +console.log(`Wrote ${join(docsDir, "demo.svg")}`); + +function escapeXml(value) { + return String(value) + .replaceAll("&", "&amp;") + .replaceAll("<", "&lt;") + .replaceAll(">", "&gt;") + .replaceAll('"', "&quot;"); +} diff --git a/citation-provenance-readiness/src/citation-provenance-readiness.mjs b/citation-provenance-readiness/src/citation-provenance-readiness.mjs new file mode 100644 index 0000000..d952576 --- /dev/null +++ b/citation-provenance-readiness/src/citation-provenance-readiness.mjs @@ -0,0 +1,404 @@ +import { createHash } from "node:crypto"; +import { readFileSync } from "node:fs"; +import { fileURLToPath } from "node:url"; + +const STOP_WORDS = new Set([ + "about", + "after", + "against", + "between", + "because", + "before", + "could", + "during", + "every", + "fully", + "their", + "there", + "these", + "those", + "through", + "under", + "using", + "where", + "which", + "while", + "without" +]); + +export function buildCitationProvenanceReadiness(project) { + const normalized = normalizeProject(project); + const claims = normalized.claims.map((claim) => enrichClaim(claim, normalized)); + const bibliography = buildBibliographyIndex(normalized.bibliography); + const citedKeys = 
unique(claims.flatMap((claim) => claim.citationKeys)); + const missingBibliography = citedKeys.filter((key) => !bibliography.has(key)); + const unusedBibliography = normalized.bibliography + .map((entry) => entry.key) + .filter((key) => !citedKeys.includes(key)); + + const citationInsertions = claims + .filter((claim) => claim.citationKeys.length === 0 || claim.citationKeys.some((key) => !bibliography.has(key))) + .map((claim) => buildInsertionPlan(claim, normalized.candidateCitations)); + + const diagnostics = [ + ...buildClaimDiagnostics(claims, bibliography), + ...buildProjectDiagnostics(normalized) + ]; + + const severityCounts = countSeverities(diagnostics); + const readinessScore = Math.max( + 0, + 100 - severityCounts.high * 18 - severityCounts.medium * 8 - severityCounts.low * 3 + ); + + const readinessStatus = + severityCounts.high > 0 ? "hold" : severityCounts.medium > 0 ? "review-needed" : "ready"; + + const summaries = buildSummaryModes(normalized, claims, diagnostics); + const reportCore = { + projectId: normalized.id, + title: normalized.title, + readinessStatus, + readinessScore, + claimCount: claims.length, + evidenceArtifactCount: normalized.evidenceArtifacts.length, + citedKeys, + missingBibliography, + unusedBibliography, + diagnostics, + citationInsertions, + summaries + }; + + return { + ...reportCore, + digest: digest(reportCore) + }; +} + +export function normalizeProject(project) { + if (!project || typeof project !== "object") { + throw new TypeError("Project input must be an object."); + } + + return { + id: requiredString(project.id, "id"), + title: requiredString(project.title, "title"), + manuscript: { + sections: array(project.manuscript?.sections, "manuscript.sections").map((section) => ({ + id: requiredString(section.id, "section.id"), + title: requiredString(section.title, "section.title"), + text: requiredString(section.text, "section.text") + })) + }, + claims: array(project.claims, "claims").map((claim) => ({ + id: 
requiredString(claim.id, "claim.id"), + sectionId: requiredString(claim.sectionId, "claim.sectionId"), + text: requiredString(claim.text, "claim.text"), + citationKeys: array(claim.citationKeys ?? [], "claim.citationKeys"), + evidenceIds: array(claim.evidenceIds ?? [], "claim.evidenceIds"), + tags: array(claim.tags ?? [], "claim.tags") + })), + bibliography: array(project.bibliography ?? [], "bibliography").map((entry) => ({ + key: requiredString(entry.key, "bibliography.key"), + title: requiredString(entry.title, "bibliography.title"), + authors: array(entry.authors ?? [], "bibliography.authors"), + year: Number(entry.year), + venue: entry.venue ?? "", + doi: entry.doi ?? "", + url: entry.url ?? "" + })), + candidateCitations: array(project.candidateCitations ?? [], "candidateCitations").map((entry) => ({ + key: requiredString(entry.key, "candidateCitations.key"), + title: requiredString(entry.title, "candidateCitations.title"), + authors: array(entry.authors ?? [], "candidateCitations.authors"), + year: Number(entry.year), + venue: entry.venue ?? "", + doi: entry.doi ?? "", + url: entry.url ?? "", + tags: array(entry.tags ?? [], "candidateCitations.tags"), + openAccess: Boolean(entry.openAccess) + })), + evidenceArtifacts: array(project.evidenceArtifacts ?? [], "evidenceArtifacts").map((artifact) => ({ + id: requiredString(artifact.id, "evidenceArtifacts.id"), + label: requiredString(artifact.label, "evidenceArtifacts.label"), + type: requiredString(artifact.type, "evidenceArtifacts.type"), + checksum: artifact.checksum ?? "", + access: artifact.access ?? "unknown", + supports: array(artifact.supports ?? 
[], "evidenceArtifacts.supports") + })), + compliance: { + dataAvailability: Boolean(project.compliance?.dataAvailability), + ethicsStatement: Boolean(project.compliance?.ethicsStatement), + codeAvailability: Boolean(project.compliance?.codeAvailability) + } + }; +} + +export function rankCandidateCitations(claim, candidateCitations) { + const claimTerms = new Set([...extractTerms(claim.text), ...claim.tags.map(normalizeTerm)]); + + return candidateCitations + .map((candidate) => { + const candidateTerms = new Set([ + ...extractTerms(candidate.title), + ...candidate.tags.map(normalizeTerm), + ...extractTerms(candidate.venue) + ]); + const overlap = [...claimTerms].filter((term) => candidateTerms.has(term)); + const recencyBoost = Number.isFinite(candidate.year) ? Math.max(0, Math.min(8, candidate.year - 2018)) : 0; + const doiBoost = candidate.doi ? 4 : 0; + const openAccessBoost = candidate.openAccess ? 3 : 0; + const score = overlap.length * 8 + recencyBoost + doiBoost + openAccessBoost; + + return { + key: candidate.key, + title: candidate.title, + score, + reasons: [ + overlap.length ? `matches ${overlap.slice(0, 5).join(", ")}` : "low lexical overlap", + candidate.openAccess ? "open access" : "access needs review", + candidate.doi ? "has DOI" : "missing DOI" + ], + formatted: { + apa: formatCitation(candidate, "apa"), + nature: formatCitation(candidate, "nature") + } + }; + }) + .sort((a, b) => b.score - a.score || a.key.localeCompare(b.key)); +} + +export function formatCitation(entry, style = "apa") { + const authors = entry.authors?.length ? entry.authors.join(", ") : "Unknown"; + const year = Number.isFinite(entry.year) ? entry.year : "n.d."; + + if (style === "nature") { + return `${authors}. ${entry.title}. ${entry.venue || "Unpublished"} (${year}).`; + } + + const doi = entry.doi ? ` https://doi.org/${entry.doi}` : entry.url ? ` ${entry.url}` : ""; + return `${authors} (${year}). ${entry.title}. 
${entry.venue || "Unpublished"}.${doi}`; +} + +function enrichClaim(claim, project) { + const section = project.manuscript.sections.find((candidate) => candidate.id === claim.sectionId); + const evidence = claim.evidenceIds + .map((id) => project.evidenceArtifacts.find((artifact) => artifact.id === id)) + .filter(Boolean); + + return { + ...claim, + sectionTitle: section?.title ?? "Unknown section", + evidence, + hasStatisticalClaim: /\bp\s*[<=>]\s*0?\.\d+|\bstatistically significant\b/i.test(claim.text), + hasConfidenceInterval: /\bconfidence interval\b|\bci\b|95%/i.test(claim.text) + }; +} + +function buildBibliographyIndex(entries) { + return new Map(entries.map((entry) => [entry.key, entry])); +} + +function buildInsertionPlan(claim, candidateCitations) { + const ranked = rankCandidateCitations(claim, candidateCitations).slice(0, 3); + const top = ranked[0] ?? null; + + return { + claimId: claim.id, + claimText: claim.text, + sectionTitle: claim.sectionTitle, + currentCitationKeys: claim.citationKeys, + recommendedCitations: ranked, + insertionText: top + ? `${claim.text} @${top.key}` + : claim.text, + reviewerNote: top + ? `Add @${top.key} or another source that supports the claim before release.` + : "No candidate citation scored above zero; request a domain reviewer source." + }; +} + +function buildClaimDiagnostics(claims, bibliography) { + const diagnostics = []; + + for (const claim of claims) { + if (claim.citationKeys.length === 0) { + diagnostics.push({ + id: `claim-${claim.id}-citation-gap`, + severity: "high", + claimId: claim.id, + area: "citation", + message: "Claim has no supporting citation key.", + action: "Add at least one source-backed citation or mark the claim as internal preliminary evidence." 
+ }); + } + + for (const key of claim.citationKeys) { + if (!bibliography.has(key)) { + diagnostics.push({ + id: `claim-${claim.id}-missing-${key}`, + severity: "high", + claimId: claim.id, + area: "bibliography", + message: `Citation key @${key} is used but missing from the bibliography.`, + action: `Add a bibliography record for @${key} or replace it with a verified source.` + }); + } + } + + if (claim.evidence.length === 0) { + diagnostics.push({ + id: `claim-${claim.id}-evidence-gap`, + severity: "medium", + claimId: claim.id, + area: "evidence", + message: "Claim is not linked to a local evidence artifact.", + action: "Attach a dataset, notebook, protocol, or reviewer note with a stable checksum." + }); + } + + if (claim.hasStatisticalClaim && !claim.hasConfidenceInterval) { + diagnostics.push({ + id: `claim-${claim.id}-statistical-context`, + severity: "medium", + claimId: claim.id, + area: "statistics", + message: "Statistical claim mentions significance without confidence interval context.", + action: "Add effect size and confidence interval context before pre-review." + }); + } + } + + return diagnostics; +} + +function buildProjectDiagnostics(project) { + const diagnostics = []; + const fullText = project.manuscript.sections.map((section) => section.text).join("\n"); + + if (!project.compliance.dataAvailability && /dataset|data|cohort|sample/i.test(fullText)) { + diagnostics.push({ + id: "project-data-availability", + severity: "high", + area: "compliance", + message: "Manuscript discusses data but the data availability statement is not ready.", + action: "Add a data availability statement or explain controlled-access restrictions." 
+ }); + } + + if (!project.compliance.codeAvailability && /notebook|pipeline|script|model/i.test(fullText)) { + diagnostics.push({ + id: "project-code-availability", + severity: "medium", + area: "compliance", + message: "Manuscript discusses executable work but code availability is not ready.", + action: "Publish or attach the relevant notebook, script, or reproducibility runbook." + }); + } + + if (/patient|human subject|clinical/i.test(fullText) && !project.compliance.ethicsStatement) { + diagnostics.push({ + id: "project-ethics-statement", + severity: "medium", + area: "compliance", + message: "Human-subject language appears without an ethics statement.", + action: "Add IRB, consent, or non-human-subject justification before external review." + }); + } + + return diagnostics; +} + +function buildSummaryModes(project, claims, diagnostics) { + const topTags = unique(claims.flatMap((claim) => claim.tags)).slice(0, 5); + const highCount = diagnostics.filter((diagnostic) => diagnostic.severity === "high").length; + const supportedClaims = claims.filter((claim) => claim.citationKeys.length > 0 && claim.evidence.length > 0).length; + + return { + abstract: `${project.title} was screened for citation provenance across ${claims.length} claims. ${supportedClaims} claims already link both citations and local evidence, while ${highCount} high-priority provenance gaps need review.`, + executive: `Readiness depends on closing ${highCount} high-priority gaps before release. 
Main topics checked: ${topTags.join(", ")}.`, + layperson: `This report checks whether important research claims have trustworthy sources and evidence files before the paper is shared.` + }; +} + +function countSeverities(diagnostics) { + return diagnostics.reduce( + (counts, diagnostic) => { + counts[diagnostic.severity] += 1; + return counts; + }, + { high: 0, medium: 0, low: 0 } + ); +} + +function extractTerms(text) { + return unique( + String(text) + .toLowerCase() + .replace(/[^a-z0-9\s-]/g, " ") + .split(/\s+/) + .map(normalizeTerm) + .filter((term) => term.length > 4 && !STOP_WORDS.has(term)) + ); +} + +function normalizeTerm(value) { + return String(value).toLowerCase().replace(/[^a-z0-9-]/g, ""); +} + +function digest(value) { + return createHash("sha256").update(stableStringify(value)).digest("hex"); +} + +function stableStringify(value) { + if (Array.isArray(value)) { + return `[${value.map(stableStringify).join(",")}]`; + } + + if (value && typeof value === "object") { + return `{${Object.keys(value) + .sort() + .map((key) => `${JSON.stringify(key)}:${stableStringify(value[key])}`) + .join(",")}}`; + } + + return JSON.stringify(value); +} + +function unique(values) { + return [...new Set(values.filter((value) => value !== undefined && value !== null && value !== ""))]; +} + +function requiredString(value, fieldName) { + if (typeof value !== "string" || value.trim() === "") { + throw new TypeError(`${fieldName} must be a non-empty string.`); + } + + return value; +} + +function array(value, fieldName) { + if (!Array.isArray(value)) { + throw new TypeError(`${fieldName} must be an array.`); + } + + return value; +} + +if (process.argv[1] && fileURLToPath(import.meta.url) === process.argv[1]) { + const inputPath = process.argv[2] ?? 
new URL("../examples/project.json", import.meta.url); + const project = JSON.parse(readFileSync(inputPath, "utf8")); + const report = buildCitationProvenanceReadiness(project); + const topInsertion = report.citationInsertions[0]; + const topDiagnostic = report.diagnostics[0]; + + console.log(`Project: ${report.title}`); + console.log(`Status: ${report.readinessStatus}`); + console.log(`Readiness score: ${report.readinessScore}`); + console.log(`Claims reviewed: ${report.claimCount}`); + console.log(`Missing bibliography keys: ${report.missingBibliography.join(", ") || "none"}`); + console.log(`Top citation insertion: ${topInsertion?.recommendedCitations[0]?.key ?? "none"}`); + console.log(`Top reviewer action: ${topDiagnostic?.action ?? "No action needed."}`); + console.log(`Digest: ${report.digest}`); +} diff --git a/citation-provenance-readiness/tests/citation-provenance-readiness.test.mjs b/citation-provenance-readiness/tests/citation-provenance-readiness.test.mjs new file mode 100644 index 0000000..6ab11ce --- /dev/null +++ b/citation-provenance-readiness/tests/citation-provenance-readiness.test.mjs @@ -0,0 +1,49 @@ +import assert from "node:assert/strict"; +import { describe, it } from "node:test"; +import { + buildCitationProvenanceReadiness, + formatCitation, + normalizeProject, + rankCandidateCitations +} from "../src/citation-provenance-readiness.mjs"; +import sampleProject from "../examples/project.json" with { type: "json" }; + +describe("citation provenance readiness", () => { + it("builds a hold report when bibliography and citation coverage are incomplete", () => { + const report = buildCitationProvenanceReadiness(sampleProject); + + assert.equal(report.readinessStatus, "hold"); + assert.ok(report.readinessScore < 80); + assert.deepEqual(report.missingBibliography, ["missing2026"]); + assert.ok(report.diagnostics.some((diagnostic) => diagnostic.id === "claim-c2-citation-gap")); + assert.ok(report.diagnostics.some((diagnostic) => diagnostic.id === 
"claim-c3-statistical-context")); + }); + + it("ranks open citation candidates with explainable reasons", () => { + const project = normalizeProject(sampleProject); + const claim = project.claims.find((candidate) => candidate.id === "c2"); + const ranked = rankCandidateCitations(claim, project.candidateCitations); + + assert.equal(ranked[0].key, "nguyen2025"); + assert.ok(ranked[0].score > ranked[1].score); + assert.ok(ranked[0].reasons.some((reason) => reason.includes("open access"))); + }); + + it("produces stable summaries, insertions, and digest", () => { + const first = buildCitationProvenanceReadiness(sampleProject); + const second = buildCitationProvenanceReadiness(sampleProject); + + assert.equal(first.digest, second.digest); + assert.match(first.summaries.abstract, /citation provenance/); + assert.equal(first.citationInsertions[0].claimId, "c2"); + assert.match(first.citationInsertions[0].insertionText, /@nguyen2025/); + }); + + it("formats citation suggestions for reviewer-ready insertion plans", () => { + const project = normalizeProject(sampleProject); + const citation = project.candidateCitations.find((entry) => entry.key === "nguyen2025"); + + assert.match(formatCitation(citation, "apa"), /Nguyen, A\./); + assert.match(formatCitation(citation, "nature"), /Reproducible notebook capsules/); + }); +});