diff --git a/.env.example b/.env.example index ea632d4..d703de5 100644 --- a/.env.example +++ b/.env.example @@ -2,6 +2,7 @@ APP_ID= PRIVATE_KEY= WEBHOOK_SECRET= MARKETPLACE_WEBHOOK_SECRET= +# Used by PR security scans. Contributor trust scoring runs locally. BRIN_API_BASE=https://api.brin.sh PORT=3000 LOG_LEVEL=info diff --git a/.gitignore b/.gitignore index b9eabdc..f1d6900 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ node_modules/ dist/ +data/ .env *.log .DS_Store diff --git a/README.md b/README.md index 2954c2f..ee5a5b8 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# Brin GitHub App +# Superagent GitHub App -A GitHub App that automatically scans pull requests for security threats and evaluates contributor trust profiles using the [Brin](https://brin.sh) API. +A GitHub App that automatically scans pull requests for security threats and evaluates contributor trust profiles. ## What it does @@ -8,7 +8,7 @@ When installed on a repository, the app reacts to pull request events and runs t **PR Security Scan** -- Analyzes the PR diff for security threats (credential leaks, obfuscated payloads, dependency attacks, etc.) and reports a score from 0-100 with a verdict. -**Contributor Trust Check** -- Evaluates the PR author's GitHub profile across identity, behavior, content, and social graph dimensions to flag accounts that warrant additional review. +**Contributor Trust Check** -- Evaluates the PR author's GitHub profile across identity, behavior, and content dimensions to flag accounts that warrant additional review. Results are surfaced as: @@ -39,7 +39,7 @@ Create a new GitHub App at `https://github.com/settings/apps/new` with these set - Checks: Read & Write - Pull requests: Read & Write - Issues: Read & Write (for PR comments) -- Contents: Read (for `.github/brin.yml` config) +- Contents: Read (for repository config) - Metadata: Read **Webhook events:** @@ -61,6 +61,8 @@ PRIVATE_KEY= WEBHOOK_SECRET= ``` +Contributor trust scoring runs locally in this app and uses the GitHub App installation token to fetch profile and activity signals. + ### 3. Install dependencies and run ```bash @@ -78,7 +80,7 @@ Go to your app's installation page and install it on the repositories you want t ## Repo configuration -Repositories can optionally add a `.github/brin.yml` file to customize behavior: +Repositories can optionally add a configuration file to customize behavior: ```yaml prScan: @@ -112,21 +114,23 @@ src/ ├── services/ │ ├── prScan.ts # PR scan orchestration │ ├── contributorTrust.ts # Contributor trust orchestration +│ ├── contributorScanner.ts # Local contributor scoring facade +│ ├── githubContributor.ts # GitHub profile/activity signal collection │ ├── checkRuns.ts # GitHub Check Runs API wrapper │ ├── comments.ts # Marker-based comment management + rendering │ ├── labels.ts # Label ensure/set logic -│ └── config.ts # .github/brin.yml loader +│ └── config.ts # Repository config loader └── lib/ ├── env.ts # Environment variable validation ├── logger.ts # Structured logging (pino) ├── types.ts # Shared types, constants, label/marker defs - ├── brinApi.ts # Brin API HTTP client + ├── contributorScoring.ts # Contributor scoring formulas └── policy.ts # Verdict evaluation and threshold logic ``` ## Re-running checks -Maintainers can re-run any Brin check from the GitHub UI by clicking "Re-run" on the check run. The app handles `check_run.rerequested` events and re-executes the corresponding scan. +Maintainers can re-run any Superagent check from the GitHub UI by clicking "Re-run" on the check run. The app handles `check_run.rerequested` events and re-executes the corresponding scan. ## Development diff --git a/src/lib/__tests__/brinApi.test.ts b/src/lib/__tests__/brinApi.test.ts index cc820b6..4d4b713 100644 --- a/src/lib/__tests__/brinApi.test.ts +++ b/src/lib/__tests__/brinApi.test.ts @@ -1,5 +1,5 @@ import { beforeEach, describe, expect, it, vi } from "vitest"; -import { scanContributor, scanPr } from "../brinApi.js"; +import { scanPr } from "../brinApi.js"; vi.mock("../logger.js", () => ({ childLogger: () => ({ @@ -37,31 +37,3 @@ describe("scanPr", () => { ); }); }); - -describe("scanContributor", () => { - beforeEach(() => { - process.env.BRIN_API_BASE = "https://brin.example"; - vi.unstubAllGlobals(); - }); - - it("forwards a GitHub token for private contributor access", async () => { - const fetchMock = vi.fn().mockResolvedValue( - new Response(JSON.stringify({ score: 90, verdict: "safe" }), { - status: 200, - headers: { "content-type": "application/json" }, - }), - ); - vi.stubGlobal("fetch", fetchMock); - - await scanContributor("octocat", { - githubToken: "ghs_installation_token", - }); - - expect(fetchMock).toHaveBeenCalledWith( - "https://brin.example/contributor/octocat?details=true&mode=full", - expect.objectContaining({ - headers: { "x-github-token": "ghs_installation_token" }, - }), - ); - }); -}); diff --git a/src/lib/__tests__/contributorScoring.test.ts b/src/lib/__tests__/contributorScoring.test.ts new file mode 100644 index 0000000..166d81b --- /dev/null +++ b/src/lib/__tests__/contributorScoring.test.ts @@ -0,0 +1,216 @@ +import { describe, expect, it } from "vitest"; +import { + compositeScore, + emptyActivitySummary, + scoreContributor, + scoreContributorBehavior, + scoreContributorContent, + scoreContributorIdentity, + verdictForScore, + type ActivitySummary, + type PrSummary, + type UserProfile, +} from "../contributorScoring.js"; + +const NOW = new Date("2026-01-15T12:00:00.000Z"); +const MS_PER_DAY = 24 * 60 * 60 * 1000; + +function profile(ageDays?: number, email?: string): UserProfile { + return { + login: "testuser", + accountAgeDays: ageDays, + publicRepos: 10, + followers: 50, + email, + reposContributedTo: [], + orgs: [], + hasGpgKeys: false, + }; +} + +function activity( + totalEvents: number, + lastEventDaysAgo: number | undefined, + oldestEventDaysAgo: number | undefined, + distinctRepos7d: number, + hasForkOnly: boolean, +): ActivitySummary { + return { + ...emptyActivitySummary(), + repos: ["owner/repo"], + totalEvents, + lastEventAt: + lastEventDaysAgo == null + ? undefined + : new Date(NOW.getTime() - lastEventDaysAgo * MS_PER_DAY), + oldestEventAt: + oldestEventDaysAgo == null + ? undefined + : new Date(NOW.getTime() - oldestEventDaysAgo * MS_PER_DAY), + pushCount: totalEvents, + prCount: hasForkOnly ? 0 : 1, + distinctRepos7d, + hasForkOnlyActivity: hasForkOnly, + }; +} + +describe("contributor identity scoring", () => { + it("scores established contributors high", () => { + const p = profile(2000, "user@example.com"); + const [score, threats] = scoreContributorIdentity(p, true, ["rust-lang", "tokio-rs"]); + expect(score).toBe(100); + expect(threats).toEqual([]); + }); + + it("flags new accounts with no trust signals", () => { + const [score, threats] = scoreContributorIdentity(profile(5), false, []); + expect(score).toBe(8); + expect(threats).toHaveLength(1); + expect(threats[0]?.type).toBe("malicious_new_account"); + }); + + it("caps org bonus and distinguishes corporate email", () => { + const manyOrgs = Array.from({ length: 5 }, (_, i) => `org-${i}`); + const [orgScore] = scoreContributorIdentity(profile(400, "dev@gmail.com"), false, manyOrgs); + const [freeScore] = scoreContributorIdentity(profile(1000, "dev@gmail.com")); + const [corpScore] = scoreContributorIdentity(profile(1000, "dev@acme.co")); + + expect(orgScore).toBe(93); + expect(freeScore).toBe(73); + expect(corpScore).toBe(78); + }); + + it("counts profile metadata and contribution volume bonuses", () => { + const p = profile(4000, "dev@company.io"); + p.company = "Company Inc"; + p.followers = 5000; + p.totalContributions = 20000; + p.blog = "https://dev.company.io"; + p.xUsername = "devhandle"; + p.bio = "Staff engineer"; + + const [score, threats] = scoreContributorIdentity(p, true, ["rust-lang"]); + expect(score).toBe(100); + expect(threats).toEqual([]); + }); +}); + +describe("contributor behavior scoring", () => { + it("scores normal active contributors high", () => { + const [score, threats] = scoreContributorBehavior( + activity(30, 1, 60, 3, false), + 1000, + NOW, + ); + expect(score).toBe(90); + expect(threats).toEqual([]); + }); + + it("detects dormant accounts with a narrow activity spike", () => { + const [score, threats] = scoreContributorBehavior( + activity(25, 0, 5, 4, false), + 365, + NOW, + ); + expect(score).toBe(65); + expect(threats[0]?.type).toBe("sleeper_account"); + }); + + it("penalizes cross-repo velocity and fork-only activity", () => { + const [velocityScore] = scoreContributorBehavior(activity(50, 1, 30, 12, false), 500, NOW); + const [forkOnlyScore] = scoreContributorBehavior(activity(10, 2, 20, 2, true), 30, NOW); + + expect(velocityScore).toBe(70); + expect(forkOnlyScore).toBe(80); + }); + + it("detects PR spray, unsolicited PRs, rejected PRs, and low merge rate", () => { + const spray = activity(10, 0, 1, 5, false); + spray.prOpened24h = 7; + spray.prOpenedCount = 7; + spray.prTargetRepos7d = 5; + expect(scoreContributorBehavior(spray, 90, NOW)[1].some((t) => t.type === "pr_spray")) + .toBe(true); + + const unsolicited = activity(10, 0, 7, 3, false); + unsolicited.prOpenedCount = 5; + unsolicited.unsolicitedPrRatio = 0.9; + unsolicited.unsolicitedPrRepoCount = 4; + unsolicited.prTargetRepos = ["a/1", "b/2", "c/3", "d/4", "e/5"]; + expect(scoreContributorBehavior(unsolicited, 200, NOW)[0]).toBe(65); + + const rejected = activity(10, 0, 7, 3, false); + rejected.prRejectedRepos = 4; + rejected.prClosedNotMerged = 4; + expect(scoreContributorBehavior(rejected, 200, NOW)[1].some((t) => t.type === "pr_rejected_across_repos")) + .toBe(true); + + const lowMerge = activity(10, 0, 7, 3, false); + lowMerge.prClosedNotMerged = 6; + lowMerge.unsolicitedPrRatio = 0.5; + expect(scoreContributorBehavior(lowMerge, 200, NOW)[1].some((t) => t.type === "low_merge_rate")) + .toBe(true); + }); +}); + +describe("contributor content scoring", () => { + it("keeps content at 100 when there are no recent PRs", () => { + expect(scoreContributorContent(emptyActivitySummary(), 0)).toEqual([100, []]); + }); + + it("penalizes empty bodies and missing issue linkage", () => { + const prs: PrSummary[] = [0, 5, 10, 2, 3].map((bodyLen, index) => ({ + title: `fix: typo ${index}`, + bodyLen, + hasIssueRef: false, + repo: `a/${index}`, + })); + const summary = { ...emptyActivitySummary(), prOpenedCount: 5, recentPrs: prs }; + + const [score, threats] = scoreContributorContent(summary, 0); + expect(score).toBe(65); + expect(threats.some((t) => t.type === "no_issue_linkage")).toBe(true); + }); + + it("penalizes low effort PRs to unfamiliar repos", () => { + const summary = { + ...emptyActivitySummary(), + prOpenedCount: 4, + recentPrs: [1, 2, 3, 80].map((bodyLen, index) => ({ + title: `fix: ${index}`, + bodyLen, + hasIssueRef: index === 3, + repo: `a/${index}`, + })), + }; + + const [score, threats] = scoreContributorContent(summary, 0.75); + expect(score).toBe(55); + expect(threats.some((t) => t.type === "low_effort_pr")).toBe(true); + }); +}); + +describe("contributor composite scoring", () => { + it("uses only identity, behavior, and content weights", () => { + expect(compositeScore({ identity: 90, behavior: 80, content: 70 })).toBe(81); + }); + + it("uses conservative verdict boundaries", () => { + expect(verdictForScore(80)).toBe("safe"); + expect(verdictForScore(79)).toBe("caution"); + expect(verdictForScore(49)).toBe("suspicious"); + expect(verdictForScore(19)).toBe("dangerous"); + }); + + it("caps high confidence because contributor graph is not used", () => { + const p = profile(4000, "dev@company.io"); + p.followers = 5000; + p.totalContributions = 20000; + p.orgs = ["rust-lang", "tokio-rs"]; + p.hasGpgKeys = true; + const result = scoreContributor(p, activity(30, 1, 60, 3, false), NOW); + + expect(result.score).toBeGreaterThanOrEqual(85); + expect(result.confidence).toBe("medium"); + }); +}); diff --git a/src/lib/brinApi.ts b/src/lib/brinApi.ts index e202d4a..d97e3b7 100644 --- a/src/lib/brinApi.ts +++ b/src/lib/brinApi.ts @@ -1,6 +1,6 @@ import { env } from "./env.js"; import { childLogger } from "./logger.js"; -import type { PrScanResult, ContributorResult } from "./types.js"; +import type { PrScanResult } from "./types.js"; export async function scanPr( owner: string, @@ -32,31 +32,3 @@ export async function scanPr( return {}; } } - -export async function scanContributor( - login: string, - options: { githubToken?: string } = {}, -): Promise { - const url = `${env.brinApiBase}/contributor/${login}?details=true&mode=full`; - const log = childLogger({ service: "brin-api", endpoint: "contributor", login }); - const headers = options.githubToken - ? { "x-github-token": options.githubToken } - : undefined; - - try { - const res = await fetch(url, { - headers, - signal: AbortSignal.timeout(30_000), - }); - if (!res.ok) { - log.warn({ status: res.status }, "Brin contributor API returned non-OK status"); - return {}; - } - const data = (await res.json()) as ContributorResult; - log.info({ score: data.score, verdict: data.verdict }, "Contributor scan response"); - return data; - } catch (err) { - log.error({ err }, "Brin contributor API request failed"); - return {}; - } -} diff --git a/src/lib/contributorScoring.ts b/src/lib/contributorScoring.ts new file mode 100644 index 0000000..1ca5182 --- /dev/null +++ b/src/lib/contributorScoring.ts @@ -0,0 +1,371 @@ +import type { ContributorResult } from "./types.js"; + +const MS_PER_DAY = 24 * 60 * 60 * 1000; + +const FREE_EMAIL_DOMAINS = new Set([ + "gmail.com", + "googlemail.com", + "outlook.com", + "hotmail.com", + "live.com", + "yahoo.com", + "yahoo.co.jp", + "icloud.com", + "me.com", + "protonmail.com", + "proton.me", + "aol.com", + "mail.com", + "yandex.com", + "gmx.com", + "gmx.de", +]); + +export interface ContributorDimensions { + identity: number; + behavior: number; + content: number; +} + +export interface ContributorThreat { + type: string; + severity: string; + detail: string; +} + +export interface UserProfile { + nodeId?: string; + login: string; + accountAgeDays?: number; + publicRepos: number; + followers: number; + company?: string; + email?: string; + blog?: string; + bio?: string; + xUsername?: string; + totalContributions?: number; + reposContributedTo: string[]; + orgs: string[]; + hasGpgKeys: boolean; +} + +export interface PrSummary { + title: string; + bodyLen: number; + hasIssueRef: boolean; + repo: string; +} + +export interface ActivitySummary { + repos: string[]; + totalEvents: number; + lastEventAt?: Date; + oldestEventAt?: Date; + pushCount: number; + prCount: number; + reviewCount: number; + distinctRepos7d: number; + hasForkOnlyActivity: boolean; + prOpenedCount: number; + prTargetRepos7d: number; + prOpened24h: number; + prTargetRepos: string[]; + prClosedNotMerged: number; + prMergedCount: number; + prRejectedRepos: number; + unsolicitedPrRatio: number; + unsolicitedPrRepoCount: number; + recentPrs: PrSummary[]; +} + +export interface ContributorScoreResult { + dimensions: ContributorDimensions; + score: number; + verdict: string; + confidence: string; + threats: ContributorThreat[]; +} + +export function emptyActivitySummary(): ActivitySummary { + return { + repos: [], + totalEvents: 0, + pushCount: 0, + prCount: 0, + reviewCount: 0, + distinctRepos7d: 0, + hasForkOnlyActivity: false, + prOpenedCount: 0, + prTargetRepos7d: 0, + prOpened24h: 0, + prTargetRepos: [], + prClosedNotMerged: 0, + prMergedCount: 0, + prRejectedRepos: 0, + unsolicitedPrRatio: 0, + unsolicitedPrRepoCount: 0, + recentPrs: [], + }; +} + +export function scoreContributorIdentity( + profile: UserProfile, + hasGpg: boolean = profile.hasGpgKeys, + orgs: string[] = profile.orgs, +): [number, ContributorThreat[]] { + const threats: ContributorThreat[] = []; + const age = profile.accountAgeDays; + let score = + age == null ? 30 : age < 30 ? 10 : age < 90 ? 25 : age < 365 ? 40 : age < 1095 ? 65 : 85; + + if (profile.email) { + const domain = profile.email.includes("@") + ? profile.email.split("@").at(-1)?.toLowerCase() + : undefined; + score += domain && !FREE_EMAIL_DOMAINS.has(domain) ? 10 : 5; + } else { + score -= 5; + } + + if (hasGpg) score += 10; + score += Math.min(orgs.length * 10, 20); + if (profile.company) score += 5; + + if (profile.followers >= 1000) { + score += 10; + } else if (profile.followers >= 100) { + score += 5; + } else if (profile.followers >= 10) { + score += 3; + } + + if ((profile.totalContributions ?? 0) >= 5000) { + score += 10; + } else if ((profile.totalContributions ?? 0) >= 1000) { + score += 7; + } else if ((profile.totalContributions ?? 0) >= 100) { + score += 3; + } + + if (profile.blog) score += 3; + if (profile.xUsername) score += 3; + if (profile.bio) score += 2; + + if (age != null && age < 30 && !hasGpg && orgs.length === 0) { + threats.push({ + type: "malicious_new_account", + severity: "high", + detail: `Account is ${age} days old with no GPG keys and no org memberships`, + }); + } + + return [clampScore(score), threats]; +} + +export function scoreContributorBehavior( + summary: ActivitySummary, + accountAgeDays?: number, + now: Date = new Date(), +): [number, ContributorThreat[]] { + const threats: ContributorThreat[] = []; + let score = 80; + const age = accountAgeDays ?? 0; + + if (age > 180 && summary.totalEvents > 20 && summary.totalEvents < 90 && summary.oldestEventAt) { + const eventSpanDays = daysBetween(now, summary.oldestEventAt); + if (eventSpanDays <= 7) { + score -= 25; + threats.push({ + type: "sleeper_account", + severity: "medium", + detail: `Account is ${age} days old but all ${summary.totalEvents} events are within the last ${eventSpanDays} day(s)`, + }); + } + } + + if (summary.distinctRepos7d > 10) score -= 20; + + if (summary.lastEventAt) { + const daysSince = daysBetween(now, summary.lastEventAt); + if (daysSince <= 30) { + score += 10; + } else if (daysSince <= 90) { + score += 5; + } + } + + if (summary.hasForkOnlyActivity && age < 90) score -= 10; + + const prSprayThreshold = age > 365 && summary.totalEvents >= 50 ? 15 : age > 180 ? 10 : 5; + if (summary.prOpened24h >= prSprayThreshold && summary.prTargetRepos7d >= 4) { + score -= 20; + threats.push({ + type: "pr_spray", + severity: "medium", + detail: `${summary.prOpened24h} PRs opened in the last 24 hours across ${summary.prTargetRepos7d} repositories`, + }); + } + + if (summary.prTargetRepos7d >= 8) score -= 15; + + if (summary.unsolicitedPrRatio > 0.8 && summary.prOpenedCount >= 3) { + score -= 25; + threats.push({ + type: "unsolicited_pr_pattern", + severity: "high", + detail: `${Math.round(summary.unsolicitedPrRatio * 100)}% of PRs target repos with zero prior commit history (${summary.unsolicitedPrRepoCount} of ${summary.prTargetRepos.length} repos)`, + }); + } else if (summary.unsolicitedPrRatio > 0.5 && summary.prOpenedCount >= 5) { + score -= 15; + } + + if (summary.prRejectedRepos >= 3) { + score -= 20; + threats.push({ + type: "pr_rejected_across_repos", + severity: "medium", + detail: `PRs closed without merge in ${summary.prRejectedRepos} distinct repos`, + }); + } + + if ( + summary.prClosedNotMerged >= 5 && + summary.prMergedCount === 0 && + summary.unsolicitedPrRatio > 0 + ) { + score -= 15; + threats.push({ + type: "low_merge_rate", + severity: "low", + detail: `${summary.prClosedNotMerged} PRs closed without merge, 0 merged`, + }); + } + + return [clampScore(score), threats]; +} + +export function scoreContributorContent( + summary: ActivitySummary, + unsolicitedPrRatio: number, +): [number, ContributorThreat[]] { + const threats: ContributorThreat[] = []; + let score = 100; + + if (summary.recentPrs.length === 0) { + return [score, threats]; + } + + if (summary.prOpenedCount >= 3) { + const emptyCount = summary.recentPrs.filter((pr) => pr.bodyLen < 20).length; + if (emptyCount / summary.recentPrs.length > 0.6) { + score -= 20; + } + } + + if (summary.prOpenedCount >= 5 && !summary.recentPrs.some((pr) => pr.hasIssueRef)) { + score -= 15; + threats.push({ + type: "no_issue_linkage", + severity: "low", + detail: `None of ${summary.prOpenedCount} recent PRs reference an issue`, + }); + } + + if (unsolicitedPrRatio > 0.5) { + const lowEffortCount = summary.recentPrs.filter((pr) => pr.bodyLen < 20).length; + if (lowEffortCount >= 3) { + score -= 25; + threats.push({ + type: "low_effort_pr", + severity: "medium", + detail: `${lowEffortCount} PRs with minimal descriptions sent to unfamiliar repos`, + }); + } + } + + return [clampScore(score), threats]; +} + +export function scoreContributor( + profile: UserProfile, + activity: ActivitySummary, + now: Date = new Date(), +): ContributorScoreResult { + const [identity, identityThreats] = scoreContributorIdentity( + profile, + profile.hasGpgKeys, + profile.orgs, + ); + const [behavior, behaviorThreats] = scoreContributorBehavior( + activity, + profile.accountAgeDays, + now, + ); + const [content, contentThreats] = scoreContributorContent( + activity, + activity.unsolicitedPrRatio, + ); + const dimensions = { identity, behavior, content }; + const score = compositeScore(dimensions); + const confidence = capConfidenceForContributor(deriveConfidence(dimensions, score)); + + return { + dimensions, + score, + verdict: verdictForScore(score), + confidence, + threats: [...identityThreats, ...behaviorThreats, ...contentThreats], + }; +} + +export function toContributorResult( + login: string, + scoreResult: ContributorScoreResult, +): ContributorResult { + return { + name: login, + score: scoreResult.score, + verdict: scoreResult.verdict, + confidence: scoreResult.confidence, + threats: scoreResult.threats, + sub_scores: scoreResult.dimensions, + }; +} + +export function compositeScore(dimensions: ContributorDimensions): number { + return clampScore( + Math.round(dimensions.identity * 0.35 + dimensions.behavior * 0.4 + dimensions.content * 0.25), + ); +} + +export function verdictForScore(score: number): string { + if (score >= 80) return "safe"; + if (score >= 50) return "caution"; + if (score >= 20) return "suspicious"; + return "dangerous"; +} + +export function deriveConfidence(dimensions: ContributorDimensions, score: number): string { + if ( + score >= 85 && + dimensions.identity >= 75 && + dimensions.behavior >= 70 && + dimensions.content >= 75 + ) { + return "high"; + } + return score >= 45 ? "medium" : "low"; +} + +function capConfidenceForContributor(confidence: string): string { + return confidence === "high" ? "medium" : confidence; +} + +function clampScore(score: number): number { + return Math.min(100, Math.max(0, score)); +} + +function daysBetween(later: Date, earlier: Date): number { + return Math.max(0, Math.floor((later.getTime() - earlier.getTime()) / MS_PER_DAY)); +} diff --git a/src/lib/db.ts b/src/lib/db.ts index 66d9ff7..2dcde2a 100644 --- a/src/lib/db.ts +++ b/src/lib/db.ts @@ -34,6 +34,12 @@ db.exec(` added_at TEXT NOT NULL DEFAULT (datetime('now')), UNIQUE(installation_id, repo_full_name) ); + + CREATE TABLE IF NOT EXISTS contributor_scans ( + login TEXT PRIMARY KEY, + result_json TEXT NOT NULL, + scanned_at TEXT NOT NULL + ); `); logger.info({ path: env.dbPath }, "Database initialized"); @@ -98,6 +104,20 @@ export const queries: Record = { SUM(CASE WHEN active = 0 THEN 1 ELSE 0 END) as removed FROM installations `), + + getContributorScan: db.prepare(` + SELECT result_json as resultJson, scanned_at as scannedAt + FROM contributor_scans + WHERE login = @login + `), + + upsertContributorScan: db.prepare(` + INSERT INTO contributor_scans (login, result_json, scanned_at) + VALUES (@login, @resultJson, @scannedAt) + ON CONFLICT(login) DO UPDATE SET + result_json = @resultJson, + scanned_at = @scannedAt + `), }; export function saveInstallation( diff --git a/src/lib/types.ts b/src/lib/types.ts index 88c6122..b5f3461 100644 --- a/src/lib/types.ts +++ b/src/lib/types.ts @@ -55,7 +55,6 @@ export interface ContributorResult { identity?: number; behavior?: number; content?: number; - graph?: number; }; } diff --git a/src/services/__tests__/comments.test.ts b/src/services/__tests__/comments.test.ts index 70740e5..1f09b91 100644 --- a/src/services/__tests__/comments.test.ts +++ b/src/services/__tests__/comments.test.ts @@ -55,7 +55,6 @@ describe("renderContributorTrustComment", () => { identity: 20, behavior: 30, content: 40, - graph: 10, }, }; const body = renderContributorTrustComment(result); @@ -72,7 +71,7 @@ describe("renderContributorTrustComment", () => { expect(body).toContain("| Identity | 20 |"); expect(body).toContain("| Behavior | 30 |"); expect(body).toContain("| Content | 40 |"); - expect(body).toContain("| Graph | 10 |"); + expect(body).not.toContain("| Graph |"); expect(body).toContain("[Full profile](https://brin.sh/contributor/sketchy-user?details=true)"); }); @@ -97,7 +96,7 @@ describe("renderContributorTrustComment", () => { const body = renderContributorTrustComment(result); expect(body).toContain("| Identity | \u2014 |"); - expect(body).toContain("| Graph | \u2014 |"); + expect(body).not.toContain("| Graph |"); }); it("omits full profile link when url is missing", () => { diff --git a/src/services/__tests__/contributorTrust.test.ts b/src/services/__tests__/contributorTrust.test.ts new file mode 100644 index 0000000..6ee3003 --- /dev/null +++ b/src/services/__tests__/contributorTrust.test.ts @@ -0,0 +1,146 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; +import { CHECK_NAMES, DEFAULT_CONFIG } from "../../lib/types.js"; +import { scanContributorLocally } from "../contributorScanner.js"; +import { runContributorTrust } from "../contributorTrust.js"; + +vi.mock("../contributorScanner.js", () => ({ + scanContributorLocally: vi.fn(), +})); + +vi.mock("../../lib/logger.js", () => ({ + childLogger: () => ({ + error: vi.fn(), + info: vi.fn(), + warn: vi.fn(), + }), +})); + +const scanContributorLocallyMock = vi.mocked(scanContributorLocally); + +describe("runContributorTrust", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it("uses the local scanner result to flag contributors", async () => { + scanContributorLocallyMock.mockResolvedValue({ + name: "sketchy-user", + score: 20, + verdict: "suspicious", + confidence: "low", + threats: [ + { + type: "malicious_new_account", + severity: "high", + detail: "Account is new", + }, + ], + sub_scores: { identity: 10, behavior: 50, content: 40 }, + }); + const octokit = mockOctokit(); + + await runContributorTrust(octokit, { + owner: "acme", + repo: "repo", + prNumber: 12, + headSha: "abc123", + authorLogin: "sketchy-user", + config: DEFAULT_CONFIG, + }); + + expect(scanContributorLocallyMock).toHaveBeenCalledWith("sketchy-user", { + githubToken: "ghs_installation_token", + }); + expect(octokit.rest.checks.create).toHaveBeenCalledWith( + expect.objectContaining({ + name: CHECK_NAMES.CONTRIBUTOR_TRUST, + head_sha: "abc123", + status: "in_progress", + }), + ); + expect(octokit.rest.checks.update).toHaveBeenCalledWith( + expect.objectContaining({ + check_run_id: 42, + conclusion: "failure", + output: expect.objectContaining({ + title: "Contributor flagged for review", + summary: "Score: 20/100 \u00b7 Verdict: suspicious", + }), + }), + ); + expect(octokit.rest.issues.setLabels).toHaveBeenCalledWith( + expect.objectContaining({ + issue_number: 12, + labels: ["keep", "contributor:flagged"], + }), + ); + expect(octokit.rest.issues.createComment).toHaveBeenCalledWith( + expect.objectContaining({ + body: expect.stringContaining("Contributor Trust Check"), + }), + ); + const commentBody = vi.mocked(octokit.rest.issues.createComment).mock.calls[0]?.[0].body; + expect(commentBody).toContain("| Identity | 10 |"); + expect(commentBody).not.toContain("| Graph |"); + }); + + it("uses the local scanner result to verify contributors", async () => { + scanContributorLocallyMock.mockResolvedValue({ + name: "trusted-user", + score: 90, + verdict: "safe", + confidence: "medium", + sub_scores: { identity: 90, behavior: 90, content: 90 }, + }); + const octokit = mockOctokit([{ id: 99, body: " old comment" }]); + + await runContributorTrust(octokit, { + owner: "acme", + repo: "repo", + prNumber: 12, + headSha: "abc123", + authorLogin: "trusted-user", + config: DEFAULT_CONFIG, + }); + + expect(octokit.rest.checks.update).toHaveBeenCalledWith( + expect.objectContaining({ + conclusion: "success", + output: expect.objectContaining({ + title: "Contributor verified", + summary: "Score: 90/100 \u00b7 Verdict: safe", + }), + }), + ); + expect(octokit.rest.issues.setLabels).toHaveBeenCalledWith( + expect.objectContaining({ labels: ["keep", "contributor:verified"] }), + ); + expect(octokit.rest.issues.deleteComment).toHaveBeenCalledWith( + expect.objectContaining({ comment_id: 99 }), + ); + }); +}); + +function mockOctokit(comments: Array<{ id: number; body: string }> = []) { + return { + auth: vi.fn().mockResolvedValue({ token: "ghs_installation_token" }), + paginate: vi.fn().mockResolvedValue(comments), + rest: { + checks: { + create: vi.fn().mockResolvedValue({ data: { id: 42 } }), + update: vi.fn().mockResolvedValue({}), + }, + issues: { + getLabel: vi.fn().mockRejectedValue({ status: 404 }), + createLabel: vi.fn().mockResolvedValue({}), + updateLabel: vi.fn().mockResolvedValue({}), + listLabelsOnIssue: vi.fn().mockResolvedValue({ data: [{ name: "keep" }] }), + setLabels: vi.fn().mockResolvedValue({}), + listComments: vi.fn(), + updateComment: vi.fn().mockResolvedValue({}), + createComment: vi.fn().mockResolvedValue({}), + deleteComment: vi.fn().mockResolvedValue({}), + }, + }, + } as any; +} diff --git a/src/services/__tests__/githubContributor.test.ts b/src/services/__tests__/githubContributor.test.ts new file mode 100644 index 0000000..89212b3 --- /dev/null +++ b/src/services/__tests__/githubContributor.test.ts @@ -0,0 +1,225 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; +import { + batchCheckCommitHistory, + collectContributorSignals, + fetchActivitySummary, + fetchUserProfile, + fetchUserOrgs, +} from "../githubContributor.js"; + +const NOW = new Date("2026-01-15T12:00:00.000Z"); + +describe("githubContributor", () => { + beforeEach(() => { + vi.unstubAllGlobals(); + }); + + it("fetches a user profile through GitHub GraphQL", async () => { + const fetchMock = vi.fn().mockResolvedValue( + jsonResponse({ + data: { + user: { + id: "U_1", + login: "octocat", + createdAt: "2020-01-01T00:00:00Z", + repositories: { totalCount: 8 }, + followers: { totalCount: 42 }, + company: "GitHub", + email: "octo@github.com", + websiteUrl: "https://github.blog", + bio: "Mona", + twitterUsername: "octocat", + contributionsCollection: { + contributionCalendar: { totalContributions: 100 }, + }, + repositoriesContributedTo: { nodes: [{ nameWithOwner: "github/docs" }] }, + publicKeys: { totalCount: 1 }, + }, + }, + }), + ); + vi.stubGlobal("fetch", fetchMock); + + const profile = await fetchUserProfile("octocat", "ghs_token", NOW); + + expect(profile).toMatchObject({ + nodeId: "U_1", + login: "octocat", + publicRepos: 8, + followers: 42, + company: "GitHub", + email: "octo@github.com", + orgs: [], + reposContributedTo: ["github/docs"], + hasGpgKeys: true, + }); + expect(profile.accountAgeDays).toBeGreaterThan(2000); + expect(fetchMock).toHaveBeenCalledWith( + "https://api.github.com/graphql", + expect.objectContaining({ + method: "POST", + headers: expect.objectContaining({ authorization: "Bearer ghs_token" }), + }), + ); + }); + + it("fetches public orgs through REST separately from the profile query", async () => { + const fetchMock = vi.fn().mockResolvedValue(jsonResponse([{ login: "github" }])); + vi.stubGlobal("fetch", fetchMock); + + await expect(fetchUserOrgs("octocat", "ghs_token")).resolves.toEqual(["github"]); + expect(fetchMock).toHaveBeenCalledWith( + "https://api.github.com/users/octocat/orgs", + expect.objectContaining({ + headers: expect.objectContaining({ authorization: "Bearer ghs_token" }), + }), + ); + }); + + it("parses public activity and batches PR detail lookup", async () => { + const fetchMock = vi + .fn() + .mockResolvedValueOnce( + jsonResponse([ + event("PushEvent", "owner/repo", "2026-01-14T12:00:00Z"), + prEvent("opened", "target/repo", 7, false, "2026-01-15T06:00:00Z"), + prEvent("closed", "target2/repo", 8, false, "2026-01-12T00:00:00Z"), + event("PullRequestReviewEvent", "review/repo", "2026-01-10T00:00:00Z"), + ]), + ) + .mockResolvedValueOnce( + jsonResponse({ + data: { + pr0: { + pullRequest: { + title: "Fix docs", + body: "Fixes #123", + }, + }, + }, + }), + ); + vi.stubGlobal("fetch", fetchMock); + + const activity = await fetchActivitySummary("octocat", "ghs_token", NOW); + + expect(activity.totalEvents).toBe(4); + expect(activity.pushCount).toBe(1); + expect(activity.prOpenedCount).toBe(1); + expect(activity.prOpened24h).toBe(1); + expect(activity.prClosedNotMerged).toBe(1); + expect(activity.prRejectedRepos).toBe(1); + expect(activity.prTargetRepos).toEqual(["target/repo"]); + expect(activity.recentPrs).toEqual([ + { title: "Fix docs", bodyLen: 10, hasIssueRef: true, repo: "target/repo" }, + ]); + }); + + it("checks commit history with one GraphQL batch", async () => { + const fetchMock = vi.fn().mockResolvedValue( + jsonResponse({ + data: { + repo0: { defaultBranchRef: { target: { history: { totalCount: 0 } } } }, + repo1: { defaultBranchRef: { target: { history: { totalCount: 4 } } } }, + }, + }), + ); + vi.stubGlobal("fetch", fetchMock); + + const counts = await batchCheckCommitHistory("U_1", [["a", "one"], ["b", "two"]], "token"); + + expect(counts.get("a/one")).toBe(0); + expect(counts.get("b/two")).toBe(4); + }); + + it("collects profile, activity, commits, and unsolicited PR ratio", async () => { + const fetchMock = vi.fn(async (input: RequestInfo | URL, init?: RequestInit) => { + const url = input.toString(); + const body = typeof init?.body === "string" ? init.body : ""; + if (url.endsWith("/graphql") && body.includes("user(login")) { + return jsonResponse({ + data: { + user: { + id: "U_1", + login: "octocat", + createdAt: "2020-01-01T00:00:00Z", + repositories: { totalCount: 1 }, + followers: { totalCount: 1 }, + contributionsCollection: { + contributionCalendar: { totalContributions: 10 }, + }, + repositoriesContributedTo: { nodes: [] }, + publicKeys: { totalCount: 0 }, + }, + }, + }); + } + if (url.includes("/search/commits")) { + return jsonResponse({ total_count: 1234 }); + } + if (url.includes("/users/octocat/orgs")) { + return jsonResponse([{ login: "github" }]); + } + if (url.includes("/events/public")) { + return jsonResponse([prEvent("opened", "target/repo", 7, false, "2026-01-15T06:00:00Z")]); + } + if (url.endsWith("/graphql") && body.includes("pullRequest")) { + return jsonResponse({ + data: { pr0: { pullRequest: { title: "Fix typo", body: "" } } }, + }); + } + if (url.endsWith("/graphql") && body.includes("history(author")) { + return jsonResponse({ + data: { repo0: { defaultBranchRef: { target: { history: { totalCount: 0 } } } } }, + }); + } + throw new Error(`unexpected fetch: ${url}`); + }); + vi.stubGlobal("fetch", fetchMock); + + const signals = await collectContributorSignals("octocat", { githubToken: "token" }, NOW); + + expect(signals.profile?.totalContributions).toBe(1234); + expect(signals.profile?.orgs).toEqual(["github"]); + expect(signals.activity.unsolicitedPrRatio).toBe(1); + expect(signals.activity.unsolicitedPrRepoCount).toBe(1); + }); +}); + +function jsonResponse(body: unknown): Response { + return new Response(JSON.stringify(body), { + status: 200, + headers: { "content-type": "application/json" }, + }); +} + +function event(type: string, repo: string, createdAt: string) { + return { + type, + created_at: createdAt, + repo: { name: repo }, + payload: {}, + }; +} + +function prEvent( + action: string, + repo: string, + number: number, + merged: boolean, + createdAt: string, +) { + return { + type: "PullRequestEvent", + created_at: createdAt, + repo: { name: repo }, + payload: { + action, + pull_request: { + number, + merged, + base: { repo: { full_name: repo } }, + }, + }, + }; +} diff --git a/src/services/comments.ts b/src/services/comments.ts index 1705b89..793ae7a 100644 --- a/src/services/comments.ts +++ b/src/services/comments.ts @@ -116,7 +116,7 @@ export function renderContributorTrustComment( body += `| Identity | ${fmt(sub.identity)} | Account age, contribution history, GPG keys, org memberships |\n`; body += `| Behavior | ${fmt(sub.behavior)} | PR patterns, unsolicited contribution ratio, activity cadence |\n`; body += `| Content | ${fmt(sub.content)} | PR body substance, issue linkage, contribution quality |\n`; - body += `| Graph | ${fmt(sub.graph)} | Cross-repo trust, co-contributor relationships |\n\n`; + body += `\n`; body += `\n\n`; body += `Analyzed by [Brin](https://brin.sh)`; diff --git a/src/services/contributorScanCache.ts b/src/services/contributorScanCache.ts new file mode 100644 index 0000000..44c64df --- /dev/null +++ b/src/services/contributorScanCache.ts @@ -0,0 +1,44 @@ +import type { ContributorResult } from "../lib/types.js"; +import { queries } from "../lib/db.js"; +import { childLogger } from "../lib/logger.js"; + +export const CONTRIBUTOR_SCAN_CACHE_TTL_MS = 10 * 60 * 1000; + +const log = childLogger({ service: "contributor-scan-cache" }); + +interface CachedContributorScanRow { + resultJson: string; + scannedAt: string; +} + +export function getCachedContributorScan( + login: string, + maxAgeMs: number = CONTRIBUTOR_SCAN_CACHE_TTL_MS, +): ContributorResult | undefined { + try { + const row = queries.getContributorScan.get({ login }) as CachedContributorScanRow | undefined; + if (!row) return undefined; + + const scannedAt = Date.parse(row.scannedAt); + if (!Number.isFinite(scannedAt) || Date.now() - scannedAt > maxAgeMs) { + return undefined; + } + + return JSON.parse(row.resultJson) as ContributorResult; + } catch (err) { + log.warn({ err, login }, "Contributor scan cache read failed"); + return undefined; + } +} + +export function saveCachedContributorScan(login: string, result: ContributorResult): void { + try { + queries.upsertContributorScan.run({ + login, + resultJson: JSON.stringify(result), + scannedAt: new Date().toISOString(), + }); + } catch (err) { + log.warn({ err, login }, "Contributor scan cache write failed"); + } +} diff --git a/src/services/contributorScanner.ts b/src/services/contributorScanner.ts new file mode 100644 index 0000000..1878d09 --- /dev/null +++ b/src/services/contributorScanner.ts @@ -0,0 +1,65 @@ +import { + compositeScore, + deriveConfidence, + scoreContributor, + toContributorResult, + verdictForScore, + type ContributorDimensions, + type ContributorScoreResult, +} from "../lib/contributorScoring.js"; +import type { ContributorResult } from "../lib/types.js"; +import { childLogger } from "../lib/logger.js"; +import { collectContributorSignals } from "./githubContributor.js"; +import { + getCachedContributorScan, + saveCachedContributorScan, +} from "./contributorScanCache.js"; + +export async function scanContributorLocally( + login: string, + options: { githubToken?: string } = {}, +): Promise { + const log = childLogger({ service: "contributor-scanner", login }); + const cached = getCachedContributorScan(login); + if (cached) { + log.info({ score: cached.score, verdict: cached.verdict }, "Contributor scan cache hit"); + return cached; + } + + try { + const { profile, activity } = await collectContributorSignals(login, options); + const scoreResult = profile + ? scoreContributor(profile, activity) + : scoreContributorWithMissingProfile(); + const result = toContributorResult(login, scoreResult); + + saveCachedContributorScan(login, result); + log.info({ score: result.score, verdict: result.verdict }, "Contributor scan completed"); + return result; + } catch (err) { + log.error({ err }, "Local contributor scan failed"); + return {}; + } +} + +function scoreContributorWithMissingProfile(): ContributorScoreResult { + const dimensions: ContributorDimensions = { + identity: 30, + behavior: 80, + content: 100, + }; + const score = compositeScore(dimensions); + const confidence = capConfidenceForContributor(deriveConfidence(dimensions, score)); + + return { + dimensions, + score, + verdict: verdictForScore(score), + confidence, + threats: [], + }; +} + +function capConfidenceForContributor(confidence: string): string { + return confidence === "high" ? "medium" : confidence; +} diff --git a/src/services/contributorTrust.ts b/src/services/contributorTrust.ts index 3874fa8..bf99441 100644 --- a/src/services/contributorTrust.ts +++ b/src/services/contributorTrust.ts @@ -1,8 +1,8 @@ import type { Octokit } from "octokit"; import type { RepoConfig } from "../lib/types.js"; import { CHECK_NAMES, MARKERS, LABEL_DEFS } from "../lib/types.js"; -import { scanContributor } from "../lib/brinApi.js"; import { evaluateContributor } from "../lib/policy.js"; +import { scanContributorLocally } from "./contributorScanner.js"; import { createInProgressCheck, completeCheck } from "./checkRuns.js"; import { upsertComment, @@ -55,7 +55,7 @@ export async function runContributorTrust( ); const githubToken = await getGitHubToken(octokit); - const result = await scanContributor(authorLogin, { githubToken }); + const result = await scanContributorLocally(authorLogin, { githubToken }); const { isSafe } = evaluateContributor(result, config); log.info( diff --git a/src/services/githubContributor.ts b/src/services/githubContributor.ts new file mode 100644 index 0000000..9055194 --- /dev/null +++ b/src/services/githubContributor.ts @@ -0,0 +1,497 @@ +import { + emptyActivitySummary, + type ActivitySummary, + type PrSummary, + type UserProfile, +} from "../lib/contributorScoring.js"; + +const GITHUB_API = "https://api.github.com"; +const GITHUB_API_VERSION = "2022-11-28"; +const USER_AGENT = "brin-github/0.1"; +const REQUEST_TIMEOUT_MS = 30_000; +const ISSUE_REF_RE = /#\d+/; + +export interface ContributorSignals { + profile?: UserProfile; + activity: ActivitySummary; +} + +export async function collectContributorSignals( + login: string, + options: { githubToken?: string } = {}, + now: Date = new Date(), +): Promise { + const profileResult = await Promise.allSettled([ + fetchUserProfile(login, options.githubToken, now), + fetchUserOrgs(login, options.githubToken), + fetchTotalCommitCount(login, options.githubToken), + fetchActivitySummary(login, options.githubToken, now), + ]); + + const [profileSettled, orgsSettled, commitCountSettled, activitySettled] = profileResult; + if (profileSettled.status === "rejected") { + return { activity: emptyActivitySummary() }; + } + + const profile = profileSettled.value; + if (orgsSettled.status === "fulfilled") { + profile.orgs = orgsSettled.value; + } + if (commitCountSettled.status === "fulfilled" && commitCountSettled.value != null) { + profile.totalContributions = commitCountSettled.value; + } + + let activity = + activitySettled.status === "fulfilled" ? activitySettled.value : emptyActivitySummary(); + for (const repo of activity.repos) { + if (!profile.reposContributedTo.includes(repo)) { + profile.reposContributedTo.push(repo); + } + } + + if (profile.nodeId && activity.prTargetRepos.length > 0) { + const repos = activity.prTargetRepos + .slice(0, 10) + .map(parseRepoIdentifier) + .filter((repo): repo is [string, string] => repo != null); + const commitCounts = await batchCheckCommitHistory(profile.nodeId, repos, options.githubToken); + if (repos.length > 0) { + const unsolicitedCount = repos.filter(([owner, repo]) => { + return (commitCounts.get(`${owner}/${repo}`) ?? 0) === 0; + }).length; + activity = { + ...activity, + unsolicitedPrRatio: unsolicitedCount / repos.length, + unsolicitedPrRepoCount: unsolicitedCount, + }; + } + } + + return { profile, activity }; +} + +export async function fetchUserProfile( + login: string, + githubToken?: string, + now: Date = new Date(), +): Promise { + const query = ` + query($login: String!) { + user(login: $login) { + id + login + createdAt + repositories(privacy: PUBLIC) { totalCount } + followers { totalCount } + company + email + websiteUrl + bio + twitterUsername + contributionsCollection { + contributionCalendar { totalContributions } + } + repositoriesContributedTo(first: 10, contributionTypes: COMMIT) { + nodes { nameWithOwner } + } + publicKeys(first: 1) { totalCount } + } + } + `; + const data = await graphqlQuery(query, { login }, githubToken); + if (!data.user) { + throw new Error(`User ${login} not found`); + } + + const user = data.user; + const createdAt = parseDate(user.createdAt); + const accountAgeDays = createdAt ? daysBetween(now, createdAt) : undefined; + + return { + nodeId: user.id, + login: user.login ?? login, + accountAgeDays, + publicRepos: user.repositories?.totalCount ?? 0, + followers: user.followers?.totalCount ?? 0, + company: optionalString(user.company), + email: optionalString(user.email), + blog: optionalString(user.websiteUrl), + bio: optionalString(user.bio), + xUsername: optionalString(user.twitterUsername), + totalContributions: + user.contributionsCollection?.contributionCalendar?.totalContributions, + reposContributedTo: + user.repositoriesContributedTo?.nodes + ?.map((node) => node?.nameWithOwner) + .filter((repo): repo is string => Boolean(repo)) ?? [], + orgs: [], + hasGpgKeys: (user.publicKeys?.totalCount ?? 0) > 0, + }; +} + +export async function fetchUserOrgs(login: string, githubToken?: string): Promise { + const url = `${GITHUB_API}/users/${encodeURIComponent(login)}/orgs`; + const response = await fetch(url, { + headers: githubHeaders(githubToken), + signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS), + }); + if (!response.ok) { + return []; + } + + const orgs = (await response.json()) as Array<{ login?: string }>; + return Array.isArray(orgs) + ? orgs.map((org) => org.login).filter((org): org is string => Boolean(org)) + : []; +} + +export async function fetchActivitySummary( + login: string, + githubToken?: string, + now: Date = new Date(), +): Promise { + const url = `${GITHUB_API}/users/${encodeURIComponent(login)}/events/public?per_page=100`; + const response = await fetch(url, { + headers: githubHeaders(githubToken), + signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS), + }); + if (!response.ok) { + return emptyActivitySummary(); + } + + const events = (await response.json()) as GitHubEvent[]; + if (!Array.isArray(events) || events.length === 0) { + return emptyActivitySummary(); + } + + const sevenDaysAgo = new Date(now.getTime() - 7 * 24 * 60 * 60 * 1000); + const twentyFourHoursAgo = new Date(now.getTime() - 24 * 60 * 60 * 1000); + const repos: string[] = []; + const repos7d = new Set(); + const prTargetRepos: string[] = []; + const prTargetRepos7d = new Set(); + const prRejectedRepos = new Set(); + const prRefs: Array<[string, number]> = []; + + let pushCount = 0; + let prCount = 0; + let reviewCount = 0; + let prOpenedCount = 0; + let prOpened24h = 0; + let prClosedNotMerged = 0; + let prMergedCount = 0; + let lastEventAt: Date | undefined; + let oldestEventAt: Date | undefined; + let hasUpstreamPr = false; + + for (const event of events) { + const createdAt = parseDate(event.created_at); + if (createdAt) { + if (!lastEventAt || createdAt > lastEventAt) lastEventAt = createdAt; + if (!oldestEventAt || createdAt < oldestEventAt) oldestEventAt = createdAt; + } + + const repoName = event.repo?.name; + if (event.type === "PushEvent") { + pushCount += 1; + addRepoSignals(repoName, createdAt, repos, repos7d, sevenDaysAgo); + continue; + } + + if (event.type === "PullRequestEvent") { + prCount += 1; + hasUpstreamPr = true; + addRepoSignals(repoName, createdAt, repos, repos7d, sevenDaysAgo); + + const payload = event.payload ?? {}; + const action = typeof payload.action === "string" ? payload.action : ""; + const pr = payload.pull_request ?? {}; + const baseRepo = + optionalString(pr.base?.repo?.full_name) ?? optionalString(repoName) ?? ""; + const prNumber = typeof pr.number === "number" ? pr.number : undefined; + + if (action === "opened") { + prOpenedCount += 1; + if (createdAt && createdAt > twentyFourHoursAgo) prOpened24h += 1; + addUnique(prTargetRepos, baseRepo); + if (baseRepo && createdAt && createdAt > sevenDaysAgo) { + prTargetRepos7d.add(baseRepo); + } + if (baseRepo && prNumber != null && prRefs.length < 20) { + prRefs.push([baseRepo, prNumber]); + } + } else if (action === "closed") { + const merged = Boolean(pr.merged); + if (merged) { + prMergedCount += 1; + } else { + prClosedNotMerged += 1; + if (baseRepo) prRejectedRepos.add(baseRepo); + } + } + continue; + } + + if (event.type === "PullRequestReviewEvent") { + reviewCount += 1; + hasUpstreamPr = true; + addRepoSignals(repoName, createdAt, repos, repos7d, sevenDaysAgo); + } + } + + const recentPrs = await batchFetchPrDetails(prRefs, githubToken); + + return { + repos, + totalEvents: events.length, + lastEventAt, + oldestEventAt, + pushCount, + prCount, + reviewCount, + distinctRepos7d: repos7d.size, + hasForkOnlyActivity: pushCount > 0 && !hasUpstreamPr && prCount === 0 && reviewCount === 0, + prOpenedCount, + prTargetRepos7d: prTargetRepos7d.size, + prOpened24h, + prTargetRepos, + prClosedNotMerged, + prMergedCount, + prRejectedRepos: prRejectedRepos.size, + unsolicitedPrRatio: 0, + unsolicitedPrRepoCount: 0, + recentPrs, + }; +} + +export async function fetchTotalCommitCount( + login: string, + githubToken?: string, +): Promise { + const url = new URL(`${GITHUB_API}/search/commits`); + url.searchParams.set("q", `author:${login}`); + url.searchParams.set("per_page", "1"); + const response = await fetch(url, { + headers: githubHeaders(githubToken), + signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS), + }); + if (!response.ok) { + return undefined; + } + const body = (await response.json()) as { total_count?: number }; + return typeof body.total_count === "number" ? body.total_count : undefined; +} + +export async function batchCheckCommitHistory( + userNodeId: string, + repos: Array<[string, string]>, + githubToken?: string, +): Promise> { + const results = new Map(); + if (repos.length === 0) return results; + + const fragments = repos.map(([owner, repo], index) => { + return `repo${index}: repository(owner: ${JSON.stringify(owner)}, name: ${JSON.stringify(repo)}) { + defaultBranchRef { + target { + ... on Commit { + history(author: { id: ${JSON.stringify(userNodeId)} }, first: 0) { + totalCount + } + } + } + } + }`; + }); + + try { + const data = await graphqlQuery>( + `query { ${fragments.join("\n")} }`, + {}, + githubToken, + ); + repos.forEach(([owner, repo], index) => { + const totalCount = + data[`repo${index}`]?.defaultBranchRef?.target?.history?.totalCount ?? 0; + results.set(`${owner}/${repo}`, totalCount); + }); + } catch { + return results; + } + + return results; +} + +export async function batchFetchPrDetails( + prRefs: Array<[string, number]>, + githubToken?: string, +): Promise { + if (prRefs.length === 0) return []; + + const validRefs: Array<{ index: number; repoFull: string }> = []; + const fragments = prRefs.flatMap(([repoFull, number], index) => { + const parsed = parseRepoIdentifier(repoFull); + if (!parsed) return []; + const [owner, repo] = parsed; + validRefs.push({ index, repoFull }); + return `pr${index}: repository(owner: ${JSON.stringify(owner)}, name: ${JSON.stringify(repo)}) { + pullRequest(number: ${number}) { + title + body + } + }`; + }); + + if (fragments.length === 0) return []; + + try { + const data = await graphqlQuery>( + `query { ${fragments.join("\n")} }`, + {}, + githubToken, + ); + return validRefs.map(({ index, repoFull }) => { + const pr = data[`pr${index}`]?.pullRequest; + const title = pr?.title ?? ""; + const body = pr?.body ?? ""; + return { + title, + bodyLen: body.length, + hasIssueRef: ISSUE_REF_RE.test(title) || ISSUE_REF_RE.test(body), + repo: repoFull, + }; + }); + } catch { + return []; + } +} + +export function parseRepoIdentifier(repoFull: string): [string, string] | undefined { + const parts = repoFull.split("/"); + if (parts.length !== 2 || !parts[0] || !parts[1]) return undefined; + return [parts[0], parts[1]]; +} + +async function graphqlQuery( + query: string, + variables: Record, + githubToken?: string, +): Promise { + const response = await fetch(`${GITHUB_API}/graphql`, { + method: "POST", + headers: { + ...githubHeaders(githubToken), + "content-type": "application/json", + }, + body: JSON.stringify({ query, variables }), + signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS), + }); + if (!response.ok) { + throw new Error(`GitHub GraphQL returned ${response.status}`); + } + + const body = (await response.json()) as GqlResponse; + if (body.errors?.length) { + throw new Error(`GitHub GraphQL errors: ${body.errors.map((e) => e.message).join("; ")}`); + } + if (!body.data) { + throw new Error("GitHub GraphQL response missing data"); + } + return body.data; +} + +function githubHeaders(githubToken?: string): HeadersInit { + return { + accept: "application/vnd.github+json", + "user-agent": USER_AGENT, + "x-github-api-version": GITHUB_API_VERSION, + ...(githubToken ? { authorization: `Bearer ${githubToken}` } : {}), + }; +} + +function addRepoSignals( + repoName: string | undefined, + createdAt: Date | undefined, + repos: string[], + repos7d: Set, + sevenDaysAgo: Date, +) { + if (!repoName) return; + addUnique(repos, repoName); + if (createdAt && createdAt > sevenDaysAgo) repos7d.add(repoName); +} + +function addUnique(values: string[], value: string) { + if (value && !values.includes(value)) { + values.push(value); + } +} + +function parseDate(value: unknown): Date | undefined { + if (typeof value !== "string") return undefined; + const millis = Date.parse(value); + return Number.isNaN(millis) ? undefined : new Date(millis); +} + +function optionalString(value: unknown): string | undefined { + return typeof value === "string" && value.length > 0 ? value : undefined; +} + +function daysBetween(later: Date, earlier: Date): number { + return Math.max(0, Math.floor((later.getTime() - earlier.getTime()) / (24 * 60 * 60 * 1000))); +} + +interface GqlResponse { + data?: T; + errors?: Array<{ message: string }>; +} + +interface GqlUserData { + user?: { + id?: string; + login?: string; + createdAt?: string; + repositories?: { totalCount?: number }; + followers?: { totalCount?: number }; + company?: string | null; + email?: string | null; + websiteUrl?: string | null; + bio?: string | null; + twitterUsername?: string | null; + organizations?: { nodes?: Array<{ login?: string } | null> }; + contributionsCollection?: { + contributionCalendar?: { totalContributions?: number }; + }; + repositoriesContributedTo?: { nodes?: Array<{ nameWithOwner?: string } | null> }; + publicKeys?: { totalCount?: number }; + } | null; +} + +interface GitHubEvent { + type?: string; + created_at?: string; + repo?: { name?: string }; + payload?: { + action?: string; + pull_request?: { + number?: number; + merged?: boolean; + base?: { repo?: { full_name?: string } }; + }; + }; +} + +interface CommitHistoryNode { + defaultBranchRef?: { + target?: { + history?: { totalCount?: number }; + }; + } | null; +} + +interface PullRequestDetailNode { + pullRequest?: { + title?: string; + body?: string; + } | null; +}