From 658c0f5b8ef2812c84f325a2843e3556f21e28e4 Mon Sep 17 00:00:00 2001 From: msukkari Date: Thu, 26 Feb 2026 15:06:42 -0800 Subject: [PATCH 1/8] feat(web): add MCP and API key usage tracking to analytics Move audit event creation from client-side to service functions for search, repos, file source, and file tree endpoints. Add source metadata to distinguish MCP requests from other API calls. Extend analytics SQL to include new actions and display MCP request and API request counts on the analytics dashboard. Co-Authored-By: Claude Haiku 4.5 --- .../components/searchBar/searchBar.tsx | 8 ----- .../app/api/(server)/repos/listReposApi.ts | 15 +++++++++- .../web/src/ee/features/analytics/actions.ts | 20 +++++++++---- .../features/analytics/analyticsContent.tsx | 30 ++++++++++++++++--- .../web/src/ee/features/analytics/types.ts | 2 ++ packages/web/src/ee/features/audit/types.ts | 1 + .../web/src/features/git/getFileSourceApi.ts | 15 +++++++++- packages/web/src/features/git/getTreeApi.ts | 15 +++++++++- packages/web/src/features/search/searchApi.ts | 28 +++++++++++++++-- 9 files changed, 111 insertions(+), 23 deletions(-) diff --git a/packages/web/src/app/[domain]/components/searchBar/searchBar.tsx b/packages/web/src/app/[domain]/components/searchBar/searchBar.tsx index dda7ab2ab..fa27b1945 100644 --- a/packages/web/src/app/[domain]/components/searchBar/searchBar.tsx +++ b/packages/web/src/app/[domain]/components/searchBar/searchBar.tsx @@ -42,7 +42,6 @@ import { Separator } from "@/components/ui/separator"; import { Tooltip, TooltipTrigger, TooltipContent } from "@/components/ui/tooltip"; import { Toggle } from "@/components/ui/toggle"; import { useDomain } from "@/hooks/useDomain"; -import { createAuditAction } from "@/ee/features/audit/actions"; import tailwind from "@/tailwind"; import { CaseSensitiveIcon, RegexIcon } from "lucide-react"; @@ -216,13 +215,6 @@ export const SearchBar = ({ setIsSuggestionsEnabled(false); setIsHistorySearchEnabled(false); - createAuditAction({ - action: "user.performed_code_search", - metadata: { - message: query, - }, - }) - const url = createPathWithQueryParams(`/${domain}/search`, [SearchQueryParams.query, query], [SearchQueryParams.isRegexEnabled, isRegexEnabled ? "true" : null], diff --git a/packages/web/src/app/api/(server)/repos/listReposApi.ts b/packages/web/src/app/api/(server)/repos/listReposApi.ts index d5f743cbb..adffe1a00 100644 --- a/packages/web/src/app/api/(server)/repos/listReposApi.ts +++ b/packages/web/src/app/api/(server)/repos/listReposApi.ts @@ -1,11 +1,24 @@ import { sew } from "@/actions"; +import { getAuditService } from "@/ee/features/audit/factory"; import { ListReposQueryParams, RepositoryQuery } from "@/lib/types"; import { withOptionalAuthV2 } from "@/withAuthV2"; import { getBrowsePath } from "@/app/[domain]/browse/hooks/utils"; import { env } from "@sourcebot/shared"; +import { headers } from "next/headers"; export const listRepos = async ({ query, page, perPage, sort, direction }: ListReposQueryParams) => sew(() => - withOptionalAuthV2(async ({ org, prisma }) => { + withOptionalAuthV2(async ({ org, prisma, user }) => { + if (user) { + const source = (await headers()).get('X-Sourcebot-Client-Source') ?? undefined; + getAuditService().createAudit({ + action: 'user.listed_repos', + actor: { id: user.id, type: 'user' }, + target: { id: org.id.toString(), type: 'org' }, + orgId: org.id, + metadata: { source }, + }).catch(() => {}); + } + const skip = (page - 1) * perPage; const orderByField = sort === 'pushed' ? 'pushedAt' : 'name'; const baseUrl = env.AUTH_URL; diff --git a/packages/web/src/ee/features/analytics/actions.ts b/packages/web/src/ee/features/analytics/actions.ts index 4610fd91b..3c353b03b 100644 --- a/packages/web/src/ee/features/analytics/actions.ts +++ b/packages/web/src/ee/features/analytics/actions.ts @@ -27,21 +27,25 @@ export const getAnalytics = async (domain: string, apiKey: string | undefined = date_trunc('week', "timestamp") AS week, date_trunc('month', "timestamp") AS month, action, - "actorId" + "actorId", + metadata FROM "Audit" WHERE "orgId" = ${org.id} AND action IN ( 'user.performed_code_search', 'user.performed_find_references', 'user.performed_goto_definition', - 'user.created_ask_chat' + 'user.created_ask_chat', + 'user.listed_repos', + 'user.fetched_file_source', + 'user.fetched_file_tree' ) ), - + periods AS ( SELECT unnest(array['day', 'week', 'month']) AS period ), - + buckets AS ( SELECT generate_series( @@ -67,7 +71,7 @@ export const getAnalytics = async (domain: string, apiKey: string | undefined = ), 'month' ), - + aggregated AS ( SELECT b.period, @@ -79,6 +83,8 @@ export const getAnalytics = async (domain: string, apiKey: string | undefined = COUNT(*) FILTER (WHERE c.action = 'user.performed_code_search') AS code_searches, COUNT(*) FILTER (WHERE c.action IN ('user.performed_find_references', 'user.performed_goto_definition')) AS navigations, COUNT(*) FILTER (WHERE c.action = 'user.created_ask_chat') AS ask_chats, + COUNT(*) FILTER (WHERE c.metadata->>'source' = 'mcp') AS mcp_requests, + COUNT(*) FILTER (WHERE c.metadata->>'source' IS NOT NULL AND c.metadata->>'source' != 'mcp') AS api_requests, COUNT(DISTINCT c."actorId") AS active_users FROM core c JOIN LATERAL ( @@ -86,13 +92,15 @@ export const getAnalytics = async (domain: string, apiKey: string | undefined = ) b ON true GROUP BY b.period, bucket ) - + SELECT b.period, b.bucket, COALESCE(a.code_searches, 0)::int AS code_searches, COALESCE(a.navigations, 0)::int AS navigations, COALESCE(a.ask_chats, 0)::int AS ask_chats, + COALESCE(a.mcp_requests, 0)::int AS mcp_requests, + COALESCE(a.api_requests, 0)::int AS api_requests, COALESCE(a.active_users, 0)::int AS active_users FROM buckets b LEFT JOIN aggregated a diff --git a/packages/web/src/ee/features/analytics/analyticsContent.tsx b/packages/web/src/ee/features/analytics/analyticsContent.tsx index 093b2c7ec..562c9f888 100644 --- a/packages/web/src/ee/features/analytics/analyticsContent.tsx +++ b/packages/web/src/ee/features/analytics/analyticsContent.tsx @@ -2,7 +2,7 @@ import { ChartTooltip } from "@/components/ui/chart" import { Area, AreaChart, ResponsiveContainer, XAxis, YAxis } from "recharts" -import { Users, LucideIcon, Search, ArrowRight, Activity, Calendar, MessageCircle } from "lucide-react" +import { Users, LucideIcon, Search, ArrowRight, Activity, Calendar, MessageCircle, Wrench, Key } from "lucide-react" import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card" import { ChartContainer } from "@/components/ui/chart" import { useQuery } from "@tanstack/react-query" @@ -28,7 +28,7 @@ interface AnalyticsChartProps { title: string icon: LucideIcon period: "day" | "week" | "month" - dataKey: "code_searches" | "navigations" | "ask_chats" | "active_users" + dataKey: "code_searches" | "navigations" | "ask_chats" | "mcp_requests" | "api_requests" | "active_users" color: string gradientId: string } @@ -175,7 +175,7 @@ function LoadingSkeleton() { {/* Chart skeletons */} - {[1, 2, 3, 4].map((chartIndex) => ( + {[1, 2, 3, 4, 5, 6].map((chartIndex) => (
@@ -217,7 +217,7 @@ export function AnalyticsContent() { dark: "#60a5fa", }, searches: { - light: "#f59e0b", + light: "#f59e0b", dark: "#fbbf24", }, navigations: { @@ -228,6 +228,14 @@ export function AnalyticsContent() { light: "#8b5cf6", dark: "#a78bfa", }, + mcpRequests: { + light: "#10b981", + dark: "#34d399", + }, + apiRequests: { + light: "#14b8a6", + dark: "#2dd4bf", + }, }), []) const getColor = (colorKey: keyof typeof chartColors) => { @@ -289,6 +297,20 @@ export function AnalyticsContent() { dataKey: "ask_chats" as const, gradientId: "askChats", }, + { + title: `${periodLabels[selectedPeriod]} MCP Requests`, + icon: Wrench, + color: getColor("mcpRequests"), + dataKey: "mcp_requests" as const, + gradientId: "mcpRequests", + }, + { + title: `${periodLabels[selectedPeriod]} API Requests`, + icon: Key, + color: getColor("apiRequests"), + dataKey: "api_requests" as const, + gradientId: "apiRequests", + }, ] return ( diff --git a/packages/web/src/ee/features/analytics/types.ts b/packages/web/src/ee/features/analytics/types.ts index c2b573616..67d5b019b 100644 --- a/packages/web/src/ee/features/analytics/types.ts +++ b/packages/web/src/ee/features/analytics/types.ts @@ -6,6 +6,8 @@ export const analyticsResponseSchema = z.array(z.object({ code_searches: z.number(), navigations: z.number(), ask_chats: z.number(), + mcp_requests: z.number(), + api_requests: z.number(), active_users: z.number(), })) export type AnalyticsResponse = z.infer; \ No newline at end of file diff --git a/packages/web/src/ee/features/audit/types.ts b/packages/web/src/ee/features/audit/types.ts index bd19d6bb0..e79b6957f 100644 --- a/packages/web/src/ee/features/audit/types.ts +++ b/packages/web/src/ee/features/audit/types.ts @@ -17,6 +17,7 @@ export const auditMetadataSchema = z.object({ message: z.string().optional(), api_key: z.string().optional(), emails: z.string().optional(), // comma separated list of emails + source: z.string().optional(), // request source (e.g., 'mcp') from X-Sourcebot-Client-Source header }) export type AuditMetadata = z.infer; diff --git a/packages/web/src/features/git/getFileSourceApi.ts b/packages/web/src/features/git/getFileSourceApi.ts index 94492ddf3..f098e654c 100644 --- a/packages/web/src/features/git/getFileSourceApi.ts +++ b/packages/web/src/features/git/getFileSourceApi.ts @@ -1,11 +1,13 @@ import { sew } from '@/actions'; import { getBrowsePath } from '@/app/[domain]/browse/hooks/utils'; +import { getAuditService } from '@/ee/features/audit/factory'; import { SINGLE_TENANT_ORG_DOMAIN } from '@/lib/constants'; import { detectLanguageFromFilename } from '@/lib/languageDetection'; import { ServiceError, notFound, fileNotFound, unexpectedError } from '@/lib/serviceError'; import { getCodeHostBrowseFileAtBranchUrl } from '@/lib/utils'; import { withOptionalAuthV2 } from '@/withAuthV2'; import { getRepoPath } from '@sourcebot/shared'; +import { headers } from 'next/headers'; import simpleGit from 'simple-git'; import z from 'zod'; import { CodeHostType } from '@sourcebot/db'; @@ -30,7 +32,18 @@ export const fileSourceResponseSchema = z.object({ }); export type FileSourceResponse = z.infer; -export const getFileSource = async ({ path: filePath, repo: repoName, ref }: FileSourceRequest): Promise => sew(() => withOptionalAuthV2(async ({ org, prisma }) => { +export const getFileSource = async ({ path: filePath, repo: repoName, ref }: FileSourceRequest): Promise => sew(() => withOptionalAuthV2(async ({ org, prisma, user }) => { + if (user) { + const source = (await headers()).get('X-Sourcebot-Client-Source') ?? undefined; + getAuditService().createAudit({ + action: 'user.fetched_file_source', + actor: { id: user.id, type: 'user' }, + target: { id: org.id.toString(), type: 'org' }, + orgId: org.id, + metadata: { source }, + }).catch(() => {}); + } + const repo = await prisma.repo.findFirst({ where: { name: repoName, orgId: org.id }, }); diff --git a/packages/web/src/features/git/getTreeApi.ts b/packages/web/src/features/git/getTreeApi.ts index a4af9acb7..3136b5586 100644 --- a/packages/web/src/features/git/getTreeApi.ts +++ b/packages/web/src/features/git/getTreeApi.ts @@ -1,7 +1,9 @@ import { sew } from '@/actions'; +import { getAuditService } from '@/ee/features/audit/factory'; import { notFound, ServiceError, unexpectedError } from '@/lib/serviceError'; import { withOptionalAuthV2 } from "@/withAuthV2"; import { getRepoPath } from '@sourcebot/shared'; +import { headers } from 'next/headers'; import simpleGit from 'simple-git'; import z from 'zod'; import { fileTreeNodeSchema } from './types'; @@ -25,7 +27,18 @@ export type GetTreeResponse = z.infer; * into a single tree. */ export const getTree = async ({ repoName, revisionName, paths }: GetTreeRequest): Promise => sew(() => - withOptionalAuthV2(async ({ org, prisma }) => { + withOptionalAuthV2(async ({ org, prisma, user }) => { + if (user) { + const source = (await headers()).get('X-Sourcebot-Client-Source') ?? undefined; + getAuditService().createAudit({ + action: 'user.fetched_file_tree', + actor: { id: user.id, type: 'user' }, + target: { id: org.id.toString(), type: 'org' }, + orgId: org.id, + metadata: { source }, + }).catch(() => {}); + } + const repo = await prisma.repo.findFirst({ where: { name: repoName, diff --git a/packages/web/src/features/search/searchApi.ts b/packages/web/src/features/search/searchApi.ts index ff2fb0da7..01cf33ec8 100644 --- a/packages/web/src/features/search/searchApi.ts +++ b/packages/web/src/features/search/searchApi.ts @@ -1,8 +1,10 @@ import { sew } from "@/actions"; +import { getAuditService } from "@/ee/features/audit/factory"; import { getRepoPermissionFilterForUser } from "@/prisma"; import { withOptionalAuthV2 } from "@/withAuthV2"; import { PrismaClient, UserWithAccounts } from "@sourcebot/db"; import { env, hasEntitlement } from "@sourcebot/shared"; +import { headers } from "next/headers"; import { QueryIR } from './ir'; import { parseQuerySyntaxIntoIR } from './parser'; import { SearchOptions } from "./types"; @@ -25,7 +27,18 @@ type QueryIRSearchRequest = { type SearchRequest = QueryStringSearchRequest | QueryIRSearchRequest; export const search = (request: SearchRequest) => sew(() => - withOptionalAuthV2(async ({ prisma, user }) => { + withOptionalAuthV2(async ({ prisma, user, org }) => { + if (user) { + const source = (await headers()).get('X-Sourcebot-Client-Source') ?? undefined; + getAuditService().createAudit({ + action: 'user.performed_code_search', + actor: { id: user.id, type: 'user' }, + target: { id: org.id.toString(), type: 'org' }, + orgId: org.id, + metadata: { source }, + }).catch(() => {}); + } + const repoSearchScope = await getAccessibleRepoNamesForUser({ user, prisma }); // If needed, parse the query syntax into the query intermediate representation. @@ -45,7 +58,18 @@ export const search = (request: SearchRequest) => sew(() => })); export const streamSearch = (request: SearchRequest) => sew(() => - withOptionalAuthV2(async ({ prisma, user }) => { + withOptionalAuthV2(async ({ prisma, user, org }) => { + if (user) { + const source = (await headers()).get('X-Sourcebot-Client-Source') ?? undefined; + getAuditService().createAudit({ + action: 'user.performed_code_search', + actor: { id: user.id, type: 'user' }, + target: { id: org.id.toString(), type: 'org' }, + orgId: org.id, + metadata: { source }, + }).catch(() => {}); + } + const repoSearchScope = await getAccessibleRepoNamesForUser({ user, prisma }); // If needed, parse the query syntax into the query intermediate representation. From 6726cf249177a8e0cb2e4310dccce4cdb96037f7 Mon Sep 17 00:00:00 2001 From: msukkari Date: Thu, 26 Feb 2026 15:07:05 -0800 Subject: [PATCH 2/8] chore: update CHANGELOG for MCP analytics tracking (#948) --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b6a682f50..e255b2d0e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- Added MCP and API key usage tracking to analytics dashboard. Move audit events from client-side to service functions to capture all API calls (web UI, MCP, and non-MCP). Display MCP requests and API requests on separate charts. [#948](https://github.com/sourcebot-dev/sourcebot/pull/948) + ### Fixed - Fixed search query parser rejecting parenthesized regex alternation in filter values (e.g. `file:(test|spec)`, `-file:(test|spec)`). [#946](https://github.com/sourcebot-dev/sourcebot/pull/946) - Fixed `content:` filter ignoring the regex toggle. [#947](https://github.com/sourcebot-dev/sourcebot/pull/947) From 3af7c41689460dde4af75f81d1667afc1e55eb6d Mon Sep 17 00:00:00 2001 From: msukkari Date: Thu, 26 Feb 2026 16:59:07 -0800 Subject: [PATCH 3/8] feat: add audit log retention policy, update analytics UI and docs - Add SOURCEBOT_EE_AUDIT_RETENTION_DAYS env var (default 180) and AuditLogPruner background job that prunes old audit records daily in batches - Surface retention period and oldest record date in analytics page header - Update audit action types table in docs (remove 4 stale, add 11 missing) - Add audit log storage section to sizing guide with enterprise callout and storage estimates - Update mock data script with mixed-usage user profiles and new audit actions Co-Authored-By: Claude Opus 4.6 --- docs/docs/configuration/audit-logs.mdx | 49 ++- .../configuration/environment-variables.mdx | 1 + docs/docs/deployment/sizing-guide.mdx | 28 ++ packages/backend/src/ee/auditLogPruner.ts | 71 ++++ packages/backend/src/index.ts | 4 + .../db/tools/scripts/inject-audit-data.ts | 358 ++++++++++++------ packages/shared/src/env.server.ts | 1 + .../web/src/ee/features/analytics/actions.ts | 18 +- .../features/analytics/analyticsContent.tsx | 16 +- .../web/src/ee/features/analytics/types.ts | 12 +- 10 files changed, 411 insertions(+), 147 deletions(-) create mode 100644 packages/backend/src/ee/auditLogPruner.ts diff --git a/docs/docs/configuration/audit-logs.mdx b/docs/docs/configuration/audit-logs.mdx index 6b38a4ea5..8828cb8e5 100644 --- a/docs/docs/configuration/audit-logs.mdx +++ b/docs/docs/configuration/audit-logs.mdx @@ -15,6 +15,9 @@ This feature gives security and compliance teams the necessary information to en ## Enabling/Disabling Audit Logs Audit logs are enabled by default and can be controlled with the `SOURCEBOT_EE_AUDIT_LOGGING_ENABLED` [environment variable](/docs/configuration/environment-variables). +## Retention Policy +By default, audit logs older than 180 days are automatically pruned daily. You can configure the retention period using the `SOURCEBOT_EE_AUDIT_RETENTION_DAYS` [environment variable](/docs/configuration/environment-variables). Set it to `0` to disable automatic pruning and retain logs indefinitely. + ## Fetching Audit Logs Audit logs are stored in the [postgres database](/docs/overview#architecture) connected to Sourcebot. To fetch all of the audit logs, you can use the following API: @@ -110,30 +113,37 @@ curl --request GET '$SOURCEBOT_URL/api/ee/audit' \ | Action | Actor Type | Target Type | | :------- | :------ | :------| -| `api_key.creation_failed` | `user` | `org` | | `api_key.created` | `user` | `api_key` | -| `api_key.deletion_failed` | `user` | `org` | +| `api_key.creation_failed` | `user` | `org` | | `api_key.deleted` | `user` | `api_key` | +| `api_key.deletion_failed` | `user` | `org` | +| `audit.fetch` | `user` | `org` | +| `chat.deleted` | `user` | `chat` | +| `chat.shared_with_users` | `user` | `chat` | +| `chat.unshared_with_user` | `user` | `chat` | +| `chat.visibility_updated` | `user` | `chat` | +| `org.ownership_transfer_failed` | `user` | `org` | +| `org.ownership_transferred` | `user` | `org` | +| `user.created_ask_chat` | `user` | `org` | | `user.creation_failed` | `user` | `user` | -| `user.owner_created` | `user` | `org` | -| `user.performed_code_search` | `user` | `org` | -| `user.performed_find_references` | `user` | `org` | -| `user.performed_goto_definition` | `user` | `org` | -| `user.created_ask_chat` | `user` | `org` | -| `user.jit_provisioning_failed` | `user` | `org` | -| `user.jit_provisioned` | `user` | `org` | -| `user.join_request_creation_failed` | `user` | `org` | -| `user.join_requested` | `user` | `org` | -| `user.join_request_approve_failed` | `user` | `account_join_request` | -| `user.join_request_approved` | `user` | `account_join_request` | -| `user.invite_failed` | `user` | `org` | -| `user.invites_created` | `user` | `org` | +| `user.delete` | `user` | `user` | +| `user.fetched_file_source` | `user` | `org` | +| `user.fetched_file_tree` | `user` | `org` | | `user.invite_accept_failed` | `user` | `invite` | | `user.invite_accepted` | `user` | `invite` | +| `user.invite_failed` | `user` | `org` | +| `user.invites_created` | `user` | `org` | +| `user.join_request_approve_failed` | `user` | `account_join_request` | +| `user.join_request_approved` | `user` | `account_join_request` | +| `user.list` | `user` | `org` | +| `user.listed_repos` | `user` | `org` | +| `user.owner_created` | `user` | `org` | +| `user.performed_code_search` | `user` | `org` | +| `user.performed_find_references` | `user` | `org` | +| `user.performed_goto_definition` | `user` | `org` | +| `user.read` | `user` | `user` | | `user.signed_in` | `user` | `user` | | `user.signed_out` | `user` | `user` | -| `org.ownership_transfer_failed` | `user` | `org` | -| `org.ownership_transferred` | `user` | `org` | ## Response schema @@ -180,7 +190,7 @@ curl --request GET '$SOURCEBOT_URL/api/ee/audit' \ }, "targetType": { "type": "string", - "enum": ["user", "org", "file", "api_key", "account_join_request", "invite"] + "enum": ["user", "org", "file", "api_key", "account_join_request", "invite", "chat"] }, "sourcebotVersion": { "type": "string" @@ -192,7 +202,8 @@ curl --request GET '$SOURCEBOT_URL/api/ee/audit' \ "properties": { "message": { "type": "string" }, "api_key": { "type": "string" }, - "emails": { "type": "string" } + "emails": { "type": "string" }, + "source": { "type": "string" } }, "additionalProperties": false }, diff --git a/docs/docs/configuration/environment-variables.mdx b/docs/docs/configuration/environment-variables.mdx index 54a0609e1..e802da0fe 100644 --- a/docs/docs/configuration/environment-variables.mdx +++ b/docs/docs/configuration/environment-variables.mdx @@ -42,6 +42,7 @@ The following environment variables allow you to configure your Sourcebot deploy | `HTTPS_PROXY` | - |

HTTPS proxy URL for routing SSL requests through a proxy server (e.g., `http://proxy.company.com:8080`). Requires `NODE_USE_ENV_PROXY=1`.

| | `NO_PROXY` | - |

Comma-separated list of hostnames or domains that should bypass the proxy (e.g., `localhost,127.0.0.1,.internal.domain`). Requires `NODE_USE_ENV_PROXY=1`.

| | `SOURCEBOT_EE_AUDIT_LOGGING_ENABLED` | `true` |

Enables/disables audit logging

| +| `SOURCEBOT_EE_AUDIT_RETENTION_DAYS` | `180` |

The number of days to retain audit logs. Audit log records older than this will be automatically pruned daily. Set to `0` to disable pruning and retain logs indefinitely.

| | `AUTH_EE_GCP_IAP_ENABLED` | `false` |

When enabled, allows Sourcebot to automatically register/login from a successful GCP IAP redirect

| | `AUTH_EE_GCP_IAP_AUDIENCE` | - |

The GCP IAP audience to use when verifying JWT tokens. Must be set to enable GCP IAP JIT provisioning

| | `EXPERIMENT_EE_PERMISSION_SYNC_ENABLED` | `false` |

Enables [permission syncing](/docs/features/permission-syncing).

| diff --git a/docs/docs/deployment/sizing-guide.mdx b/docs/docs/deployment/sizing-guide.mdx index 0dd3b7344..0966ff141 100644 --- a/docs/docs/deployment/sizing-guide.mdx +++ b/docs/docs/deployment/sizing-guide.mdx @@ -45,6 +45,34 @@ If your instance is resource-constrained, you can reduce the concurrency of back Lowering these values reduces peak resource usage at the cost of slower initial indexing. +## Audit log storage + + +Audit logging is an enterprise feature and is only available with an [enterprise license](/docs/overview#license-key). If you are not on an enterprise plan, audit logs are not stored and this section does not apply. + + +[Audit logs](/docs/configuration/audit-logs) are stored in the Postgres database connected to your Sourcebot deployment. Each audit record captures the action performed, the actor, the target, a timestamp, and optional metadata (e.g., request source). There are three database indexes on the audit table to support analytics and lookup queries. + +**Estimated storage per audit event: ~350 bytes** (including row data and indexes). + + +The table below assumes 50 events per user per day. The actual number depends on usage patterns — each user action (code search, file view, navigation, Ask chat, etc.) creates one audit event. Users who interact via [MCP](/docs/features/mcp-server) or the [API](/docs/api-reference/search) tend to generate significantly more events than web-only users, so your real usage may vary. + + +| Team size | Avg events / user / day | Daily events | Monthly storage | 6-month storage | +|---|---|---|---|---| +| 10 users | 50 | 500 | ~5 MB | ~30 MB | +| 50 users | 50 | 2,500 | ~25 MB | ~150 MB | +| 100 users | 50 | 5,000 | ~50 MB | ~300 MB | +| 500 users | 50 | 25,000 | ~250 MB | ~1.5 GB | +| 1,000 users | 50 | 50,000 | ~500 MB | ~3 GB | + +### Retention policy + +By default, audit logs older than **180 days** are automatically pruned daily by a background job. You can adjust this with the `SOURCEBOT_EE_AUDIT_RETENTION_DAYS` [environment variable](/docs/configuration/environment-variables). Set it to `0` to disable pruning and retain logs indefinitely. + +For most deployments, the default 180-day retention keeps database size manageable. If you have a large team with heavy MCP/API usage and need longer retention, plan your Postgres disk allocation accordingly using the estimates above. + ## Monitoring We recommend monitoring the following metrics after deployment to validate your sizing: diff --git a/packages/backend/src/ee/auditLogPruner.ts b/packages/backend/src/ee/auditLogPruner.ts new file mode 100644 index 000000000..aa98cd0a8 --- /dev/null +++ b/packages/backend/src/ee/auditLogPruner.ts @@ -0,0 +1,71 @@ +import { PrismaClient } from "@sourcebot/db"; +import { createLogger, env } from "@sourcebot/shared"; +import { setIntervalAsync } from "../utils.js"; + +const BATCH_SIZE = 10_000; +const ONE_DAY_MS = 24 * 60 * 60 * 1000; + +const logger = createLogger('audit-log-pruner'); + +export class AuditLogPruner { + private interval?: NodeJS.Timeout; + + constructor(private db: PrismaClient) {} + + startScheduler() { + if (env.SOURCEBOT_EE_AUDIT_LOGGING_ENABLED !== 'true') { + logger.info('Audit logging is disabled, skipping audit log pruner.'); + return; + } + + if (env.SOURCEBOT_EE_AUDIT_RETENTION_DAYS <= 0) { + logger.info('SOURCEBOT_EE_AUDIT_RETENTION_DAYS is 0, audit log pruning is disabled.'); + return; + } + + logger.info(`Audit log pruner started. Retaining logs for ${env.SOURCEBOT_EE_AUDIT_RETENTION_DAYS} days.`); + + // Run immediately on startup, then every 24 hours + this.pruneOldAuditLogs(); + this.interval = setIntervalAsync(() => this.pruneOldAuditLogs(), ONE_DAY_MS); + } + + async dispose() { + if (this.interval) { + clearInterval(this.interval); + this.interval = undefined; + } + } + + private async pruneOldAuditLogs() { + const cutoff = new Date(Date.now() - env.SOURCEBOT_EE_AUDIT_RETENTION_DAYS * ONE_DAY_MS); + let totalDeleted = 0; + + logger.info(`Pruning audit logs older than ${cutoff.toISOString()}...`); + + // Delete in batches to avoid long-running transactions + while (true) { + const batch = await this.db.audit.findMany({ + where: { timestamp: { lt: cutoff } }, + select: { id: true }, + take: BATCH_SIZE, + }); + + if (batch.length === 0) break; + + const result = await this.db.audit.deleteMany({ + where: { id: { in: batch.map(r => r.id) } }, + }); + + totalDeleted += result.count; + + if (batch.length < BATCH_SIZE) break; + } + + if (totalDeleted > 0) { + logger.info(`Pruned ${totalDeleted} audit log records.`); + } else { + logger.info('No audit log records to prune.'); + } + } +} diff --git a/packages/backend/src/index.ts b/packages/backend/src/index.ts index 81c39b84d..5892fc70c 100644 --- a/packages/backend/src/index.ts +++ b/packages/backend/src/index.ts @@ -12,6 +12,7 @@ import { ConfigManager } from "./configManager.js"; import { ConnectionManager } from './connectionManager.js'; import { INDEX_CACHE_DIR, REPOS_CACHE_DIR, SHUTDOWN_SIGNALS } from './constants.js'; import { AccountPermissionSyncer } from "./ee/accountPermissionSyncer.js"; +import { AuditLogPruner } from "./ee/auditLogPruner.js"; import { GithubAppManager } from "./ee/githubAppManager.js"; import { RepoPermissionSyncer } from './ee/repoPermissionSyncer.js'; import { shutdownPosthog } from "./posthog.js"; @@ -64,9 +65,11 @@ const repoPermissionSyncer = new RepoPermissionSyncer(prisma, settings, redis); const accountPermissionSyncer = new AccountPermissionSyncer(prisma, settings, redis); const repoIndexManager = new RepoIndexManager(prisma, settings, redis, promClient); const configManager = new ConfigManager(prisma, connectionManager, env.CONFIG_PATH); +const auditLogPruner = new AuditLogPruner(prisma); connectionManager.startScheduler(); repoIndexManager.startScheduler(); +auditLogPruner.startScheduler(); if (env.EXPERIMENT_EE_PERMISSION_SYNC_ENABLED === 'true' && !hasEntitlement('permission-syncing')) { logger.error('Permission syncing is not supported in current plan. Please contact team@sourcebot.dev for assistance.'); @@ -105,6 +108,7 @@ const listenToShutdownSignals = () => { await connectionManager.dispose() await repoPermissionSyncer.dispose() await accountPermissionSyncer.dispose() + await auditLogPruner.dispose() await configManager.dispose() await prisma.$disconnect(); diff --git a/packages/db/tools/scripts/inject-audit-data.ts b/packages/db/tools/scripts/inject-audit-data.ts index 56478e3e5..404ee9c45 100644 --- a/packages/db/tools/scripts/inject-audit-data.ts +++ b/packages/db/tools/scripts/inject-audit-data.ts @@ -1,18 +1,35 @@ import { Script } from "../scriptRunner"; -import { PrismaClient } from "../../dist"; +import { PrismaClient, Prisma } from "../../dist"; import { confirmAction } from "../utils"; +// User profile: defines how a user interacts with Sourcebot +interface UserProfile { + id: string + // Whether this user uses the web UI, and how active they are (0 = never, 1 = heavy) + webWeight: number + // Whether this user uses MCP, and how active they are (0 = never, 1 = heavy) + mcpWeight: number + // Whether this user uses the API directly, and how active they are (0 = never, 1 = heavy) + apiWeight: number + // API source label (for non-MCP API usage) + apiSource: string + // How likely they are to be active on a weekday (0-1) + weekdayActivity: number + // How likely they are to be active on a weekend (0-1) + weekendActivity: number +} + // Generate realistic audit data for analytics testing -// Simulates 50 engineers with varying activity patterns +// Simulates 50 users with mixed usage patterns across web UI, MCP, and API export const injectAuditData: Script = { run: async (prisma: PrismaClient) => { const orgId = 1; - + // Check if org exists const org = await prisma.org.findUnique({ where: { id: orgId } }); - + if (!org) { console.error(`Organization with id ${orgId} not found. Please create it first.`); return; @@ -20,154 +37,259 @@ export const injectAuditData: Script = { console.log(`Injecting audit data for organization: ${org.name} (${org.domain})`); - // Generate 50 fake user IDs - const userIds = Array.from({ length: 50 }, (_, i) => `user_${String(i + 1).padStart(3, '0')}`); - - // Actions we're tracking - const actions = [ - 'user.performed_code_search', - 'user.performed_find_references', - 'user.performed_goto_definition', - 'user.created_ask_chat' - ]; + const apiSources = ['cli', 'sdk', 'custom-app']; + + // Build user profiles with mixed usage patterns + const users: UserProfile[] = []; + + // Web-only users (20): browse the UI, never use MCP or API + for (let i = 0; i < 20; i++) { + users.push({ + id: `user_${String(users.length + 1).padStart(3, '0')}`, + webWeight: 0.6 + Math.random() * 0.4, // 0.6-1.0 + mcpWeight: 0, + apiWeight: 0, + apiSource: '', + weekdayActivity: 0.7 + Math.random() * 0.2, + weekendActivity: 0.05 + Math.random() * 0.15, + }); + } + + // Hybrid web + MCP users (12): use the web UI daily and also have MCP set up in their IDE + for (let i = 0; i < 12; i++) { + users.push({ + id: `user_${String(users.length + 1).padStart(3, '0')}`, + webWeight: 0.4 + Math.random() * 0.4, // 0.4-0.8 + mcpWeight: 0.5 + Math.random() * 0.5, // 0.5-1.0 + apiWeight: 0, + apiSource: '', + weekdayActivity: 0.8 + Math.random() * 0.15, + weekendActivity: 0.1 + Math.random() * 0.2, + }); + } + + // MCP-heavy users (8): primarily use MCP through their IDE, occasionally check the web UI + for (let i = 0; i < 8; i++) { + users.push({ + id: `user_${String(users.length + 1).padStart(3, '0')}`, + webWeight: 0.05 + Math.random() * 0.2, // 0.05-0.25 (occasional) + mcpWeight: 0.7 + Math.random() * 0.3, // 0.7-1.0 + apiWeight: 0, + apiSource: '', + weekdayActivity: 0.85 + Math.random() * 0.1, + weekendActivity: 0.3 + Math.random() * 0.3, + }); + } + + // API-only users (5): automated scripts/CI, no web UI or MCP + for (let i = 0; i < 5; i++) { + users.push({ + id: `user_${String(users.length + 1).padStart(3, '0')}`, + webWeight: 0, + mcpWeight: 0, + apiWeight: 0.6 + Math.random() * 0.4, + apiSource: apiSources[i % apiSources.length], + weekdayActivity: 0.9 + Math.random() * 0.1, + weekendActivity: 0.6 + Math.random() * 0.3, + }); + } + + // Hybrid web + API users (5): developers who use both the UI and have scripts that call the API + for (let i = 0; i < 5; i++) { + users.push({ + id: `user_${String(users.length + 1).padStart(3, '0')}`, + webWeight: 0.3 + Math.random() * 0.4, + mcpWeight: 0, + apiWeight: 0.4 + Math.random() * 0.4, + apiSource: apiSources[i % apiSources.length], + weekdayActivity: 0.8 + Math.random() * 0.15, + weekendActivity: 0.1 + Math.random() * 0.2, + }); + } // Generate data for the last 90 days const endDate = new Date(); const startDate = new Date(); startDate.setDate(startDate.getDate() - 90); + const webOnlyCount = users.filter(u => u.webWeight > 0 && u.mcpWeight === 0 && u.apiWeight === 0).length; + const hybridWebMcpCount = users.filter(u => u.webWeight > 0 && u.mcpWeight > 0).length; + const mcpHeavyCount = users.filter(u => u.mcpWeight > 0 && u.webWeight < 0.3).length; + const apiOnlyCount = users.filter(u => u.apiWeight > 0 && u.webWeight === 0 && u.mcpWeight === 0).length; + const hybridWebApiCount = users.filter(u => u.webWeight > 0 && u.apiWeight > 0).length; + console.log(`Generating data from ${startDate.toISOString().split('T')[0]} to ${endDate.toISOString().split('T')[0]}`); + console.log(`User breakdown: ${webOnlyCount} web-only, ${hybridWebMcpCount} web+MCP, ${mcpHeavyCount} MCP-heavy, ${apiOnlyCount} API-only, ${hybridWebApiCount} web+API`); confirmAction(); + function randomTimestamp(date: Date, isWeekend: boolean): Date { + const ts = new Date(date); + if (isWeekend) { + ts.setHours(9 + Math.floor(Math.random() * 12)); + } else { + ts.setHours(9 + Math.floor(Math.random() * 9)); + } + ts.setMinutes(Math.floor(Math.random() * 60)); + ts.setSeconds(Math.floor(Math.random() * 60)); + return ts; + } + + function scaledCount(baseMin: number, baseMax: number, weight: number, isWeekend: boolean): number { + const weekendFactor = isWeekend ? 0.3 : 1.0; + const scaledMax = Math.round(baseMax * weight * weekendFactor); + const scaledMin = Math.min(Math.round(baseMin * weight * weekendFactor), scaledMax); + if (scaledMax <= 0) return 0; + return scaledMin + Math.floor(Math.random() * (scaledMax - scaledMin + 1)); + } + + async function createAudits( + userId: string, + action: string, + count: number, + currentDate: Date, + isWeekend: boolean, + targetType: string, + metadata?: Prisma.InputJsonValue, + ) { + for (let i = 0; i < count; i++) { + await prisma.audit.create({ + data: { + timestamp: randomTimestamp(currentDate, isWeekend), + action, + actorId: userId, + actorType: 'user', + targetId: `${targetType}_${Math.floor(Math.random() * 1000)}`, + targetType, + sourcebotVersion: '1.0.0', + orgId, + ...(metadata ? { metadata } : {}), + } + }); + } + } + // Generate data for each day for (let d = new Date(startDate); d <= endDate; d.setDate(d.getDate() + 1)) { const currentDate = new Date(d); - const dayOfWeek = currentDate.getDay(); // 0 = Sunday, 6 = Saturday + const dayOfWeek = currentDate.getDay(); const isWeekend = dayOfWeek === 0 || dayOfWeek === 6; - - // For each user, generate activity for this day - for (const userId of userIds) { - // Determine if user is active today (higher chance on weekdays) - const isActiveToday = isWeekend - ? Math.random() < 0.15 // 15% chance on weekends - : Math.random() < 0.85; // 85% chance on weekdays - - if (!isActiveToday) continue; - - // Generate code searches (2-5 per day) - const codeSearches = isWeekend - ? Math.floor(Math.random() * 2) + 1 // 1-2 on weekends - : Math.floor(Math.random() * 4) + 2; // 2-5 on weekdays - - // Generate navigation actions (5-10 per day) - const navigationActions = isWeekend - ? Math.floor(Math.random() * 3) + 1 // 1-3 on weekends - : Math.floor(Math.random() * 6) + 5; // 5-10 on weekdays - - // Create code search records - for (let i = 0; i < codeSearches; i++) { - const timestamp = new Date(currentDate); - // Spread throughout the day (9 AM to 6 PM on weekdays, more random on weekends) - if (isWeekend) { - timestamp.setHours(9 + Math.floor(Math.random() * 12)); - timestamp.setMinutes(Math.floor(Math.random() * 60)); - } else { - timestamp.setHours(9 + Math.floor(Math.random() * 9)); - timestamp.setMinutes(Math.floor(Math.random() * 60)); + + for (const user of users) { + // Determine if user is active today + const activityChance = isWeekend ? user.weekendActivity : user.weekdayActivity; + if (Math.random() >= activityChance) continue; + + // --- Web UI activity (no source metadata) --- + if (user.webWeight > 0) { + // Code searches (2-5 base) + await createAudits(user.id, 'user.performed_code_search', + scaledCount(2, 5, user.webWeight, isWeekend), currentDate, isWeekend, 'search'); + + // Navigations: find references + goto definition (5-10 base) + const navCount = scaledCount(5, 10, user.webWeight, isWeekend); + for (let i = 0; i < navCount; i++) { + const action = Math.random() < 0.6 ? 'user.performed_find_references' : 'user.performed_goto_definition'; + await createAudits(user.id, action, 1, currentDate, isWeekend, 'symbol'); } - timestamp.setSeconds(Math.floor(Math.random() * 60)); - - await prisma.audit.create({ - data: { - timestamp, - action: 'user.performed_code_search', - actorId: userId, - actorType: 'user', - targetId: `search_${Math.floor(Math.random() * 1000)}`, - targetType: 'search', - sourcebotVersion: '1.0.0', - orgId - } - }); + + // Ask chats (0-2 base) - web only + await createAudits(user.id, 'user.created_ask_chat', + scaledCount(0, 2, user.webWeight, isWeekend), currentDate, isWeekend, 'org'); + + // File source views (3-8 base) + await createAudits(user.id, 'user.fetched_file_source', + scaledCount(3, 8, user.webWeight, isWeekend), currentDate, isWeekend, 'file'); + + // File tree browsing (2-5 base) + await createAudits(user.id, 'user.fetched_file_tree', + scaledCount(2, 5, user.webWeight, isWeekend), currentDate, isWeekend, 'repo'); + + // List repos (1-3 base) + await createAudits(user.id, 'user.listed_repos', + scaledCount(1, 3, user.webWeight, isWeekend), currentDate, isWeekend, 'org'); } - // Create navigation action records - for (let i = 0; i < navigationActions; i++) { - const timestamp = new Date(currentDate); - if (isWeekend) { - timestamp.setHours(9 + Math.floor(Math.random() * 12)); - timestamp.setMinutes(Math.floor(Math.random() * 60)); - } else { - timestamp.setHours(9 + Math.floor(Math.random() * 9)); - timestamp.setMinutes(Math.floor(Math.random() * 60)); - } - timestamp.setSeconds(Math.floor(Math.random() * 60)); + // --- MCP activity (source='mcp') --- + if (user.mcpWeight > 0) { + const meta: Prisma.InputJsonValue = { source: 'mcp' }; - // Randomly choose between find references and goto definition - const action = Math.random() < 0.6 ? 'user.performed_find_references' : 'user.performed_goto_definition'; + // MCP code searches (5-15 base) - higher volume than web + await createAudits(user.id, 'user.performed_code_search', + scaledCount(5, 15, user.mcpWeight, isWeekend), currentDate, isWeekend, 'search', meta); - await prisma.audit.create({ - data: { - timestamp, - action, - actorId: userId, - actorType: 'user', - targetId: `symbol_${Math.floor(Math.random() * 1000)}`, - targetType: 'symbol', - sourcebotVersion: '1.0.0', - orgId - } - }); + // MCP file source fetches (5-12 base) + await createAudits(user.id, 'user.fetched_file_source', + scaledCount(5, 12, user.mcpWeight, isWeekend), currentDate, isWeekend, 'file', meta); + + // MCP file tree fetches (3-6 base) + await createAudits(user.id, 'user.fetched_file_tree', + scaledCount(3, 6, user.mcpWeight, isWeekend), currentDate, isWeekend, 'repo', meta); + + // MCP list repos (3-8 base) + await createAudits(user.id, 'user.listed_repos', + scaledCount(3, 8, user.mcpWeight, isWeekend), currentDate, isWeekend, 'org', meta); } - // Generate Ask chat sessions (0-2 per day on weekdays, 0-1 on weekends) - const askChats = isWeekend - ? Math.floor(Math.random() * 2) // 0-1 on weekends - : Math.floor(Math.random() * 3); // 0-2 on weekdays - - // Create Ask chat records - for (let i = 0; i < askChats; i++) { - const timestamp = new Date(currentDate); - if (isWeekend) { - timestamp.setHours(9 + Math.floor(Math.random() * 12)); - timestamp.setMinutes(Math.floor(Math.random() * 60)); - } else { - timestamp.setHours(9 + Math.floor(Math.random() * 9)); - timestamp.setMinutes(Math.floor(Math.random() * 60)); - } - timestamp.setSeconds(Math.floor(Math.random() * 60)); - - await prisma.audit.create({ - data: { - timestamp, - action: 'user.created_ask_chat', - actorId: userId, - actorType: 'user', - targetId: orgId.toString(), - targetType: 'org', - sourcebotVersion: '1.0.0', - orgId - } - }); + // --- API activity (source=cli/sdk/custom-app) --- + if (user.apiWeight > 0) { + const meta: Prisma.InputJsonValue = { source: user.apiSource }; + + // API code searches (10-30 base) - highest volume, automated + await createAudits(user.id, 'user.performed_code_search', + scaledCount(10, 30, user.apiWeight, isWeekend), currentDate, isWeekend, 'search', meta); + + // API file source fetches (8-20 base) + await createAudits(user.id, 'user.fetched_file_source', + scaledCount(8, 20, user.apiWeight, isWeekend), currentDate, isWeekend, 'file', meta); + + // API file tree fetches (4-10 base) + await createAudits(user.id, 'user.fetched_file_tree', + scaledCount(4, 10, user.apiWeight, isWeekend), currentDate, isWeekend, 'repo', meta); + + // API list repos (5-15 base) + await createAudits(user.id, 'user.listed_repos', + scaledCount(5, 15, user.apiWeight, isWeekend), currentDate, isWeekend, 'org', meta); } } } console.log(`\nAudit data injection complete!`); - console.log(`Users: ${userIds.length}`); + console.log(`Users: ${users.length}`); console.log(`Date range: ${startDate.toISOString().split('T')[0]} to ${endDate.toISOString().split('T')[0]}`); - - // Show some statistics + + // Show statistics const stats = await prisma.audit.groupBy({ by: ['action'], where: { orgId }, _count: { action: true } }); - + console.log('\nAction breakdown:'); stats.forEach(stat => { console.log(` ${stat.action}: ${stat._count.action}`); }); + + // Show source breakdown + const allAudits = await prisma.audit.findMany({ + where: { orgId }, + select: { metadata: true } + }); + + let webCount = 0, mcpCount = 0, apiCount = 0; + for (const audit of allAudits) { + const meta = audit.metadata as Record | null; + if (!meta || !meta.source) { + webCount++; + } else if (meta.source === 'mcp') { + mcpCount++; + } else { + apiCount++; + } + } + console.log('\nSource breakdown:'); + console.log(` Web UI (no source): ${webCount}`); + console.log(` MCP (source=mcp): ${mcpCount}`); + console.log(` API (source=other): ${apiCount}`); }, -}; \ No newline at end of file +}; diff --git a/packages/shared/src/env.server.ts b/packages/shared/src/env.server.ts index 15ba17cc9..297a98b71 100644 --- a/packages/shared/src/env.server.ts +++ b/packages/shared/src/env.server.ts @@ -191,6 +191,7 @@ export const env = createEnv({ // EE License SOURCEBOT_EE_LICENSE_KEY: z.string().optional(), SOURCEBOT_EE_AUDIT_LOGGING_ENABLED: booleanSchema.default('true'), + SOURCEBOT_EE_AUDIT_RETENTION_DAYS: numberSchema.default(180), // GitHub app for review agent GITHUB_REVIEW_AGENT_APP_ID: z.string().optional(), diff --git a/packages/web/src/ee/features/analytics/actions.ts b/packages/web/src/ee/features/analytics/actions.ts index 3c353b03b..a75c7bedf 100644 --- a/packages/web/src/ee/features/analytics/actions.ts +++ b/packages/web/src/ee/features/analytics/actions.ts @@ -4,8 +4,8 @@ import { sew, withAuth, withOrgMembership } from "@/actions"; import { OrgRole } from "@sourcebot/db"; import { prisma } from "@/prisma"; import { ServiceError } from "@/lib/serviceError"; -import { AnalyticsResponse } from "./types"; -import { hasEntitlement } from "@sourcebot/shared"; +import { AnalyticsResponse, AnalyticsRow } from "./types"; +import { env, hasEntitlement } from "@sourcebot/shared"; import { ErrorCode } from "@/lib/errorCodes"; import { StatusCodes } from "http-status-codes"; @@ -20,7 +20,7 @@ export const getAnalytics = async (domain: string, apiKey: string | undefined = } satisfies ServiceError; } - const rows = await prisma.$queryRaw` + const rows = await prisma.$queryRaw` WITH core AS ( SELECT date_trunc('day', "timestamp") AS day, @@ -109,6 +109,16 @@ export const getAnalytics = async (domain: string, apiKey: string | undefined = `; - return rows; + const oldestRecord = await prisma.audit.findFirst({ + where: { orgId: org.id }, + orderBy: { timestamp: 'asc' }, + select: { timestamp: true }, + }); + + return { + rows, + retentionDays: env.SOURCEBOT_EE_AUDIT_RETENTION_DAYS, + oldestRecordDate: oldestRecord?.timestamp ?? null, + }; }, /* minRequiredRole = */ OrgRole.MEMBER), /* allowAnonymousAccess = */ true, apiKey ? { apiKey, domain } : undefined) ); \ No newline at end of file diff --git a/packages/web/src/ee/features/analytics/analyticsContent.tsx b/packages/web/src/ee/features/analytics/analyticsContent.tsx index 562c9f888..c8ef9de1f 100644 --- a/packages/web/src/ee/features/analytics/analyticsContent.tsx +++ b/packages/web/src/ee/features/analytics/analyticsContent.tsx @@ -9,7 +9,7 @@ import { useQuery } from "@tanstack/react-query" import { useDomain } from "@/hooks/useDomain" import { unwrapServiceError } from "@/lib/utils" import { Skeleton } from "@/components/ui/skeleton" -import { AnalyticsResponse } from "./types" +import { AnalyticsRow } from "./types" import { getAnalytics } from "./actions" import { useTheme } from "next-themes" import { useMemo, useState } from "react" @@ -24,7 +24,7 @@ const periodLabels: Record = { } interface AnalyticsChartProps { - data: AnalyticsResponse + data: AnalyticsRow[] title: string icon: LucideIcon period: "day" | "week" | "month" @@ -266,7 +266,7 @@ export function AnalyticsContent() { ) } - const periodData = analyticsResponse.filter((row) => row.period === selectedPeriod) + const periodData = analyticsResponse.rows.filter((row) => row.period === selectedPeriod) const charts = [ { @@ -322,6 +322,16 @@ export function AnalyticsContent() {

View usage metrics across your organization.

+
+

+ Retention period: {analyticsResponse.retentionDays > 0 ? `${analyticsResponse.retentionDays} days` : "Indefinite"} +

+ {analyticsResponse.oldestRecordDate && ( +

+ Data since: {new Date(analyticsResponse.oldestRecordDate).toLocaleDateString("en-US", { month: "short", day: "numeric", year: "numeric" })} +

+ )} +
{/* Time Period Selector */} diff --git a/packages/web/src/ee/features/analytics/types.ts b/packages/web/src/ee/features/analytics/types.ts index 67d5b019b..cd20ff8cd 100644 --- a/packages/web/src/ee/features/analytics/types.ts +++ b/packages/web/src/ee/features/analytics/types.ts @@ -1,6 +1,6 @@ import { z } from "zod"; -export const analyticsResponseSchema = z.array(z.object({ +export const analyticsRowSchema = z.object({ period: z.enum(['day', 'week', 'month']), bucket: z.date(), code_searches: z.number(), @@ -9,5 +9,11 @@ export const analyticsResponseSchema = z.array(z.object({ mcp_requests: z.number(), api_requests: z.number(), active_users: z.number(), -})) -export type AnalyticsResponse = z.infer; \ No newline at end of file +}); +export type AnalyticsRow = z.infer; + +export type AnalyticsResponse = { + rows: AnalyticsRow[]; + retentionDays: number; + oldestRecordDate: Date | null; +}; \ No newline at end of file From ad3563138c21a531ff9cc7dd43e4f585d85b5326 Mon Sep 17 00:00:00 2001 From: msukkari Date: Thu, 26 Feb 2026 17:01:12 -0800 Subject: [PATCH 4/8] chore: update CHANGELOG for audit log retention policy (#950) --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e255b2d0e..0b3d0329c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Added MCP and API key usage tracking to analytics dashboard. Move audit events from client-side to service functions to capture all API calls (web UI, MCP, and non-MCP). Display MCP requests and API requests on separate charts. [#948](https://github.com/sourcebot-dev/sourcebot/pull/948) +- Added audit log retention policy with `SOURCEBOT_EE_AUDIT_RETENTION_DAYS` environment variable (default 180 days). Daily background job prunes old audit records. [#950](https://github.com/sourcebot-dev/sourcebot/pull/950) ### Fixed - Fixed search query parser rejecting parenthesized regex alternation in filter values (e.g. `file:(test|spec)`, `-file:(test|spec)`). [#946](https://github.com/sourcebot-dev/sourcebot/pull/946) From 794a05c40e01ded44ae2ffae2e04ce216d5d421b Mon Sep 17 00:00:00 2001 From: msukkari Date: Thu, 26 Feb 2026 17:52:13 -0800 Subject: [PATCH 5/8] feat(web): add sourceOverride to getFileSource and getTree Extend the sourceOverride pattern to getFileSource and getTree so internal callers (chat AI agent) can tag audit events with the correct source instead of relying on the HTTP header. Co-Authored-By: Claude Opus 4.6 --- packages/web/src/features/chat/agent.ts | 2 +- packages/web/src/features/chat/tools.ts | 7 ++++--- packages/web/src/features/git/getFileSourceApi.ts | 4 ++-- packages/web/src/features/git/getTreeApi.ts | 4 ++-- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/packages/web/src/features/chat/agent.ts b/packages/web/src/features/chat/agent.ts index ec2a30758..e6ae223e2 100644 --- a/packages/web/src/features/chat/agent.ts +++ b/packages/web/src/features/chat/agent.ts @@ -43,7 +43,7 @@ export const createAgentStream = async ({ path: source.path, repo: source.repo, ref: source.revision, - }); + }, { sourceOverride: 'sourcebot-ask-agent' }); if (isServiceError(fileSource)) { logger.error("Error fetching file source:", fileSource); diff --git a/packages/web/src/features/chat/tools.ts b/packages/web/src/features/chat/tools.ts index 87a251214..86af93679 100644 --- a/packages/web/src/features/chat/tools.ts +++ b/packages/web/src/features/chat/tools.ts @@ -115,7 +115,7 @@ export const readFilesTool = tool({ path, repo: repository, ref: revision, - }); + }, { sourceOverride: 'sourcebot-ask-agent' }); })); if (responses.some(isServiceError)) { @@ -221,7 +221,8 @@ export const createCodeSearchTool = (selectedRepos: string[]) => tool({ contextLines: 3, isCaseSensitivityEnabled: caseSensitive, isRegexEnabled: useRegex, - } + }, + sourceOverride: 'sourcebot-ask-agent', }); if (isServiceError(response)) { @@ -253,7 +254,7 @@ export const listReposTool = tool({ description: 'Lists repositories in the organization with optional filtering and pagination.', inputSchema: listReposQueryParamsSchema, execute: async (request: ListReposQueryParams) => { - const reposResponse = await listRepos(request); + const reposResponse = await listRepos({ ...request, sourceOverride: 'sourcebot-ask-agent' }); if (isServiceError(reposResponse)) { return reposResponse; diff --git a/packages/web/src/features/git/getFileSourceApi.ts b/packages/web/src/features/git/getFileSourceApi.ts index f098e654c..30c0b4a2e 100644 --- a/packages/web/src/features/git/getFileSourceApi.ts +++ b/packages/web/src/features/git/getFileSourceApi.ts @@ -32,9 +32,9 @@ export const fileSourceResponseSchema = z.object({ }); export type FileSourceResponse = z.infer; -export const getFileSource = async ({ path: filePath, repo: repoName, ref }: FileSourceRequest): Promise => sew(() => withOptionalAuthV2(async ({ org, prisma, user }) => { +export const getFileSource = async ({ path: filePath, repo: repoName, ref }: FileSourceRequest, { sourceOverride }: { sourceOverride?: string } = {}): Promise => sew(() => withOptionalAuthV2(async ({ org, prisma, user }) => { if (user) { - const source = (await headers()).get('X-Sourcebot-Client-Source') ?? undefined; + const source = sourceOverride ?? (await headers()).get('X-Sourcebot-Client-Source') ?? undefined; getAuditService().createAudit({ action: 'user.fetched_file_source', actor: { id: user.id, type: 'user' }, diff --git a/packages/web/src/features/git/getTreeApi.ts b/packages/web/src/features/git/getTreeApi.ts index 3136b5586..9a054ba5d 100644 --- a/packages/web/src/features/git/getTreeApi.ts +++ b/packages/web/src/features/git/getTreeApi.ts @@ -26,10 +26,10 @@ export type GetTreeResponse = z.infer; * repo/revision, including intermediate directories needed to connect them * into a single tree. */ -export const getTree = async ({ repoName, revisionName, paths }: GetTreeRequest): Promise => sew(() => +export const getTree = async ({ repoName, revisionName, paths }: GetTreeRequest, { sourceOverride }: { sourceOverride?: string } = {}): Promise => sew(() => withOptionalAuthV2(async ({ org, prisma, user }) => { if (user) { - const source = (await headers()).get('X-Sourcebot-Client-Source') ?? undefined; + const source = sourceOverride ?? (await headers()).get('X-Sourcebot-Client-Source') ?? undefined; getAuditService().createAudit({ action: 'user.fetched_file_tree', actor: { id: user.id, type: 'user' }, From 56c7f40b19e3725b99aafa4b4a0e9a5ff2094297 Mon Sep 17 00:00:00 2001 From: msukkari Date: Thu, 26 Feb 2026 17:57:39 -0800 Subject: [PATCH 6/8] chore: remove changelog entry for closed PR #948 Co-Authored-By: Claude Opus 4.6 --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b3d0329c..5941bc925 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added -- Added MCP and API key usage tracking to analytics dashboard. Move audit events from client-side to service functions to capture all API calls (web UI, MCP, and non-MCP). Display MCP requests and API requests on separate charts. [#948](https://github.com/sourcebot-dev/sourcebot/pull/948) - Added audit log retention policy with `SOURCEBOT_EE_AUDIT_RETENTION_DAYS` environment variable (default 180 days). Daily background job prunes old audit records. [#950](https://github.com/sourcebot-dev/sourcebot/pull/950) ### Fixed From f9dea296a361c30157957d8791b6c51d08aca8fc Mon Sep 17 00:00:00 2001 From: msukkari Date: Thu, 26 Feb 2026 18:10:31 -0800 Subject: [PATCH 7/8] feat(web): restructure analytics by source and add global active users - Tag all audit events with source metadata (sourcebot-web-client, sourcebot-ask-agent, sourcebot-ui-codenav, mcp) via sourceOverride - Restructure analytics SQL to segment by Web App (sourcebot-*), MCP, and API (everything else) - Add global active users chart at top of analytics page - Add info hover tooltips explaining each chart - Prefix chart names with their section (Web/MCP/API) for clarity - Update inject-audit-data script to use correct source values Co-Authored-By: Claude Opus 4.6 --- .../db/tools/scripts/inject-audit-data.ts | 26 +- .../app/api/(server)/chat/blocking/route.ts | 12 + .../app/api/(server)/repos/listReposApi.ts | 4 +- .../web/src/ee/features/analytics/actions.ts | 56 +++- .../features/analytics/analyticsContent.tsx | 239 +++++++++++++----- .../web/src/ee/features/analytics/types.ts | 11 +- .../components/symbolHoverPopup/index.tsx | 2 + packages/web/src/features/chat/actions.ts | 3 +- .../features/chat/useCreateNewChatThread.ts | 2 +- packages/web/src/features/codeNav/api.ts | 6 +- packages/web/src/features/search/searchApi.ts | 6 +- 11 files changed, 276 insertions(+), 91 deletions(-) diff --git a/packages/db/tools/scripts/inject-audit-data.ts b/packages/db/tools/scripts/inject-audit-data.ts index 404ee9c45..bcfbf7685 100644 --- a/packages/db/tools/scripts/inject-audit-data.ts +++ b/packages/db/tools/scripts/inject-audit-data.ts @@ -180,34 +180,37 @@ export const injectAuditData: Script = { const activityChance = isWeekend ? user.weekendActivity : user.weekdayActivity; if (Math.random() >= activityChance) continue; - // --- Web UI activity (no source metadata) --- + // --- Web UI activity (source='sourcebot-web-client' or 'sourcebot-ui-codenav') --- if (user.webWeight > 0) { + const webMeta: Prisma.InputJsonValue = { source: 'sourcebot-web-client' }; + const codenavMeta: Prisma.InputJsonValue = { source: 'sourcebot-ui-codenav' }; + // Code searches (2-5 base) await createAudits(user.id, 'user.performed_code_search', - scaledCount(2, 5, user.webWeight, isWeekend), currentDate, isWeekend, 'search'); + scaledCount(2, 5, user.webWeight, isWeekend), currentDate, isWeekend, 'search', webMeta); // Navigations: find references + goto definition (5-10 base) const navCount = scaledCount(5, 10, user.webWeight, isWeekend); for (let i = 0; i < navCount; i++) { const action = Math.random() < 0.6 ? 'user.performed_find_references' : 'user.performed_goto_definition'; - await createAudits(user.id, action, 1, currentDate, isWeekend, 'symbol'); + await createAudits(user.id, action, 1, currentDate, isWeekend, 'symbol', codenavMeta); } // Ask chats (0-2 base) - web only await createAudits(user.id, 'user.created_ask_chat', - scaledCount(0, 2, user.webWeight, isWeekend), currentDate, isWeekend, 'org'); + scaledCount(0, 2, user.webWeight, isWeekend), currentDate, isWeekend, 'org', webMeta); // File source views (3-8 base) await createAudits(user.id, 'user.fetched_file_source', - scaledCount(3, 8, user.webWeight, isWeekend), currentDate, isWeekend, 'file'); + scaledCount(3, 8, user.webWeight, isWeekend), currentDate, isWeekend, 'file', webMeta); // File tree browsing (2-5 base) await createAudits(user.id, 'user.fetched_file_tree', - scaledCount(2, 5, user.webWeight, isWeekend), currentDate, isWeekend, 'repo'); + scaledCount(2, 5, user.webWeight, isWeekend), currentDate, isWeekend, 'repo', webMeta); // List repos (1-3 base) await createAudits(user.id, 'user.listed_repos', - scaledCount(1, 3, user.webWeight, isWeekend), currentDate, isWeekend, 'org'); + scaledCount(1, 3, user.webWeight, isWeekend), currentDate, isWeekend, 'org', webMeta); } // --- MCP activity (source='mcp') --- @@ -279,17 +282,18 @@ export const injectAuditData: Script = { let webCount = 0, mcpCount = 0, apiCount = 0; for (const audit of allAudits) { const meta = audit.metadata as Record | null; - if (!meta || !meta.source) { + const source = meta?.source as string | undefined; + if (source && typeof source === 'string' && source.startsWith('sourcebot-')) { webCount++; - } else if (meta.source === 'mcp') { + } else if (source === 'mcp') { mcpCount++; } else { apiCount++; } } console.log('\nSource breakdown:'); - console.log(` Web UI (no source): ${webCount}`); + console.log(` Web UI (source=sourcebot-*): ${webCount}`); console.log(` MCP (source=mcp): ${mcpCount}`); - console.log(` API (source=other): ${apiCount}`); + console.log(` API (source=other/null): ${apiCount}`); }, }; diff --git a/packages/web/src/app/api/(server)/chat/blocking/route.ts b/packages/web/src/app/api/(server)/chat/blocking/route.ts index 4e887cf0f..c230b5dd0 100644 --- a/packages/web/src/app/api/(server)/chat/blocking/route.ts +++ b/packages/web/src/app/api/(server)/chat/blocking/route.ts @@ -16,6 +16,7 @@ import { createMessageStream } from "../route"; import { InferUIMessageChunk, UITools, UIDataTypes, UIMessage } from "ai"; import { apiHandler } from "@/lib/apiHandler"; import { captureEvent } from "@/lib/posthog"; +import { getAuditService } from "@/ee/features/audit/factory"; const logger = createLogger('chat-blocking-api'); @@ -121,6 +122,17 @@ export const POST = apiHandler(async (request: NextRequest) => { isAnonymous: !user, }); + if (user) { + const source = request.headers.get('X-Sourcebot-Client-Source') ?? undefined; + getAuditService().createAudit({ + action: 'user.created_ask_chat', + actor: { id: user.id, type: 'user' }, + target: { id: org.id.toString(), type: 'org' }, + orgId: org.id, + metadata: { source }, + }).catch(() => {}); + } + // Run the agent to completion logger.debug(`Starting blocking agent for chat ${chat.id}`, { chatId: chat.id, diff --git a/packages/web/src/app/api/(server)/repos/listReposApi.ts b/packages/web/src/app/api/(server)/repos/listReposApi.ts index adffe1a00..8ba2e9c6d 100644 --- a/packages/web/src/app/api/(server)/repos/listReposApi.ts +++ b/packages/web/src/app/api/(server)/repos/listReposApi.ts @@ -6,10 +6,10 @@ import { getBrowsePath } from "@/app/[domain]/browse/hooks/utils"; import { env } from "@sourcebot/shared"; import { headers } from "next/headers"; -export const listRepos = async ({ query, page, perPage, sort, direction }: ListReposQueryParams) => sew(() => +export const listRepos = async ({ query, page, perPage, sort, direction, sourceOverride }: ListReposQueryParams & { sourceOverride?: string }) => sew(() => withOptionalAuthV2(async ({ org, prisma, user }) => { if (user) { - const source = (await headers()).get('X-Sourcebot-Client-Source') ?? undefined; + const source = sourceOverride ?? (await headers()).get('X-Sourcebot-Client-Source') ?? undefined; getAuditService().createAudit({ action: 'user.listed_repos', actor: { id: user.id, type: 'user' }, diff --git a/packages/web/src/ee/features/analytics/actions.ts b/packages/web/src/ee/features/analytics/actions.ts index a75c7bedf..18d9a897b 100644 --- a/packages/web/src/ee/features/analytics/actions.ts +++ b/packages/web/src/ee/features/analytics/actions.ts @@ -80,12 +80,45 @@ export const getAnalytics = async (domain: string, apiKey: string | undefined = WHEN 'week' THEN c.week ELSE c.month END AS bucket, - COUNT(*) FILTER (WHERE c.action = 'user.performed_code_search') AS code_searches, - COUNT(*) FILTER (WHERE c.action IN ('user.performed_find_references', 'user.performed_goto_definition')) AS navigations, - COUNT(*) FILTER (WHERE c.action = 'user.created_ask_chat') AS ask_chats, - COUNT(*) FILTER (WHERE c.metadata->>'source' = 'mcp') AS mcp_requests, - COUNT(*) FILTER (WHERE c.metadata->>'source' IS NOT NULL AND c.metadata->>'source' != 'mcp') AS api_requests, - COUNT(DISTINCT c."actorId") AS active_users + + -- Global active users (any action, any source) + COUNT(DISTINCT c."actorId") AS active_users, + + -- Web App metrics (source LIKE 'sourcebot-%') + COUNT(*) FILTER ( + WHERE c.action = 'user.performed_code_search' + AND c.metadata->>'source' LIKE 'sourcebot-%' + ) AS web_code_searches, + COUNT(*) FILTER ( + WHERE c.action IN ('user.performed_find_references', 'user.performed_goto_definition') + AND c.metadata->>'source' LIKE 'sourcebot-%' + ) AS web_navigations, + COUNT(*) FILTER ( + WHERE c.action = 'user.created_ask_chat' + AND c.metadata->>'source' LIKE 'sourcebot-%' + ) AS web_ask_chats, + COUNT(DISTINCT c."actorId") FILTER ( + WHERE c.metadata->>'source' LIKE 'sourcebot-%' + ) AS web_active_users, + + -- MCP metrics (source = 'mcp') + COUNT(*) FILTER ( + WHERE c.metadata->>'source' = 'mcp' + ) AS mcp_requests, + COUNT(DISTINCT c."actorId") FILTER ( + WHERE c.metadata->>'source' = 'mcp' + ) AS mcp_active_users, + + -- API metrics (source IS NULL or not sourcebot-*/mcp) + COUNT(*) FILTER ( + WHERE c.metadata->>'source' IS NULL + OR (c.metadata->>'source' NOT LIKE 'sourcebot-%' AND c.metadata->>'source' != 'mcp') + ) AS api_requests, + COUNT(DISTINCT c."actorId") FILTER ( + WHERE c.metadata->>'source' IS NULL + OR (c.metadata->>'source' NOT LIKE 'sourcebot-%' AND c.metadata->>'source' != 'mcp') + ) AS api_active_users + FROM core c JOIN LATERAL ( SELECT unnest(array['day', 'week', 'month']) AS period @@ -96,12 +129,15 @@ export const getAnalytics = async (domain: string, apiKey: string | undefined = SELECT b.period, b.bucket, - COALESCE(a.code_searches, 0)::int AS code_searches, - COALESCE(a.navigations, 0)::int AS navigations, - COALESCE(a.ask_chats, 0)::int AS ask_chats, + COALESCE(a.active_users, 0)::int AS active_users, + COALESCE(a.web_code_searches, 0)::int AS web_code_searches, + COALESCE(a.web_navigations, 0)::int AS web_navigations, + COALESCE(a.web_ask_chats, 0)::int AS web_ask_chats, + COALESCE(a.web_active_users, 0)::int AS web_active_users, COALESCE(a.mcp_requests, 0)::int AS mcp_requests, + COALESCE(a.mcp_active_users, 0)::int AS mcp_active_users, COALESCE(a.api_requests, 0)::int AS api_requests, - COALESCE(a.active_users, 0)::int AS active_users + COALESCE(a.api_active_users, 0)::int AS api_active_users FROM buckets b LEFT JOIN aggregated a ON a.period = b.period AND a.bucket = b.bucket diff --git a/packages/web/src/ee/features/analytics/analyticsContent.tsx b/packages/web/src/ee/features/analytics/analyticsContent.tsx index c8ef9de1f..1f295824e 100644 --- a/packages/web/src/ee/features/analytics/analyticsContent.tsx +++ b/packages/web/src/ee/features/analytics/analyticsContent.tsx @@ -2,7 +2,7 @@ import { ChartTooltip } from "@/components/ui/chart" import { Area, AreaChart, ResponsiveContainer, XAxis, YAxis } from "recharts" -import { Users, LucideIcon, Search, ArrowRight, Activity, Calendar, MessageCircle, Wrench, Key } from "lucide-react" +import { Users, LucideIcon, Search, ArrowRight, Activity, Calendar, MessageCircle, Wrench, Key, Info } from "lucide-react" import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card" import { ChartContainer } from "@/components/ui/chart" import { useQuery } from "@tanstack/react-query" @@ -14,6 +14,7 @@ import { getAnalytics } from "./actions" import { useTheme } from "next-themes" import { useMemo, useState } from "react" import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select" +import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip" type TimePeriod = "day" | "week" | "month" @@ -23,17 +24,27 @@ const periodLabels: Record = { month: "Monthly", } +interface ChartDefinition { + title: string + icon: LucideIcon + color: string + dataKey: keyof Omit + gradientId: string + description: string +} + interface AnalyticsChartProps { data: AnalyticsRow[] title: string icon: LucideIcon period: "day" | "week" | "month" - dataKey: "code_searches" | "navigations" | "ask_chats" | "mcp_requests" | "api_requests" | "active_users" + dataKey: keyof Omit color: string gradientId: string + description: string } -function AnalyticsChart({ data, title, icon: Icon, period, dataKey, color, gradientId }: AnalyticsChartProps) { +function AnalyticsChart({ data, title, icon: Icon, period, dataKey, color, gradientId, description }: AnalyticsChartProps) { const { theme } = useTheme() const isDark = theme === "dark" @@ -57,8 +68,16 @@ function AnalyticsChart({ data, title, icon: Icon, period, dataKey, color, gradi > -
+
{title} + + + + + + {description} + +
@@ -159,6 +178,26 @@ function AnalyticsChart({ data, title, icon: Icon, period, dataKey, color, gradi ) } +function ChartSkeletonGroup({ count }: { count: number }) { + return ( + <> + {Array.from({ length: count }, (_, i) => ( + + +
+ + +
+
+ + + +
+ ))} + + ) +} + function LoadingSkeleton() { return (
@@ -174,22 +213,16 @@ function LoadingSkeleton() {
- {/* Chart skeletons */} - {[1, 2, 3, 4, 5, 6].map((chartIndex) => ( - - -
- -
- -
-
-
- - - -
- ))} + {/* Global chart skeleton */} + + + {/* Web App section skeleton */} + + + + {/* API section skeleton */} + + ) } @@ -197,7 +230,7 @@ function LoadingSkeleton() { export function AnalyticsContent() { const domain = useDomain() const { theme } = useTheme() - + // Time period selector state const [selectedPeriod, setSelectedPeriod] = useState("day") @@ -212,19 +245,23 @@ export function AnalyticsContent() { }) const chartColors = useMemo(() => ({ - users: { + globalUsers: { + light: "#6366f1", + dark: "#818cf8", + }, + webUsers: { light: "#3b82f6", dark: "#60a5fa", }, - searches: { + webSearches: { light: "#f59e0b", dark: "#fbbf24", }, - navigations: { + webNavigations: { light: "#ef4444", dark: "#f87171", }, - askChats: { + webAskChats: { light: "#8b5cf6", dark: "#a78bfa", }, @@ -232,10 +269,18 @@ export function AnalyticsContent() { light: "#10b981", dark: "#34d399", }, + mcpUsers: { + light: "#06b6d4", + dark: "#22d3ee", + }, apiRequests: { light: "#14b8a6", dark: "#2dd4bf", }, + apiUsers: { + light: "#f97316", + dark: "#fb923c", + }, }), []) const getColor = (colorKey: keyof typeof chartColors) => { @@ -268,41 +313,66 @@ export function AnalyticsContent() { const periodData = analyticsResponse.rows.filter((row) => row.period === selectedPeriod) - const charts = [ + const globalChart: ChartDefinition = { + title: `${periodLabels[selectedPeriod]} Active Users`, + icon: Users, + color: getColor("globalUsers"), + dataKey: "active_users" as const, + gradientId: "activeUsers", + description: "Unique users who performed any tracked action across all interfaces (web app, MCP, and API).", + } + + const webCharts: ChartDefinition[] = [ { - title: `${periodLabels[selectedPeriod]} Active Users`, + title: `${periodLabels[selectedPeriod]} Web Active Users`, icon: Users, - color: getColor("users"), - dataKey: "active_users" as const, - gradientId: "activeUsers", + color: getColor("webUsers"), + dataKey: "web_active_users" as const, + gradientId: "webActiveUsers", + description: "Unique users who performed any action through the Sourcebot web interface, including searches, navigations, chats, and file views.", }, { - title: `${periodLabels[selectedPeriod]} Code Searches`, + title: `${periodLabels[selectedPeriod]} Web Code Searches`, icon: Search, - color: getColor("searches"), - dataKey: "code_searches" as const, - gradientId: "codeSearches", + color: getColor("webSearches"), + dataKey: "web_code_searches" as const, + gradientId: "webCodeSearches", + description: "Number of code searches performed through the Sourcebot web interface.", }, { - title: `${periodLabels[selectedPeriod]} Navigations`, - icon: ArrowRight, - color: getColor("navigations"), - dataKey: "navigations" as const, - gradientId: "navigations", + title: `${periodLabels[selectedPeriod]} Web Ask Chats`, + icon: MessageCircle, + color: getColor("webAskChats"), + dataKey: "web_ask_chats" as const, + gradientId: "webAskChats", + description: "Number of Ask chat conversations created through the Sourcebot web interface.", }, { - title: `${periodLabels[selectedPeriod]} Ask Chats`, - icon: MessageCircle, - color: getColor("askChats"), - dataKey: "ask_chats" as const, - gradientId: "askChats", + title: `${periodLabels[selectedPeriod]} Web Navigations`, + icon: ArrowRight, + color: getColor("webNavigations"), + dataKey: "web_navigations" as const, + gradientId: "webNavigations", + description: "Number of go-to-definition and find-references actions performed in the web interface.", }, + ] + + const apiCharts: ChartDefinition[] = [ { title: `${periodLabels[selectedPeriod]} MCP Requests`, icon: Wrench, color: getColor("mcpRequests"), dataKey: "mcp_requests" as const, gradientId: "mcpRequests", + description: "Total number of requests made through MCP (Model Context Protocol) integrations.", + }, + { + title: `${periodLabels[selectedPeriod]} MCP Active Users`, + icon: Users, + color: getColor("mcpUsers"), + dataKey: "mcp_active_users" as const, + gradientId: "mcpActiveUsers", + description: "Unique users who made requests through MCP integrations.", }, { title: `${periodLabels[selectedPeriod]} API Requests`, @@ -310,6 +380,15 @@ export function AnalyticsContent() { color: getColor("apiRequests"), dataKey: "api_requests" as const, gradientId: "apiRequests", + description: "Total number of requests made through direct API access, excluding web app and MCP traffic.", + }, + { + title: `${periodLabels[selectedPeriod]} API Active Users`, + icon: Users, + color: getColor("apiUsers"), + dataKey: "api_active_users" as const, + gradientId: "apiActiveUsers", + description: "Unique users who made requests through direct API access, excluding web app and MCP traffic.", }, ] @@ -350,19 +429,63 @@ export function AnalyticsContent() { - {/* Analytics Charts */} - {charts.map((chart) => ( - - ))} + {/* Global Active Users */} + + + {/* Web App Section */} +
+
+

Web App

+

+ Usage from the Sourcebot web interface. +

+
+ {webCharts.map((chart) => ( + + ))} +
+ + {/* API Section */} +
+
+

API

+

+ Usage from MCP integrations and direct API access. +

+
+ {apiCharts.map((chart) => ( + + ))} +
) -} \ No newline at end of file +} diff --git a/packages/web/src/ee/features/analytics/types.ts b/packages/web/src/ee/features/analytics/types.ts index cd20ff8cd..ef44ad287 100644 --- a/packages/web/src/ee/features/analytics/types.ts +++ b/packages/web/src/ee/features/analytics/types.ts @@ -3,12 +3,15 @@ import { z } from "zod"; export const analyticsRowSchema = z.object({ period: z.enum(['day', 'week', 'month']), bucket: z.date(), - code_searches: z.number(), - navigations: z.number(), - ask_chats: z.number(), + active_users: z.number(), + web_code_searches: z.number(), + web_navigations: z.number(), + web_ask_chats: z.number(), + web_active_users: z.number(), mcp_requests: z.number(), + mcp_active_users: z.number(), api_requests: z.number(), - active_users: z.number(), + api_active_users: z.number(), }); export type AnalyticsRow = z.infer; diff --git a/packages/web/src/ee/features/codeNav/components/symbolHoverPopup/index.tsx b/packages/web/src/ee/features/codeNav/components/symbolHoverPopup/index.tsx index 2dd86d505..f72ecd6d4 100644 --- a/packages/web/src/ee/features/codeNav/components/symbolHoverPopup/index.tsx +++ b/packages/web/src/ee/features/codeNav/components/symbolHoverPopup/index.tsx @@ -123,6 +123,7 @@ export const SymbolHoverPopup: React.FC = ({ action: "user.performed_goto_definition", metadata: { message: symbolInfo.symbolName, + source: 'sourcebot-ui-codenav', }, }); @@ -176,6 +177,7 @@ export const SymbolHoverPopup: React.FC = ({ action: "user.performed_find_references", metadata: { message: symbolInfo.symbolName, + source: 'sourcebot-ui-codenav', }, }) diff --git a/packages/web/src/features/chat/actions.ts b/packages/web/src/features/chat/actions.ts index 86a9292fb..8503b7308 100644 --- a/packages/web/src/features/chat/actions.ts +++ b/packages/web/src/features/chat/actions.ts @@ -130,7 +130,7 @@ User question: ${message}`; return result.text; } -export const createChat = async () => sew(() => +export const createChat = async ({ source }: { source?: string } = {}) => sew(() => withOptionalAuthV2(async ({ org, user, prisma }) => { const isGuestUser = user === undefined; @@ -160,6 +160,7 @@ export const createChat = async () => sew(() => type: "org", }, orgId: org.id, + metadata: { source }, }); } diff --git a/packages/web/src/features/chat/useCreateNewChatThread.ts b/packages/web/src/features/chat/useCreateNewChatThread.ts index d9af1c9de..37eba2330 100644 --- a/packages/web/src/features/chat/useCreateNewChatThread.ts +++ b/packages/web/src/features/chat/useCreateNewChatThread.ts @@ -37,7 +37,7 @@ export const useCreateNewChatThread = ({ isAuthenticated = false }: UseCreateNew const inputMessage = createUIMessage(text, mentions.map((mention) => mention.data), selectedSearchScopes); setIsLoading(true); - const response = await createChat(); + const response = await createChat({ source: 'sourcebot-web-client' }); if (isServiceError(response)) { toast({ description: `❌ Failed to create chat. Reason: ${response.message}` diff --git a/packages/web/src/features/codeNav/api.ts b/packages/web/src/features/codeNav/api.ts index 83e0a8873..93c2e492f 100644 --- a/packages/web/src/features/codeNav/api.ts +++ b/packages/web/src/features/codeNav/api.ts @@ -57,7 +57,8 @@ export const findSearchBasedSymbolReferences = async (props: FindRelatedSymbolsR options: { matches: MAX_REFERENCE_COUNT, contextLines: 0, - } + }, + sourceOverride: 'sourcebot-ui-codenav', }); if (isServiceError(searchResult)) { @@ -116,7 +117,8 @@ export const findSearchBasedSymbolDefinitions = async (props: FindRelatedSymbols options: { matches: MAX_REFERENCE_COUNT, contextLines: 0, - } + }, + sourceOverride: 'sourcebot-ui-codenav', }); if (isServiceError(searchResult)) { diff --git a/packages/web/src/features/search/searchApi.ts b/packages/web/src/features/search/searchApi.ts index 01cf33ec8..b80490bba 100644 --- a/packages/web/src/features/search/searchApi.ts +++ b/packages/web/src/features/search/searchApi.ts @@ -15,6 +15,7 @@ type QueryStringSearchRequest = { queryType: 'string'; query: string; options: SearchOptions; + sourceOverride?: string; } type QueryIRSearchRequest = { @@ -22,6 +23,7 @@ type QueryIRSearchRequest = { query: QueryIR; // Omit options that are specific to query syntax parsing. options: Omit; + sourceOverride?: string; } type SearchRequest = QueryStringSearchRequest | QueryIRSearchRequest; @@ -29,7 +31,7 @@ type SearchRequest = QueryStringSearchRequest | QueryIRSearchRequest; export const search = (request: SearchRequest) => sew(() => withOptionalAuthV2(async ({ prisma, user, org }) => { if (user) { - const source = (await headers()).get('X-Sourcebot-Client-Source') ?? undefined; + const source = request.sourceOverride ?? (await headers()).get('X-Sourcebot-Client-Source') ?? undefined; getAuditService().createAudit({ action: 'user.performed_code_search', actor: { id: user.id, type: 'user' }, @@ -60,7 +62,7 @@ export const search = (request: SearchRequest) => sew(() => export const streamSearch = (request: SearchRequest) => sew(() => withOptionalAuthV2(async ({ prisma, user, org }) => { if (user) { - const source = (await headers()).get('X-Sourcebot-Client-Source') ?? undefined; + const source = request.sourceOverride ?? (await headers()).get('X-Sourcebot-Client-Source') ?? undefined; getAuditService().createAudit({ action: 'user.performed_code_search', actor: { id: user.id, type: 'user' }, From ab8b86fd977841c6b882e5793c5351a8157dd425 Mon Sep 17 00:00:00 2001 From: msukkari Date: Thu, 26 Feb 2026 18:23:57 -0800 Subject: [PATCH 8/8] feat(db): backfill audit source metadata and add v2 inject script Add a migration that backfills the 'source' field in audit metadata for historical events created before source tracking was introduced. All old events were web-only, so code searches and chats get 'sourcebot-web-client' and navigations get 'sourcebot-ui-codenav'. Also restore the original inject-audit-data script and add inject-audit-data-v2 with source-aware mock data generation. Co-Authored-By: Claude Opus 4.6 --- .../migration.sql | 19 + packages/db/tools/scriptRunner.ts | 2 + .../db/tools/scripts/inject-audit-data-v2.ts | 299 +++++++++++++++ .../db/tools/scripts/inject-audit-data.ts | 362 ++++++------------ 4 files changed, 438 insertions(+), 244 deletions(-) create mode 100644 packages/db/prisma/migrations/20260226000000_backfill_audit_source_metadata/migration.sql create mode 100644 packages/db/tools/scripts/inject-audit-data-v2.ts diff --git a/packages/db/prisma/migrations/20260226000000_backfill_audit_source_metadata/migration.sql b/packages/db/prisma/migrations/20260226000000_backfill_audit_source_metadata/migration.sql new file mode 100644 index 000000000..72485b9aa --- /dev/null +++ b/packages/db/prisma/migrations/20260226000000_backfill_audit_source_metadata/migration.sql @@ -0,0 +1,19 @@ +-- Backfill source metadata for historical audit events. +-- +-- Before this change, all audit events were created from the web UI without +-- a 'source' field in metadata. The new analytics dashboard segments events +-- by source (sourcebot-*, mcp, or null/other for API). Without this backfill, +-- historical web UI events would be misclassified as API traffic. + +-- Code searches and chat creation were web-only (no server-side audit existed) +UPDATE "Audit" +SET metadata = jsonb_set(COALESCE(metadata, '{}')::jsonb, '{source}', '"sourcebot-web-client"') +WHERE action IN ('user.performed_code_search', 'user.created_ask_chat') + AND (metadata IS NULL OR metadata->>'source' IS NULL); + +-- Navigation events (find references, goto definition) were web-only +-- (created from the symbolHoverPopup client component) +UPDATE "Audit" +SET metadata = jsonb_set(COALESCE(metadata, '{}')::jsonb, '{source}', '"sourcebot-ui-codenav"') +WHERE action IN ('user.performed_find_references', 'user.performed_goto_definition') + AND (metadata IS NULL OR metadata->>'source' IS NULL); diff --git a/packages/db/tools/scriptRunner.ts b/packages/db/tools/scriptRunner.ts index 8c7b5ab55..9732b9a84 100644 --- a/packages/db/tools/scriptRunner.ts +++ b/packages/db/tools/scriptRunner.ts @@ -2,6 +2,7 @@ import { PrismaClient } from "@sourcebot/db"; import { ArgumentParser } from "argparse"; import { migrateDuplicateConnections } from "./scripts/migrate-duplicate-connections"; import { injectAuditData } from "./scripts/inject-audit-data"; +import { injectAuditDataV2 } from "./scripts/inject-audit-data-v2"; import { injectUserData } from "./scripts/inject-user-data"; import { confirmAction } from "./utils"; import { injectRepoData } from "./scripts/inject-repo-data"; @@ -14,6 +15,7 @@ export interface Script { export const scripts: Record = { "migrate-duplicate-connections": migrateDuplicateConnections, "inject-audit-data": injectAuditData, + "inject-audit-data-v2": injectAuditDataV2, "inject-user-data": injectUserData, "inject-repo-data": injectRepoData, "test-repo-query-perf": testRepoQueryPerf, diff --git a/packages/db/tools/scripts/inject-audit-data-v2.ts b/packages/db/tools/scripts/inject-audit-data-v2.ts new file mode 100644 index 000000000..2d789e1a2 --- /dev/null +++ b/packages/db/tools/scripts/inject-audit-data-v2.ts @@ -0,0 +1,299 @@ +import { Script } from "../scriptRunner"; +import { PrismaClient, Prisma } from "../../dist"; +import { confirmAction } from "../utils"; + +// User profile: defines how a user interacts with Sourcebot +interface UserProfile { + id: string + // Whether this user uses the web UI, and how active they are (0 = never, 1 = heavy) + webWeight: number + // Whether this user uses MCP, and how active they are (0 = never, 1 = heavy) + mcpWeight: number + // Whether this user uses the API directly, and how active they are (0 = never, 1 = heavy) + apiWeight: number + // API source label (for non-MCP API usage) + apiSource: string + // How likely they are to be active on a weekday (0-1) + weekdayActivity: number + // How likely they are to be active on a weekend (0-1) + weekendActivity: number +} + +// Generate realistic audit data for analytics testing +// Simulates 50 users with mixed usage patterns across web UI, MCP, and API +export const injectAuditDataV2: Script = { + run: async (prisma: PrismaClient) => { + const orgId = 1; + + // Check if org exists + const org = await prisma.org.findUnique({ + where: { id: orgId } + }); + + if (!org) { + console.error(`Organization with id ${orgId} not found. Please create it first.`); + return; + } + + console.log(`Injecting audit data for organization: ${org.name} (${org.domain})`); + + const apiSources = ['cli', 'sdk', 'custom-app']; + + // Build user profiles with mixed usage patterns + const users: UserProfile[] = []; + + // Web-only users (20): browse the UI, never use MCP or API + for (let i = 0; i < 20; i++) { + users.push({ + id: `user_${String(users.length + 1).padStart(3, '0')}`, + webWeight: 0.6 + Math.random() * 0.4, // 0.6-1.0 + mcpWeight: 0, + apiWeight: 0, + apiSource: '', + weekdayActivity: 0.7 + Math.random() * 0.2, + weekendActivity: 0.05 + Math.random() * 0.15, + }); + } + + // Hybrid web + MCP users (12): use the web UI daily and also have MCP set up in their IDE + for (let i = 0; i < 12; i++) { + users.push({ + id: `user_${String(users.length + 1).padStart(3, '0')}`, + webWeight: 0.4 + Math.random() * 0.4, // 0.4-0.8 + mcpWeight: 0.5 + Math.random() * 0.5, // 0.5-1.0 + apiWeight: 0, + apiSource: '', + weekdayActivity: 0.8 + Math.random() * 0.15, + weekendActivity: 0.1 + Math.random() * 0.2, + }); + } + + // MCP-heavy users (8): primarily use MCP through their IDE, occasionally check the web UI + for (let i = 0; i < 8; i++) { + users.push({ + id: `user_${String(users.length + 1).padStart(3, '0')}`, + webWeight: 0.05 + Math.random() * 0.2, // 0.05-0.25 (occasional) + mcpWeight: 0.7 + Math.random() * 0.3, // 0.7-1.0 + apiWeight: 0, + apiSource: '', + weekdayActivity: 0.85 + Math.random() * 0.1, + weekendActivity: 0.3 + Math.random() * 0.3, + }); + } + + // API-only users (5): automated scripts/CI, no web UI or MCP + for (let i = 0; i < 5; i++) { + users.push({ + id: `user_${String(users.length + 1).padStart(3, '0')}`, + webWeight: 0, + mcpWeight: 0, + apiWeight: 0.6 + Math.random() * 0.4, + apiSource: apiSources[i % apiSources.length], + weekdayActivity: 0.9 + Math.random() * 0.1, + weekendActivity: 0.6 + Math.random() * 0.3, + }); + } + + // Hybrid web + API users (5): developers who use both the UI and have scripts that call the API + for (let i = 0; i < 5; i++) { + users.push({ + id: `user_${String(users.length + 1).padStart(3, '0')}`, + webWeight: 0.3 + Math.random() * 0.4, + mcpWeight: 0, + apiWeight: 0.4 + Math.random() * 0.4, + apiSource: apiSources[i % apiSources.length], + weekdayActivity: 0.8 + Math.random() * 0.15, + weekendActivity: 0.1 + Math.random() * 0.2, + }); + } + + // Generate data for the last 90 days + const endDate = new Date(); + const startDate = new Date(); + startDate.setDate(startDate.getDate() - 90); + + const webOnlyCount = users.filter(u => u.webWeight > 0 && u.mcpWeight === 0 && u.apiWeight === 0).length; + const hybridWebMcpCount = users.filter(u => u.webWeight > 0 && u.mcpWeight > 0).length; + const mcpHeavyCount = users.filter(u => u.mcpWeight > 0 && u.webWeight < 0.3).length; + const apiOnlyCount = users.filter(u => u.apiWeight > 0 && u.webWeight === 0 && u.mcpWeight === 0).length; + const hybridWebApiCount = users.filter(u => u.webWeight > 0 && u.apiWeight > 0).length; + + console.log(`Generating data from ${startDate.toISOString().split('T')[0]} to ${endDate.toISOString().split('T')[0]}`); + console.log(`User breakdown: ${webOnlyCount} web-only, ${hybridWebMcpCount} web+MCP, ${mcpHeavyCount} MCP-heavy, ${apiOnlyCount} API-only, ${hybridWebApiCount} web+API`); + + confirmAction(); + + function randomTimestamp(date: Date, isWeekend: boolean): Date { + const ts = new Date(date); + if (isWeekend) { + ts.setHours(9 + Math.floor(Math.random() * 12)); + } else { + ts.setHours(9 + Math.floor(Math.random() * 9)); + } + ts.setMinutes(Math.floor(Math.random() * 60)); + ts.setSeconds(Math.floor(Math.random() * 60)); + return ts; + } + + function scaledCount(baseMin: number, baseMax: number, weight: number, isWeekend: boolean): number { + const weekendFactor = isWeekend ? 0.3 : 1.0; + const scaledMax = Math.round(baseMax * weight * weekendFactor); + const scaledMin = Math.min(Math.round(baseMin * weight * weekendFactor), scaledMax); + if (scaledMax <= 0) return 0; + return scaledMin + Math.floor(Math.random() * (scaledMax - scaledMin + 1)); + } + + async function createAudits( + userId: string, + action: string, + count: number, + currentDate: Date, + isWeekend: boolean, + targetType: string, + metadata?: Prisma.InputJsonValue, + ) { + for (let i = 0; i < count; i++) { + await prisma.audit.create({ + data: { + timestamp: randomTimestamp(currentDate, isWeekend), + action, + actorId: userId, + actorType: 'user', + targetId: `${targetType}_${Math.floor(Math.random() * 1000)}`, + targetType, + sourcebotVersion: '1.0.0', + orgId, + ...(metadata ? { metadata } : {}), + } + }); + } + } + + // Generate data for each day + for (let d = new Date(startDate); d <= endDate; d.setDate(d.getDate() + 1)) { + const currentDate = new Date(d); + const dayOfWeek = currentDate.getDay(); + const isWeekend = dayOfWeek === 0 || dayOfWeek === 6; + + for (const user of users) { + // Determine if user is active today + const activityChance = isWeekend ? user.weekendActivity : user.weekdayActivity; + if (Math.random() >= activityChance) continue; + + // --- Web UI activity (source='sourcebot-web-client' or 'sourcebot-ui-codenav') --- + if (user.webWeight > 0) { + const webMeta: Prisma.InputJsonValue = { source: 'sourcebot-web-client' }; + const codenavMeta: Prisma.InputJsonValue = { source: 'sourcebot-ui-codenav' }; + + // Code searches (2-5 base) + await createAudits(user.id, 'user.performed_code_search', + scaledCount(2, 5, user.webWeight, isWeekend), currentDate, isWeekend, 'search', webMeta); + + // Navigations: find references + goto definition (5-10 base) + const navCount = scaledCount(5, 10, user.webWeight, isWeekend); + for (let i = 0; i < navCount; i++) { + const action = Math.random() < 0.6 ? 'user.performed_find_references' : 'user.performed_goto_definition'; + await createAudits(user.id, action, 1, currentDate, isWeekend, 'symbol', codenavMeta); + } + + // Ask chats (0-2 base) - web only + await createAudits(user.id, 'user.created_ask_chat', + scaledCount(0, 2, user.webWeight, isWeekend), currentDate, isWeekend, 'org', webMeta); + + // File source views (3-8 base) + await createAudits(user.id, 'user.fetched_file_source', + scaledCount(3, 8, user.webWeight, isWeekend), currentDate, isWeekend, 'file', webMeta); + + // File tree browsing (2-5 base) + await createAudits(user.id, 'user.fetched_file_tree', + scaledCount(2, 5, user.webWeight, isWeekend), currentDate, isWeekend, 'repo', webMeta); + + // List repos (1-3 base) + await createAudits(user.id, 'user.listed_repos', + scaledCount(1, 3, user.webWeight, isWeekend), currentDate, isWeekend, 'org', webMeta); + } + + // --- MCP activity (source='mcp') --- + if (user.mcpWeight > 0) { + const meta: Prisma.InputJsonValue = { source: 'mcp' }; + + // MCP code searches (5-15 base) - higher volume than web + await createAudits(user.id, 'user.performed_code_search', + scaledCount(5, 15, user.mcpWeight, isWeekend), currentDate, isWeekend, 'search', meta); + + // MCP file source fetches (5-12 base) + await createAudits(user.id, 'user.fetched_file_source', + scaledCount(5, 12, user.mcpWeight, isWeekend), currentDate, isWeekend, 'file', meta); + + // MCP file tree fetches (3-6 base) + await createAudits(user.id, 'user.fetched_file_tree', + scaledCount(3, 6, user.mcpWeight, isWeekend), currentDate, isWeekend, 'repo', meta); + + // MCP list repos (3-8 base) + await createAudits(user.id, 'user.listed_repos', + scaledCount(3, 8, user.mcpWeight, isWeekend), currentDate, isWeekend, 'org', meta); + } + + // --- API activity (source=cli/sdk/custom-app) --- + if (user.apiWeight > 0) { + const meta: Prisma.InputJsonValue = { source: user.apiSource }; + + // API code searches (10-30 base) - highest volume, automated + await createAudits(user.id, 'user.performed_code_search', + scaledCount(10, 30, user.apiWeight, isWeekend), currentDate, isWeekend, 'search', meta); + + // API file source fetches (8-20 base) + await createAudits(user.id, 'user.fetched_file_source', + scaledCount(8, 20, user.apiWeight, isWeekend), currentDate, isWeekend, 'file', meta); + + // API file tree fetches (4-10 base) + await createAudits(user.id, 'user.fetched_file_tree', + scaledCount(4, 10, user.apiWeight, isWeekend), currentDate, isWeekend, 'repo', meta); + + // API list repos (5-15 base) + await createAudits(user.id, 'user.listed_repos', + scaledCount(5, 15, user.apiWeight, isWeekend), currentDate, isWeekend, 'org', meta); + } + } + } + + console.log(`\nAudit data injection complete!`); + console.log(`Users: ${users.length}`); + console.log(`Date range: ${startDate.toISOString().split('T')[0]} to ${endDate.toISOString().split('T')[0]}`); + + // Show statistics + const stats = await prisma.audit.groupBy({ + by: ['action'], + where: { orgId }, + _count: { action: true } + }); + + console.log('\nAction breakdown:'); + stats.forEach(stat => { + console.log(` ${stat.action}: ${stat._count.action}`); + }); + + // Show source breakdown + const allAudits = await prisma.audit.findMany({ + where: { orgId }, + select: { metadata: true } + }); + + let webCount = 0, mcpCount = 0, apiCount = 0; + for (const audit of allAudits) { + const meta = audit.metadata as Record | null; + const source = meta?.source as string | undefined; + if (source && typeof source === 'string' && source.startsWith('sourcebot-')) { + webCount++; + } else if (source === 'mcp') { + mcpCount++; + } else { + apiCount++; + } + } + console.log('\nSource breakdown:'); + console.log(` Web UI (source=sourcebot-*): ${webCount}`); + console.log(` MCP (source=mcp): ${mcpCount}`); + console.log(` API (source=other/null): ${apiCount}`); + }, +}; diff --git a/packages/db/tools/scripts/inject-audit-data.ts b/packages/db/tools/scripts/inject-audit-data.ts index bcfbf7685..56478e3e5 100644 --- a/packages/db/tools/scripts/inject-audit-data.ts +++ b/packages/db/tools/scripts/inject-audit-data.ts @@ -1,35 +1,18 @@ import { Script } from "../scriptRunner"; -import { PrismaClient, Prisma } from "../../dist"; +import { PrismaClient } from "../../dist"; import { confirmAction } from "../utils"; -// User profile: defines how a user interacts with Sourcebot -interface UserProfile { - id: string - // Whether this user uses the web UI, and how active they are (0 = never, 1 = heavy) - webWeight: number - // Whether this user uses MCP, and how active they are (0 = never, 1 = heavy) - mcpWeight: number - // Whether this user uses the API directly, and how active they are (0 = never, 1 = heavy) - apiWeight: number - // API source label (for non-MCP API usage) - apiSource: string - // How likely they are to be active on a weekday (0-1) - weekdayActivity: number - // How likely they are to be active on a weekend (0-1) - weekendActivity: number -} - // Generate realistic audit data for analytics testing -// Simulates 50 users with mixed usage patterns across web UI, MCP, and API +// Simulates 50 engineers with varying activity patterns export const injectAuditData: Script = { run: async (prisma: PrismaClient) => { const orgId = 1; - + // Check if org exists const org = await prisma.org.findUnique({ where: { id: orgId } }); - + if (!org) { console.error(`Organization with id ${orgId} not found. Please create it first.`); return; @@ -37,263 +20,154 @@ export const injectAuditData: Script = { console.log(`Injecting audit data for organization: ${org.name} (${org.domain})`); - const apiSources = ['cli', 'sdk', 'custom-app']; - - // Build user profiles with mixed usage patterns - const users: UserProfile[] = []; - - // Web-only users (20): browse the UI, never use MCP or API - for (let i = 0; i < 20; i++) { - users.push({ - id: `user_${String(users.length + 1).padStart(3, '0')}`, - webWeight: 0.6 + Math.random() * 0.4, // 0.6-1.0 - mcpWeight: 0, - apiWeight: 0, - apiSource: '', - weekdayActivity: 0.7 + Math.random() * 0.2, - weekendActivity: 0.05 + Math.random() * 0.15, - }); - } - - // Hybrid web + MCP users (12): use the web UI daily and also have MCP set up in their IDE - for (let i = 0; i < 12; i++) { - users.push({ - id: `user_${String(users.length + 1).padStart(3, '0')}`, - webWeight: 0.4 + Math.random() * 0.4, // 0.4-0.8 - mcpWeight: 0.5 + Math.random() * 0.5, // 0.5-1.0 - apiWeight: 0, - apiSource: '', - weekdayActivity: 0.8 + Math.random() * 0.15, - weekendActivity: 0.1 + Math.random() * 0.2, - }); - } - - // MCP-heavy users (8): primarily use MCP through their IDE, occasionally check the web UI - for (let i = 0; i < 8; i++) { - users.push({ - id: `user_${String(users.length + 1).padStart(3, '0')}`, - webWeight: 0.05 + Math.random() * 0.2, // 0.05-0.25 (occasional) - mcpWeight: 0.7 + Math.random() * 0.3, // 0.7-1.0 - apiWeight: 0, - apiSource: '', - weekdayActivity: 0.85 + Math.random() * 0.1, - weekendActivity: 0.3 + Math.random() * 0.3, - }); - } - - // API-only users (5): automated scripts/CI, no web UI or MCP - for (let i = 0; i < 5; i++) { - users.push({ - id: `user_${String(users.length + 1).padStart(3, '0')}`, - webWeight: 0, - mcpWeight: 0, - apiWeight: 0.6 + Math.random() * 0.4, - apiSource: apiSources[i % apiSources.length], - weekdayActivity: 0.9 + Math.random() * 0.1, - weekendActivity: 0.6 + Math.random() * 0.3, - }); - } - - // Hybrid web + API users (5): developers who use both the UI and have scripts that call the API - for (let i = 0; i < 5; i++) { - users.push({ - id: `user_${String(users.length + 1).padStart(3, '0')}`, - webWeight: 0.3 + Math.random() * 0.4, - mcpWeight: 0, - apiWeight: 0.4 + Math.random() * 0.4, - apiSource: apiSources[i % apiSources.length], - weekdayActivity: 0.8 + Math.random() * 0.15, - weekendActivity: 0.1 + Math.random() * 0.2, - }); - } + // Generate 50 fake user IDs + const userIds = Array.from({ length: 50 }, (_, i) => `user_${String(i + 1).padStart(3, '0')}`); + + // Actions we're tracking + const actions = [ + 'user.performed_code_search', + 'user.performed_find_references', + 'user.performed_goto_definition', + 'user.created_ask_chat' + ]; // Generate data for the last 90 days const endDate = new Date(); const startDate = new Date(); startDate.setDate(startDate.getDate() - 90); - const webOnlyCount = users.filter(u => u.webWeight > 0 && u.mcpWeight === 0 && u.apiWeight === 0).length; - const hybridWebMcpCount = users.filter(u => u.webWeight > 0 && u.mcpWeight > 0).length; - const mcpHeavyCount = users.filter(u => u.mcpWeight > 0 && u.webWeight < 0.3).length; - const apiOnlyCount = users.filter(u => u.apiWeight > 0 && u.webWeight === 0 && u.mcpWeight === 0).length; - const hybridWebApiCount = users.filter(u => u.webWeight > 0 && u.apiWeight > 0).length; - console.log(`Generating data from ${startDate.toISOString().split('T')[0]} to ${endDate.toISOString().split('T')[0]}`); - console.log(`User breakdown: ${webOnlyCount} web-only, ${hybridWebMcpCount} web+MCP, ${mcpHeavyCount} MCP-heavy, ${apiOnlyCount} API-only, ${hybridWebApiCount} web+API`); confirmAction(); - function randomTimestamp(date: Date, isWeekend: boolean): Date { - const ts = new Date(date); - if (isWeekend) { - ts.setHours(9 + Math.floor(Math.random() * 12)); - } else { - ts.setHours(9 + Math.floor(Math.random() * 9)); - } - ts.setMinutes(Math.floor(Math.random() * 60)); - ts.setSeconds(Math.floor(Math.random() * 60)); - return ts; - } - - function scaledCount(baseMin: number, baseMax: number, weight: number, isWeekend: boolean): number { - const weekendFactor = isWeekend ? 0.3 : 1.0; - const scaledMax = Math.round(baseMax * weight * weekendFactor); - const scaledMin = Math.min(Math.round(baseMin * weight * weekendFactor), scaledMax); - if (scaledMax <= 0) return 0; - return scaledMin + Math.floor(Math.random() * (scaledMax - scaledMin + 1)); - } - - async function createAudits( - userId: string, - action: string, - count: number, - currentDate: Date, - isWeekend: boolean, - targetType: string, - metadata?: Prisma.InputJsonValue, - ) { - for (let i = 0; i < count; i++) { - await prisma.audit.create({ - data: { - timestamp: randomTimestamp(currentDate, isWeekend), - action, - actorId: userId, - actorType: 'user', - targetId: `${targetType}_${Math.floor(Math.random() * 1000)}`, - targetType, - sourcebotVersion: '1.0.0', - orgId, - ...(metadata ? { metadata } : {}), - } - }); - } - } - // Generate data for each day for (let d = new Date(startDate); d <= endDate; d.setDate(d.getDate() + 1)) { const currentDate = new Date(d); - const dayOfWeek = currentDate.getDay(); + const dayOfWeek = currentDate.getDay(); // 0 = Sunday, 6 = Saturday const isWeekend = dayOfWeek === 0 || dayOfWeek === 6; - - for (const user of users) { - // Determine if user is active today - const activityChance = isWeekend ? user.weekendActivity : user.weekdayActivity; - if (Math.random() >= activityChance) continue; - - // --- Web UI activity (source='sourcebot-web-client' or 'sourcebot-ui-codenav') --- - if (user.webWeight > 0) { - const webMeta: Prisma.InputJsonValue = { source: 'sourcebot-web-client' }; - const codenavMeta: Prisma.InputJsonValue = { source: 'sourcebot-ui-codenav' }; - - // Code searches (2-5 base) - await createAudits(user.id, 'user.performed_code_search', - scaledCount(2, 5, user.webWeight, isWeekend), currentDate, isWeekend, 'search', webMeta); - - // Navigations: find references + goto definition (5-10 base) - const navCount = scaledCount(5, 10, user.webWeight, isWeekend); - for (let i = 0; i < navCount; i++) { - const action = Math.random() < 0.6 ? 'user.performed_find_references' : 'user.performed_goto_definition'; - await createAudits(user.id, action, 1, currentDate, isWeekend, 'symbol', codenavMeta); + + // For each user, generate activity for this day + for (const userId of userIds) { + // Determine if user is active today (higher chance on weekdays) + const isActiveToday = isWeekend + ? Math.random() < 0.15 // 15% chance on weekends + : Math.random() < 0.85; // 85% chance on weekdays + + if (!isActiveToday) continue; + + // Generate code searches (2-5 per day) + const codeSearches = isWeekend + ? Math.floor(Math.random() * 2) + 1 // 1-2 on weekends + : Math.floor(Math.random() * 4) + 2; // 2-5 on weekdays + + // Generate navigation actions (5-10 per day) + const navigationActions = isWeekend + ? Math.floor(Math.random() * 3) + 1 // 1-3 on weekends + : Math.floor(Math.random() * 6) + 5; // 5-10 on weekdays + + // Create code search records + for (let i = 0; i < codeSearches; i++) { + const timestamp = new Date(currentDate); + // Spread throughout the day (9 AM to 6 PM on weekdays, more random on weekends) + if (isWeekend) { + timestamp.setHours(9 + Math.floor(Math.random() * 12)); + timestamp.setMinutes(Math.floor(Math.random() * 60)); + } else { + timestamp.setHours(9 + Math.floor(Math.random() * 9)); + timestamp.setMinutes(Math.floor(Math.random() * 60)); } - - // Ask chats (0-2 base) - web only - await createAudits(user.id, 'user.created_ask_chat', - scaledCount(0, 2, user.webWeight, isWeekend), currentDate, isWeekend, 'org', webMeta); - - // File source views (3-8 base) - await createAudits(user.id, 'user.fetched_file_source', - scaledCount(3, 8, user.webWeight, isWeekend), currentDate, isWeekend, 'file', webMeta); - - // File tree browsing (2-5 base) - await createAudits(user.id, 'user.fetched_file_tree', - scaledCount(2, 5, user.webWeight, isWeekend), currentDate, isWeekend, 'repo', webMeta); - - // List repos (1-3 base) - await createAudits(user.id, 'user.listed_repos', - scaledCount(1, 3, user.webWeight, isWeekend), currentDate, isWeekend, 'org', webMeta); + timestamp.setSeconds(Math.floor(Math.random() * 60)); + + await prisma.audit.create({ + data: { + timestamp, + action: 'user.performed_code_search', + actorId: userId, + actorType: 'user', + targetId: `search_${Math.floor(Math.random() * 1000)}`, + targetType: 'search', + sourcebotVersion: '1.0.0', + orgId + } + }); } - // --- MCP activity (source='mcp') --- - if (user.mcpWeight > 0) { - const meta: Prisma.InputJsonValue = { source: 'mcp' }; - - // MCP code searches (5-15 base) - higher volume than web - await createAudits(user.id, 'user.performed_code_search', - scaledCount(5, 15, user.mcpWeight, isWeekend), currentDate, isWeekend, 'search', meta); - - // MCP file source fetches (5-12 base) - await createAudits(user.id, 'user.fetched_file_source', - scaledCount(5, 12, user.mcpWeight, isWeekend), currentDate, isWeekend, 'file', meta); + // Create navigation action records + for (let i = 0; i < navigationActions; i++) { + const timestamp = new Date(currentDate); + if (isWeekend) { + timestamp.setHours(9 + Math.floor(Math.random() * 12)); + timestamp.setMinutes(Math.floor(Math.random() * 60)); + } else { + timestamp.setHours(9 + Math.floor(Math.random() * 9)); + timestamp.setMinutes(Math.floor(Math.random() * 60)); + } + timestamp.setSeconds(Math.floor(Math.random() * 60)); - // MCP file tree fetches (3-6 base) - await createAudits(user.id, 'user.fetched_file_tree', - scaledCount(3, 6, user.mcpWeight, isWeekend), currentDate, isWeekend, 'repo', meta); + // Randomly choose between find references and goto definition + const action = Math.random() < 0.6 ? 'user.performed_find_references' : 'user.performed_goto_definition'; - // MCP list repos (3-8 base) - await createAudits(user.id, 'user.listed_repos', - scaledCount(3, 8, user.mcpWeight, isWeekend), currentDate, isWeekend, 'org', meta); + await prisma.audit.create({ + data: { + timestamp, + action, + actorId: userId, + actorType: 'user', + targetId: `symbol_${Math.floor(Math.random() * 1000)}`, + targetType: 'symbol', + sourcebotVersion: '1.0.0', + orgId + } + }); } - // --- API activity (source=cli/sdk/custom-app) --- - if (user.apiWeight > 0) { - const meta: Prisma.InputJsonValue = { source: user.apiSource }; - - // API code searches (10-30 base) - highest volume, automated - await createAudits(user.id, 'user.performed_code_search', - scaledCount(10, 30, user.apiWeight, isWeekend), currentDate, isWeekend, 'search', meta); - - // API file source fetches (8-20 base) - await createAudits(user.id, 'user.fetched_file_source', - scaledCount(8, 20, user.apiWeight, isWeekend), currentDate, isWeekend, 'file', meta); - - // API file tree fetches (4-10 base) - await createAudits(user.id, 'user.fetched_file_tree', - scaledCount(4, 10, user.apiWeight, isWeekend), currentDate, isWeekend, 'repo', meta); - - // API list repos (5-15 base) - await createAudits(user.id, 'user.listed_repos', - scaledCount(5, 15, user.apiWeight, isWeekend), currentDate, isWeekend, 'org', meta); + // Generate Ask chat sessions (0-2 per day on weekdays, 0-1 on weekends) + const askChats = isWeekend + ? Math.floor(Math.random() * 2) // 0-1 on weekends + : Math.floor(Math.random() * 3); // 0-2 on weekdays + + // Create Ask chat records + for (let i = 0; i < askChats; i++) { + const timestamp = new Date(currentDate); + if (isWeekend) { + timestamp.setHours(9 + Math.floor(Math.random() * 12)); + timestamp.setMinutes(Math.floor(Math.random() * 60)); + } else { + timestamp.setHours(9 + Math.floor(Math.random() * 9)); + timestamp.setMinutes(Math.floor(Math.random() * 60)); + } + timestamp.setSeconds(Math.floor(Math.random() * 60)); + + await prisma.audit.create({ + data: { + timestamp, + action: 'user.created_ask_chat', + actorId: userId, + actorType: 'user', + targetId: orgId.toString(), + targetType: 'org', + sourcebotVersion: '1.0.0', + orgId + } + }); } } } console.log(`\nAudit data injection complete!`); - console.log(`Users: ${users.length}`); + console.log(`Users: ${userIds.length}`); console.log(`Date range: ${startDate.toISOString().split('T')[0]} to ${endDate.toISOString().split('T')[0]}`); - - // Show statistics + + // Show some statistics const stats = await prisma.audit.groupBy({ by: ['action'], where: { orgId }, _count: { action: true } }); - + console.log('\nAction breakdown:'); stats.forEach(stat => { console.log(` ${stat.action}: ${stat._count.action}`); }); - - // Show source breakdown - const allAudits = await prisma.audit.findMany({ - where: { orgId }, - select: { metadata: true } - }); - - let webCount = 0, mcpCount = 0, apiCount = 0; - for (const audit of allAudits) { - const meta = audit.metadata as Record | null; - const source = meta?.source as string | undefined; - if (source && typeof source === 'string' && source.startsWith('sourcebot-')) { - webCount++; - } else if (source === 'mcp') { - mcpCount++; - } else { - apiCount++; - } - } - console.log('\nSource breakdown:'); - console.log(` Web UI (source=sourcebot-*): ${webCount}`); - console.log(` MCP (source=mcp): ${mcpCount}`); - console.log(` API (source=other/null): ${apiCount}`); }, -}; +}; \ No newline at end of file