From b172e4c992c010a3367d83ec3e9d7d6120242b83 Mon Sep 17 00:00:00 2001 From: Adam Gurary Date: Fri, 20 Mar 2026 13:06:00 -0400 Subject: [PATCH 1/3] Add Vector Search plugin for AppKit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds @databricks/appkit-vector-search — a plugin that gives Databricks Apps built with AppKit instant vector search query capabilities. Ships backend (Express routes, VS REST API client, auth) and frontend (React hook, styled components with Radix UI). Developer experience target: ~45 lines for a full search page with search box, results, filters, and keyword highlighting. 82 tests included. Validated against real VS index on dogfood. --- packages/vector-search/README.md | 377 ++++++++++++++++++ packages/vector-search/package.json | 47 +++ packages/vector-search/src/index.ts | 13 + .../src/plugin/VectorSearchClient.ts | 200 ++++++++++ .../src/plugin/VectorSearchPlugin.ts | 105 +++++ packages/vector-search/src/plugin/auth.ts | 48 +++ packages/vector-search/src/plugin/routes.ts | 136 +++++++ packages/vector-search/src/plugin/types.ts | 198 +++++++++ .../src/ui/components/SearchBox.tsx | 73 ++++ .../src/ui/components/SearchLoadMore.tsx | 24 ++ .../src/ui/components/SearchResultCard.tsx | 76 ++++ .../src/ui/components/SearchResults.tsx | 93 +++++ .../src/ui/hooks/useVectorSearch.ts | 175 ++++++++ packages/vector-search/src/ui/index.ts | 6 + .../tests/integration/dogfood.test.ts | 97 +++++ .../tests/plugin/VectorSearchClient.test.ts | 233 +++++++++++ .../tests/plugin/VectorSearchPlugin.test.ts | 115 ++++++ .../vector-search/tests/plugin/auth.test.ts | 108 +++++ .../vector-search/tests/plugin/routes.test.ts | 206 ++++++++++ .../tests/ui/components/components.test.tsx | 158 ++++++++ .../tests/ui/hooks/useVectorSearch.test.ts | 201 ++++++++++ packages/vector-search/tsconfig.json | 16 + packages/vector-search/vitest.config.ts | 9 + packages/vector-search/vitest.setup.ts | 1 + 24 files changed, 2715 
insertions(+) create mode 100644 packages/vector-search/README.md create mode 100644 packages/vector-search/package.json create mode 100644 packages/vector-search/src/index.ts create mode 100644 packages/vector-search/src/plugin/VectorSearchClient.ts create mode 100644 packages/vector-search/src/plugin/VectorSearchPlugin.ts create mode 100644 packages/vector-search/src/plugin/auth.ts create mode 100644 packages/vector-search/src/plugin/routes.ts create mode 100644 packages/vector-search/src/plugin/types.ts create mode 100644 packages/vector-search/src/ui/components/SearchBox.tsx create mode 100644 packages/vector-search/src/ui/components/SearchLoadMore.tsx create mode 100644 packages/vector-search/src/ui/components/SearchResultCard.tsx create mode 100644 packages/vector-search/src/ui/components/SearchResults.tsx create mode 100644 packages/vector-search/src/ui/hooks/useVectorSearch.ts create mode 100644 packages/vector-search/src/ui/index.ts create mode 100644 packages/vector-search/tests/integration/dogfood.test.ts create mode 100644 packages/vector-search/tests/plugin/VectorSearchClient.test.ts create mode 100644 packages/vector-search/tests/plugin/VectorSearchPlugin.test.ts create mode 100644 packages/vector-search/tests/plugin/auth.test.ts create mode 100644 packages/vector-search/tests/plugin/routes.test.ts create mode 100644 packages/vector-search/tests/ui/components/components.test.tsx create mode 100644 packages/vector-search/tests/ui/hooks/useVectorSearch.test.ts create mode 100644 packages/vector-search/tsconfig.json create mode 100644 packages/vector-search/vitest.config.ts create mode 100644 packages/vector-search/vitest.setup.ts diff --git a/packages/vector-search/README.md b/packages/vector-search/README.md new file mode 100644 index 00000000..b831fc00 --- /dev/null +++ b/packages/vector-search/README.md @@ -0,0 +1,377 @@ +# @databricks/appkit-vector-search + +Appkit plugin that adds Databricks Vector Search to your app — backend routes, React hook, 
and UI components in one package. + +## Quick Start + +**Backend** (`app.ts`): + +```typescript +import { createApp } from '@databricks/appkit'; +import { VectorSearchPlugin } from '@databricks/appkit-vector-search'; + +createApp({ + plugins: [ + new VectorSearchPlugin({ + indexes: { + products: { + indexName: 'catalog.schema.product_index', + columns: ['id', 'name', 'description', 'price', 'category'], + }, + }, + }), + ], +}); +``` + +**Frontend** (`ProductSearch.tsx`) — the hook and components are published under the `/ui` subpath: + +```tsx +import { useVectorSearch, SearchBox, SearchResults } from '@databricks/appkit-vector-search/ui'; + +function ProductSearch() { + const vs = useVectorSearch<{ id: string; name: string; description: string; price: number; category: string }>('products'); + + return ( +
+ + +
+ ); +} +``` + +That's it — hybrid search with debouncing, loading states, keyword highlighting, and error handling. + +## Installation + +```bash +npm install @databricks/appkit-vector-search +``` + +Peer dependencies: `react ^18.x`, `@databricks/appkit ^0.x`. + +## Backend Setup + +Register the plugin with `createApp`. Each key in `indexes` is an **alias** used by the frontend hook and API routes. + +```typescript +new VectorSearchPlugin({ + indexes: { + products: { + indexName: 'catalog.schema.product_index', // required — three-level UC name + columns: ['id', 'name', 'description'], // required — columns to return + queryType: 'hybrid', // 'ann' | 'hybrid' | 'full_text' (default: 'hybrid') + numResults: 20, // max results per query (default: 20) + reranker: false, // enable Databricks reranker (default: false) + auth: 'service-principal', // 'service-principal' | 'on-behalf-of-user' (default: 'service-principal') + cache: { enabled: false }, // see Caching section + pagination: false, // see Pagination section + endpointName: 'my-endpoint', // required when pagination: true + embeddingFn: undefined, // see Self-Managed Embeddings section + }, + }, +}) +``` + +### IndexConfig Reference + +| Property | Type | Default | Description | +|----------|------|---------|-------------| +| `indexName` | `string` | *required* | Three-level UC name (`catalog.schema.index`) | +| `columns` | `string[]` | *required* | Columns to return in results | +| `queryType` | `'ann' \| 'hybrid' \| 'full_text'` | `'hybrid'` | Default search mode | +| `numResults` | `number` | `20` | Max results per query | +| `reranker` | `boolean \| { columnsToRerank: string[] }` | `false` | Enable built-in reranker | +| `auth` | `'service-principal' \| 'on-behalf-of-user'` | `'service-principal'` | Auth mode | +| `cache` | `CacheConfig` | `undefined` | Optional result caching | +| `pagination` | `boolean` | `false` | Enable cursor pagination | +| `endpointName` | `string` | `undefined` | VS endpoint name 
(required if `pagination: true`) | +| `embeddingFn` | `(text: string) => Promise` | `undefined` | Custom embedding function for self-managed indexes | + +## Frontend + +### `useVectorSearch` Hook + +```typescript +const vs = useVectorSearch('products', { + debounceMs: 300, // debounce delay (default: 300) + numResults: 10, // override server default + queryType: 'ann', // override server default + reranker: true, // override server default + minQueryLength: 2, // minimum chars before searching (default: 1) + initialFilters: { category: 'electronics' }, + onResults: (response) => console.log(response), + onError: (error) => console.error(error), +}); +``` + +**Returns:** + +| Property | Type | Description | +|----------|------|-------------| +| `search` | `(query: string) => void` | Execute a search (debounced) | +| `results` | `SearchResult[]` | Current results (each has `.score` and `.data`) | +| `isLoading` | `boolean` | Whether a search is in flight | +| `error` | `SearchError \| null` | Error from last search | +| `query` | `string` | Current query text | +| `totalCount` | `number` | Total result count | +| `queryTimeMs` | `number` | Query execution time in ms | +| `fromCache` | `boolean` | Whether results came from cache | +| `setFilters` | `(filters) => void` | Set filters and re-execute search | +| `activeFilters` | `SearchFilters` | Current active filters | +| `clear` | `() => void` | Clear query, results, and filters | +| `hasMore` | `boolean` | More results available (pagination) | +| `loadMore` | `() => void` | Fetch next page, append to results | +| `isLoadingMore` | `boolean` | Whether loadMore is in flight | + +The hook handles debouncing, request cancellation (AbortController), filter reactivity, and cleanup on unmount. + +### Components + +#### `` + +```tsx + +``` + +Includes search icon, clear button (appears when input has value), Escape key to clear, and loading spinner. 
+ +#### `` + +```tsx + ...} // fully custom result rendering (overrides default card) + className="mt-4" +/> +``` + +States: loading skeleton (3 animated cards), error banner, empty message, results with count + timing. + +#### `` + +Used internally by `SearchResults`, but can be used standalone: + +```tsx + +``` + +#### `` + +```tsx + +``` + +### Filters + +Use `setFilters` from the hook to apply VS filter syntax: + +```typescript +// IN list +vs.setFilters({ category: ['electronics', 'books'] }); + +// Comparison operators +vs.setFilters({ 'price >=': 10, 'price <=': 100 }); + +// NOT +vs.setFilters({ 'title NOT': 'test' }); + +// LIKE +vs.setFilters({ 'name LIKE': 'data%' }); + +// OR across columns +vs.setFilters({ 'color1 OR color2': ['red', 'blue'] }); +``` + +Calling `setFilters` immediately re-executes the current search with the new filters. + +## Auth + +### Service Principal (default) + +The plugin uses `DATABRICKS_CLIENT_ID` and `DATABRICKS_CLIENT_SECRET` from the environment. When deployed to Databricks Apps, these are set automatically. OAuth tokens are cached and refreshed with a 2-minute buffer before expiry. + +No configuration needed — this is the default. + +### On-Behalf-of-User + +For indexes with row-level security or Unity Catalog permissions: + +```typescript +indexes: { + docs: { + indexName: 'catalog.schema.docs_index', + columns: ['id', 'title', 'content'], + auth: 'on-behalf-of-user', // uses the logged-in user's token + }, +} +``` + +The plugin extracts the user's OAuth token from the `x-forwarded-access-token` header (set by Databricks Apps proxy). Queries run with the user's identity and UC permissions. 
+ +## Self-Managed Embeddings + +For indexes that don't use Databricks-managed embeddings, provide an `embeddingFn` that converts query text to a vector: + +```typescript +indexes: { + custom: { + indexName: 'catalog.schema.custom_index', + columns: ['id', 'title', 'content'], + queryType: 'ann', + embeddingFn: async (text) => { + const resp = await fetch( + `https://${process.env.DATABRICKS_HOST}/serving-endpoints/my-embedding-model/invocations`, + { + method: 'POST', + headers: { 'Authorization': `Bearer ${token}`, 'Content-Type': 'application/json' }, + body: JSON.stringify({ input: [text] }), + }, + ); + const data = await resp.json(); + return data.data[0].embedding; + }, + }, +} +``` + +When `embeddingFn` is set, the plugin calls it to convert `queryText` into `queryVector` before sending to VS. The frontend hook works identically — users type text, the backend handles the conversion. + +If omitted, the plugin sends `queryText` directly and VS computes embeddings server-side (managed mode). + +## Caching + +Optional LRU cache for search results. Off by default (freeform search has low cache hit rates). + +```typescript +indexes: { + products: { + indexName: 'catalog.schema.product_index', + columns: ['id', 'name', 'description'], + cache: { + enabled: true, + ttlSeconds: 120, // time-to-live per entry (default: 60) + maxEntries: 1000, // max cached queries (default: 1000) + }, + }, +} +``` + +Cached responses include `fromCache: true` in the response. The hook exposes this via `vs.fromCache`. + +## Pagination + +Cursor-based pagination for large result sets. Off by default — VS typically returns results in 20-40ms, so most apps don't need it. 
+ +```typescript +indexes: { + products: { + indexName: 'catalog.schema.product_index', + columns: ['id', 'name', 'description'], + pagination: true, + endpointName: 'my-vs-endpoint', // required when pagination is enabled + }, +} +``` + +Frontend usage: + +```tsx +const vs = useVectorSearch('products'); + +return ( + <> + + + + +); +``` + +`loadMore` fetches the next page and appends results to the existing array. + +## API Reference + +The plugin registers these Express routes automatically: + +| Method | Path | Body | Description | +|--------|------|------|-------------| +| `POST` | `/api/vector-search/:alias/query` | `SearchRequest` | Execute a search | +| `POST` | `/api/vector-search/:alias/next-page` | `{ pageToken: string }` | Fetch next page (requires `pagination: true`) | +| `GET` | `/api/vector-search/:alias/config` | — | Returns index config (columns, queryType, numResults, etc.) | + +### SearchRequest Body + +```json +{ + "queryText": "wireless headphones", + "filters": { "category": ["electronics"] }, + "numResults": 10, + "queryType": "hybrid", + "reranker": true +} +``` + +### SearchResponse + +```json +{ + "results": [ + { "score": 0.92, "data": { "id": "1", "name": "...", "description": "..." } } + ], + "totalCount": 47, + "queryTimeMs": 35, + "queryType": "hybrid", + "fromCache": false, + "nextPageToken": null +} +``` + +### Error Response + +```json +{ + "code": "INVALID_QUERY", + "message": "queryText or queryVector is required", + "statusCode": 400 +} +``` + +Error codes: `UNAUTHORIZED`, `INDEX_NOT_FOUND`, `INVALID_QUERY`, `RATE_LIMITED`, `INTERNAL`. 
diff --git a/packages/vector-search/package.json b/packages/vector-search/package.json new file mode 100644 index 00000000..efdeb804 --- /dev/null +++ b/packages/vector-search/package.json @@ -0,0 +1,47 @@ +{ + "name": "@databricks/appkit-vector-search", + "version": "0.1.0", + "description": "Vector Search plugin for Databricks Appkit", + "main": "dist/index.js", + "types": "dist/index.d.ts", + "exports": { + ".": { + "import": "./dist/index.js", + "types": "./dist/index.d.ts" + }, + "./ui": { + "import": "./dist/ui/index.js", + "types": "./dist/ui/index.d.ts" + } + }, + "scripts": { + "build": "tsc", + "dev": "tsc --watch", + "test": "vitest run", + "test:watch": "vitest" + }, + "peerDependencies": { + "@databricks/appkit": "^0.1.0", + "react": "^18.0.0" + }, + "dependencies": { + "@radix-ui/react-icons": "^1.3.0", + "@radix-ui/react-select": "^2.1.0", + "@radix-ui/react-slider": "^1.2.0", + "express": "^5.2.1" + }, + "devDependencies": { + "@testing-library/jest-dom": "^6.4.0", + "@testing-library/react": "^15.0.0", + "@types/express": "^4.17.0", + "@types/react": "^18.3.0", + "@types/react-dom": "^19.2.3", + "@types/supertest": "^7.2.0", + "jsdom": "^24.0.0", + "react": "^19.2.4", + "react-dom": "^19.2.4", + "supertest": "^7.2.2", + "typescript": "^5.4.0", + "vitest": "^1.6.0" + } +} diff --git a/packages/vector-search/src/index.ts b/packages/vector-search/src/index.ts new file mode 100644 index 00000000..dbe386e9 --- /dev/null +++ b/packages/vector-search/src/index.ts @@ -0,0 +1,13 @@ +export { VectorSearchPlugin } from './plugin/VectorSearchPlugin'; +export { createVectorSearchRouter } from './plugin/routes'; +export type { + VectorSearchPluginConfig, + IndexConfig, + RerankerConfig, + CacheConfig, + SearchRequest, + SearchResponse, + SearchResult, + SearchFilters, + SearchError, +} from './plugin/types'; diff --git a/packages/vector-search/src/plugin/VectorSearchClient.ts b/packages/vector-search/src/plugin/VectorSearchClient.ts new file mode 100644 index 
/**
 * Minimal client for the Databricks Vector Search REST API.
 *
 * Sends `query` and `query-next-page` requests to
 * `https://<host>/api/2.0/vector-search/indexes/<index>/...`, retries
 * transient failures (HTTP 429, 5xx and network errors) with exponential
 * backoff, and converts the column-oriented wire format into
 * `SearchResponse` objects.
 *
 * Failures are thrown as plain `SearchError`-shaped objects
 * (`{ code, message, statusCode }`) rather than `Error` instances, because
 * the route handlers serialize the thrown value straight to JSON.
 */
export class VectorSearchClient {
  private readonly host: string;
  private readonly tokenProvider: TokenProvider;
  private readonly maxRetries: number;
  private readonly backoffMs: number;

  /**
   * @param config.host Workspace hostname without scheme (e.g. `my-ws.cloud.databricks.com`).
   * @param config.tokenProvider Source of the bearer token used when the caller supplies no `userToken`.
   * @param config.retry Optional retry tuning. `maxRetries` (default 3) is the number of
   *   re-attempts after the first request; `backoffMs` (default 100) is the delay before the
   *   first retry and doubles on every subsequent retry.
   */
  constructor(config: {
    host: string;
    tokenProvider: TokenProvider;
    retry?: { maxRetries?: number; backoffMs?: number };
  }) {
    this.host = config.host;
    this.tokenProvider = config.tokenProvider;
    this.maxRetries = config.retry?.maxRetries ?? 3;
    this.backoffMs = config.retry?.backoffMs ?? 100;
  }

  /**
   * Run a similarity / hybrid / full-text query against one index.
   *
   * @param params.userToken When set, used as the bearer token instead of the token provider.
   * @param params.embeddingFn For self-managed embeddings: turns `queryText` into a vector.
   *   It is only invoked when no `queryVector` was supplied and the query type needs a vector.
   * @returns The parsed search response (`fromCache` is always `false` here).
   * @throws A `SearchError`-shaped object: `INVALID_QUERY` when neither text nor a non-empty
   *   vector is available, or the mapped error for a non-2xx response.
   */
  async query(params: {
    indexName: string;
    queryText?: string;
    queryVector?: number[];
    columns: string[];
    numResults: number;
    queryType: 'ann' | 'hybrid' | 'full_text';
    filters?: SearchFilters;
    reranker?: boolean | RerankerConfig;
    userToken?: string;
    embeddingFn?: (text: string) => Promise<number[]>;
  }): Promise<SearchResponse> {
    // Resolve query: managed (query_text) vs self-managed (query_vector).
    let queryText = params.queryText;
    // An empty vector carries no information; treat it the same as "not provided".
    let queryVector = params.queryVector && params.queryVector.length > 0 ? params.queryVector : undefined;

    // Full-text search never uses a vector, so skip the (potentially expensive) embedding call.
    if (params.embeddingFn && queryText && !queryVector && params.queryType !== 'full_text') {
      queryVector = await params.embeddingFn(queryText);
      // Only pure ANN can drop the text; hybrid search still needs it for the keyword half.
      if (params.queryType === 'ann') queryText = undefined;
    }

    if (!queryText && !queryVector) {
      const invalid: SearchError = {
        code: 'INVALID_QUERY',
        message: 'Either queryText or queryVector is required',
        statusCode: 400,
      };
      throw invalid;
    }

    // Validate before fetching a token so bad requests never trigger an OAuth round-trip.
    const token = params.userToken ?? (await this.tokenProvider.getToken());

    const body: Record<string, unknown> = {
      columns: params.columns,
      num_results: params.numResults,
      query_type: params.queryType.toUpperCase(),
      // debug_level 1 is requested so the response carries debug_info, which parseResponse
      // reads for queryTimeMs.
      debug_level: 1,
    };

    if (queryText) body.query_text = queryText;
    if (queryVector) body.query_vector = queryVector;

    if (params.filters && Object.keys(params.filters).length > 0) {
      body.filters = params.filters;
    }

    if (params.reranker) {
      // `reranker: true` reranks on every requested column except the conventional `id` key.
      const columnsToRerank = typeof params.reranker === 'object'
        ? params.reranker.columnsToRerank
        : params.columns.filter(column => column !== 'id');
      body.reranker = {
        model: 'databricks_reranker',
        parameters: { columns_to_rerank: columnsToRerank },
      };
    }

    const response = await this.fetchWithRetry(`${this.indexUrl(params.indexName)}/query`, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${token}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(body),
    });

    if (!response.ok) throw this.mapError(response);
    const raw = await response.json() as VsRawResponse;
    return this.parseResponse(raw, params.queryType);
  }

  /**
   * Fetch the next page of a previous query.
   *
   * @param params.queryType Query type to report in the returned `SearchResponse`. The
   *   next-page request itself does not carry one, so this defaults to `'hybrid'`.
   * @throws A `SearchError`-shaped object for a non-2xx response or network failure.
   */
  async queryNextPage(params: {
    indexName: string;
    endpointName: string;
    pageToken: string;
    userToken?: string;
    queryType?: 'ann' | 'hybrid' | 'full_text';
  }): Promise<SearchResponse> {
    const token = params.userToken ?? (await this.tokenProvider.getToken());

    const response = await this.fetchWithRetry(`${this.indexUrl(params.indexName)}/query-next-page`, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${token}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        endpoint_name: params.endpointName,
        page_token: params.pageToken,
      }),
    });

    if (!response.ok) throw this.mapError(response);
    const raw = await response.json() as VsRawResponse;
    return this.parseResponse(raw, params.queryType ?? 'hybrid');
  }

  /** Base URL for one index; the name is encoded so it cannot alter the request path. */
  private indexUrl(indexName: string): string {
    return `https://${this.host}/api/2.0/vector-search/indexes/${encodeURIComponent(indexName)}`;
  }

  /**
   * Convert the raw column-oriented payload (`manifest.columns` + `result.data_array` rows)
   * into `{ score, data }` results. The `score` column is lifted out of `data`.
   */
  private parseResponse(raw: VsRawResponse, queryType: 'ann' | 'hybrid' | 'full_text'): SearchResponse {
    // Tolerate payloads that omit the manifest or the row array.
    // NOTE(review): presumably happens for zero-hit queries — confirm against the live API.
    const columnNames = (raw.manifest?.columns ?? []).map(column => column.name);
    const rows = raw.result?.data_array ?? [];
    const scoreIndex = columnNames.indexOf('score');

    const results = rows.map(row => {
      const data: Record<string, unknown> = {};
      columnNames.forEach((name, i) => {
        if (name !== 'score') data[name] = row[i];
      });
      return {
        score: scoreIndex >= 0 ? (row[scoreIndex] as number) : 0,
        data,
      };
    });

    return {
      results,
      totalCount: raw.result?.row_count ?? rows.length,
      queryTimeMs: raw.debug_info?.response_time ?? raw.debug_info?.latency_ms ?? 0,
      queryType,
      fromCache: false,
      nextPageToken: raw.next_page_token ?? null,
    };
  }

  /** Map an HTTP status from the VS API onto the package's error codes. */
  private mapError(response: { status: number }): SearchError {
    const codeMap: Record<number, SearchError['code']> = {
      401: 'UNAUTHORIZED',
      403: 'UNAUTHORIZED',
      404: 'INDEX_NOT_FOUND',
      400: 'INVALID_QUERY',
      429: 'RATE_LIMITED',
    };
    return {
      code: codeMap[response.status] ?? 'INTERNAL',
      message: `VS query failed with status ${response.status}`,
      statusCode: response.status,
    };
  }

  /**
   * `fetch` with retries for transient failures.
   *
   * HTTP 429, 5xx and network errors are retried up to `maxRetries` times, waiting
   * `backoffMs * 2^attempt` between attempts. Any other response is returned as-is for the
   * caller to inspect; the last transient response is returned once retries are exhausted.
   *
   * @throws A `SearchError`-shaped `INTERNAL` object when the final attempt fails at the
   *   network level.
   */
  private async fetchWithRetry(
    url: string,
    options: RequestInit,
    maxRetries = this.maxRetries,
    backoffMs = this.backoffMs,
  ): Promise<Response> {
    for (let attempt = 0; ; attempt++) {
      const isFinalAttempt = attempt >= maxRetries;
      try {
        const response = await fetch(url, options);
        const isTransient = response.status === 429 || response.status >= 500;
        if (!isTransient || isFinalAttempt) return response;
      } catch (err) {
        if (isFinalAttempt) {
          const reason = err instanceof Error ? err.message : String(err);
          const failure: SearchError = {
            code: 'INTERNAL',
            message: `Network error: ${reason}`,
            statusCode: 500,
          };
          throw failure;
        }
      }
      // Exponential backoff so a rate-limited or struggling backend gets room to recover.
      const delayMs = backoffMs * 2 ** attempt;
      await new Promise<void>(resolve => setTimeout(resolve, delayMs));
    }
  }
}
ServicePrincipalTokenProvider, OboTokenExtractor } from './auth'; + +export class VectorSearchPlugin { + static manifest = { + name: 'vector-search', + description: 'Query Databricks Vector Search indexes from your app', + resources: { required: [] as any[], optional: [] as any[] }, + env: [ + { name: 'DATABRICKS_HOST', description: 'Databricks workspace hostname', source: 'auto' }, + { name: 'DATABRICKS_CLIENT_ID', description: 'Service principal client ID', source: 'auto' }, + { name: 'DATABRICKS_CLIENT_SECRET', description: 'Service principal client secret', source: 'auto' }, + ], + }; + + private config: VectorSearchPluginConfig; + private client!: VectorSearchClient; + private spTokenProvider!: ServicePrincipalTokenProvider; + + constructor(config: VectorSearchPluginConfig) { + this.config = config; + } + + async setup(): Promise { + const host = process.env.DATABRICKS_HOST; + if (!host) { + throw new Error( + 'DATABRICKS_HOST is not set. Ensure the app is deployed to Databricks Apps or set the environment variable manually.', + ); + } + + // Fail-fast config validation + for (const [alias, idx] of Object.entries(this.config.indexes)) { + if (!idx.indexName) { + throw new Error(`Index "${alias}" is missing required field "indexName"`); + } + if (!idx.columns || idx.columns.length === 0) { + throw new Error(`Index "${alias}" is missing required field "columns"`); + } + if (idx.pagination && !idx.endpointName) { + throw new Error(`Index "${alias}" has pagination enabled but is missing "endpointName"`); + } + } + + this.spTokenProvider = new ServicePrincipalTokenProvider(host); + this.client = new VectorSearchClient({ host, tokenProvider: this.spTokenProvider }); + } + + async shutdown(): Promise { + // No cleanup needed currently + } + + getResourceRequirements() { + return Object.values(this.config.indexes).map((idx) => ({ + type: 'vector-search-index' as const, + name: idx.indexName, + permission: 'SELECT' as const, + })); + } + + exports() { + return { + 
query: (alias: string, request: SearchRequest) => this.executeQuery(alias, request), + }; + } + + /** Resolve an index alias to its config. Throws if not found. */ + resolveIndex(alias: string): IndexConfig { + const config = this.config.indexes[alias]; + if (!config) { + throw { + code: 'INDEX_NOT_FOUND' as const, + message: `No index configured with alias "${alias}"`, + statusCode: 404, + }; + } + return config; + } + + /** Get the VS client instance (used by route handlers) */ + getClient(): VectorSearchClient { + return this.client; + } + + /** Get the full plugin config (used by route handlers) */ + getConfig(): VectorSearchPluginConfig { + return this.config; + } + + private async executeQuery(alias: string, request: SearchRequest): Promise { + const indexConfig = this.resolveIndex(alias); + return this.client.query({ + indexName: indexConfig.indexName, + queryText: request.queryText, + queryVector: request.queryVector, + columns: request.columns ?? indexConfig.columns, + numResults: request.numResults ?? indexConfig.numResults ?? 20, + queryType: request.queryType ?? indexConfig.queryType ?? 'hybrid', + filters: request.filters, + reranker: request.reranker ?? indexConfig.reranker ?? 
/**
 * Obtains and caches an OAuth access token for the app's service principal
 * using the client-credentials grant against `https://<host>/oidc/v1/token`.
 *
 * Credentials are read from `DATABRICKS_CLIENT_ID` / `DATABRICKS_CLIENT_SECRET`
 * at request time. A cached token is reused until two minutes before its
 * reported expiry; concurrent callers share a single in-flight refresh.
 */
export class ServicePrincipalTokenProvider implements TokenProvider {
  /** Refresh this long before the reported expiry so a token never expires mid-request. */
  private static readonly REFRESH_BUFFER_MS = 120_000;

  private token: string | null = null;
  private expiresAt = 0;
  /** The refresh currently in flight, if any, so parallel callers do not each hit the token endpoint. */
  private pending: Promise<string> | null = null;
  private readonly host: string;

  /** @param host Workspace hostname without scheme. */
  constructor(host: string) {
    this.host = host;
  }

  /**
   * Return a valid access token, fetching a new one when none is cached or
   * the cached one is within the refresh buffer of expiring.
   *
   * @throws A `SearchError`-shaped object (`code: 'INTERNAL'`, `statusCode: 500`) when the
   *   credentials are missing, the token endpoint answers with a non-2xx status, or the
   *   response carries no `access_token`. Nothing is cached in those cases.
   */
  async getToken(): Promise<string> {
    if (this.token && Date.now() < this.expiresAt - ServicePrincipalTokenProvider.REFRESH_BUFFER_MS) {
      return this.token;
    }

    if (!this.pending) {
      this.pending = this.requestToken().finally(() => {
        this.pending = null;
      });
    }
    return this.pending;
  }

  /** Perform one client-credentials exchange and update the cache on success. */
  private async requestToken(): Promise<string> {
    const clientId = process.env.DATABRICKS_CLIENT_ID;
    const clientSecret = process.env.DATABRICKS_CLIENT_SECRET;
    if (!clientId || !clientSecret) {
      throw this.failure('DATABRICKS_CLIENT_ID and DATABRICKS_CLIENT_SECRET must be set for service-principal auth');
    }

    const response = await fetch(`https://${this.host}/oidc/v1/token`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
      body: new URLSearchParams({
        grant_type: 'client_credentials',
        client_id: clientId,
        client_secret: clientSecret,
        scope: 'all-apis',
      }).toString(),
    });

    // Without this check an error payload would be cached as `undefined` and sent as "Bearer undefined".
    if (!response.ok) {
      throw this.failure(`Service principal token request failed with status ${response.status}`);
    }

    const payload: unknown = await response.json();
    const fields = (typeof payload === 'object' && payload !== null ? payload : {}) as {
      access_token?: unknown;
      expires_in?: unknown;
    };

    if (typeof fields.access_token !== 'string' || fields.access_token === '') {
      throw this.failure('Token endpoint response did not include an access_token');
    }

    // A missing or non-numeric lifetime means "do not cache": expiresAt stays in the past.
    const lifetimeSeconds =
      typeof fields.expires_in === 'number' && Number.isFinite(fields.expires_in) ? fields.expires_in : 0;

    this.token = fields.access_token;
    this.expiresAt = Date.now() + lifetimeSeconds * 1000;
    return fields.access_token;
  }

  /** Build the error object thrown for any token-acquisition failure (never includes the secret). */
  private failure(message: string): SearchError {
    return { code: 'INTERNAL', message, statusCode: 500 };
  }
}
/** JSON shape of every error this router sends (same fields as the package's `SearchError`). */
type RouteErrorBody = { code: string; message: string; statusCode: number };

/** Index configuration as returned by the plugin for a known alias. */
type ResolvedIndex = ReturnType<VectorSearchPlugin['resolveIndex']>;

/** True for non-null, non-array objects (i.e. a parsed JSON object body). */
function isJsonObject(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/**
 * Normalize anything thrown into a serializable error body.
 *
 * Values that already look like a `SearchError` (string `code`) pass through; anything else
 * (e.g. a `TypeError` from user-supplied `embeddingFn`) becomes a generic `INTERNAL` error so
 * internals are not leaked and the response is never an empty `{}`. The status is only
 * trusted when it is a valid HTTP error status, otherwise `fallbackStatus` is used.
 */
function toErrorBody(err: unknown, fallbackStatus: number): RouteErrorBody {
  const candidate: Record<string, unknown> = isJsonObject(err) ? err : {};
  const { code, message, statusCode } = candidate;

  const status =
    typeof statusCode === 'number' && Number.isInteger(statusCode) && statusCode >= 400 && statusCode <= 599
      ? statusCode
      : fallbackStatus;

  if (typeof code !== 'string') {
    return { code: 'INTERNAL', message: 'Unexpected error while handling the request', statusCode: status };
  }
  return {
    code,
    message: typeof message === 'string' ? message : 'Request failed',
    statusCode: status,
  };
}

/** Send a normalized error response. */
function sendError(res: Response, err: unknown, fallbackStatus: number): void {
  const body = toErrorBody(err, fallbackStatus);
  res.status(body.statusCode).json(body);
}

/** Send a 400 `INVALID_QUERY` response with the given message. */
function sendInvalidQuery(res: Response, message: string): void {
  res.status(400).json({ code: 'INVALID_QUERY', message, statusCode: 400 });
}

/** Look up the index for `alias`; on failure the error response is sent and `null` returned. */
function resolveIndexOrRespond(plugin: VectorSearchPlugin, alias: string, res: Response): ResolvedIndex | null {
  try {
    return plugin.resolveIndex(alias);
  } catch (err: unknown) {
    sendError(res, err, 404);
    return null;
  }
}

/**
 * Determine the bearer token to forward for this request.
 *
 * For `on-behalf-of-user` indexes the user's token is extracted from the request; if that
 * fails a 401 is sent and `{ ok: false }` returned. For other auth modes `userToken` is
 * `undefined`, which makes the client fall back to its own token provider.
 */
function resolveUserTokenOrRespond(
  indexConfig: ResolvedIndex,
  req: Request,
  res: Response,
): { ok: true; userToken: string | undefined } | { ok: false } {
  if (indexConfig.auth !== 'on-behalf-of-user') {
    return { ok: true, userToken: undefined };
  }
  try {
    return { ok: true, userToken: OboTokenExtractor.extractFromRequest(req) };
  } catch (err: unknown) {
    sendError(res, err, 401);
    return { ok: false };
  }
}

/**
 * Build the Express router exposing the plugin's HTTP API:
 *
 * - `POST /:alias/query`     — run a search (`SearchRequest` body)
 * - `POST /:alias/next-page` — fetch the next page (`{ pageToken }` body, pagination must be enabled)
 * - `GET  /:alias/config`    — public subset of the index configuration
 *
 * Request bodies are read from `req.body`, so a JSON body parser must be mounted upstream;
 * a missing or non-object body is answered with 400 instead of throwing.
 *
 * @param plugin Plugin instance used to resolve aliases and obtain the VS client.
 * @returns The configured router.
 */
export function createVectorSearchRouter(plugin: VectorSearchPlugin): Router {
  const router = Router();

  // POST /:alias/query
  router.post('/:alias/query', async (req: Request, res: Response) => {
    const { alias } = req.params;

    const indexConfig = resolveIndexOrRespond(plugin, alias, res);
    if (!indexConfig) return;

    // req.body is undefined when no body parser ran; treat that like an empty request.
    const body: SearchRequest = isJsonObject(req.body) ? req.body : {};

    if (!body.queryText && !body.queryVector) {
      sendInvalidQuery(res, 'queryText or queryVector is required');
      return;
    }

    const auth = resolveUserTokenOrRespond(indexConfig, req, res);
    if (!auth.ok) return;

    try {
      const response = await plugin.getClient().query({
        indexName: indexConfig.indexName,
        queryText: body.queryText,
        queryVector: body.queryVector,
        // NOTE(review): `columns` comes from the request body, so a caller can ask for columns
        // outside the configured list. Confirm whether it should be restricted to a subset of
        // indexConfig.columns.
        columns: body.columns ?? indexConfig.columns,
        numResults: body.numResults ?? indexConfig.numResults ?? 20,
        queryType: body.queryType ?? indexConfig.queryType ?? 'hybrid',
        filters: body.filters,
        reranker: body.reranker ?? indexConfig.reranker ?? false,
        userToken: auth.userToken,
        embeddingFn: indexConfig.embeddingFn,
      });

      res.json(response);
    } catch (err: unknown) {
      sendError(res, err, 500);
    }
  });

  // POST /:alias/next-page
  router.post('/:alias/next-page', async (req: Request, res: Response) => {
    const { alias } = req.params;

    const indexConfig = resolveIndexOrRespond(plugin, alias, res);
    if (!indexConfig) return;

    if (!indexConfig.pagination) {
      sendInvalidQuery(res, `Pagination is not enabled for index "${alias}"`);
      return;
    }

    const pageToken: unknown = isJsonObject(req.body) ? req.body.pageToken : undefined;
    if (typeof pageToken !== 'string' || pageToken === '') {
      sendInvalidQuery(res, 'pageToken is required');
      return;
    }

    const auth = resolveUserTokenOrRespond(indexConfig, req, res);
    if (!auth.ok) return;

    try {
      const response = await plugin.getClient().queryNextPage({
        indexName: indexConfig.indexName,
        // Plugin setup rejects pagination without endpointName, so this is set whenever
        // pagination is enabled.
        endpointName: indexConfig.endpointName!,
        pageToken,
        userToken: auth.userToken,
      });

      res.json(response);
    } catch (err: unknown) {
      sendError(res, err, 500);
    }
  });

  // GET /:alias/config
  router.get('/:alias/config', (req: Request, res: Response) => {
    const { alias } = req.params;

    const indexConfig = resolveIndexOrRespond(plugin, alias, res);
    if (!indexConfig) return;

    // Only expose what the frontend needs; indexName, endpointName and auth mode stay server-side.
    res.json({
      alias,
      columns: indexConfig.columns,
      queryType: indexConfig.queryType ?? 'hybrid',
      numResults: indexConfig.numResults ?? 20,
      reranker: !!indexConfig.reranker,
      pagination: !!indexConfig.pagination,
    });
  });

  return router;
}
Required for managed embedding indexes. */ + queryText?: string; + /** Pre-computed embedding vector. Required for self-managed indexes without embeddingFn. */ + queryVector?: number[]; + /** Override default columns for this query */ + columns?: string[]; + /** Override default numResults for this query */ + numResults?: number; + /** Override default queryType for this query */ + queryType?: 'ann' | 'hybrid' | 'full_text'; + /** Metadata filters */ + filters?: SearchFilters; + /** Override reranker for this query */ + reranker?: boolean; +} + +/** + * Filters use the VS REST API filter format. + * Keys are column names with optional operators. + * + * Examples: + * { category: ['electronics', 'books'] } // IN list + * { 'price >=': 10 } // comparison + * { 'title NOT': 'test' } // NOT + * { 'name LIKE': 'data%' } // LIKE + * { 'color1 OR color2': ['red', 'blue'] } // OR across columns + */ +export type SearchFilters = Record; + +// ============================================ +// Result Types (backend → frontend) +// ============================================ + +export interface SearchResponse = Record> { + /** Search results */ + results: SearchResult[]; + /** Total number of results */ + totalCount: number; + /** Query execution time in ms (from VS debug info) */ + queryTimeMs: number; + /** The query type that was actually used */ + queryType: 'ann' | 'hybrid' | 'full_text'; + /** Whether results were served from cache */ + fromCache: boolean; + /** Token for fetching next page. Null if no more results. 
*/ + nextPageToken: string | null; +} + +export interface SearchResult = Record> { + /** Similarity score (0-1, higher = more similar) */ + score: number; + /** The result data — keys match the columns requested */ + data: T; +} + +// ============================================ +// Error Types +// ============================================ + +export interface SearchError { + code: 'UNAUTHORIZED' | 'INDEX_NOT_FOUND' | 'INVALID_QUERY' | 'RATE_LIMITED' | 'INTERNAL'; + message: string; + /** HTTP status from VS API */ + statusCode: number; +} + +// ============================================ +// Hook Types +// ============================================ + +export interface UseVectorSearchOptions { + /** Debounce delay in ms. Default: 300 */ + debounceMs?: number; + /** Override default numResults from server config */ + numResults?: number; + /** Override default queryType from server config */ + queryType?: 'ann' | 'hybrid' | 'full_text'; + /** Override reranker from server config */ + reranker?: boolean; + /** Initial filters */ + initialFilters?: SearchFilters; + /** Callback when search completes */ + onResults?: (response: SearchResponse) => void; + /** Callback on error */ + onError?: (error: SearchError) => void; + /** Minimum query length before searching. 
Default: 1 */ + minQueryLength?: number; +} + +export interface UseVectorSearchReturn = Record> { + /** Execute a search */ + search: (query: string) => void; + /** Current results */ + results: SearchResult[]; + /** Whether a search is in flight */ + isLoading: boolean; + /** Error from the last search, if any */ + error: SearchError | null; + /** Total result count */ + totalCount: number; + /** Query time in ms */ + queryTimeMs: number; + /** Whether results came from cache */ + fromCache: boolean; + /** Current query text */ + query: string; + /** Set filters programmatically */ + setFilters: (filters: SearchFilters) => void; + /** Current active filters */ + activeFilters: SearchFilters; + /** Clear all filters and results */ + clear: () => void; + /** Whether more results are available (pagination) */ + hasMore?: boolean; + /** Fetch next page and append to results (pagination) */ + loadMore?: () => void; + /** Whether a loadMore is in flight (pagination) */ + isLoadingMore?: boolean; +} + +// ============================================ +// Internal Types (not exported from package) +// ============================================ + +/** Raw response from VS REST API */ +export interface VsRawResponse { + manifest: { + column_count: number; + columns: Array<{ name: string; type?: string }>; + }; + result: { + row_count: number; + data_array: unknown[][]; + }; + next_page_token?: string | null; + debug_info?: { + response_time?: number; + ann_time?: number; + embedding_gen_time?: number; + latency_ms?: number; + [key: string]: unknown; + }; +} + +/** Token provider interface for auth */ +export interface TokenProvider { + getToken(): Promise; +} diff --git a/packages/vector-search/src/ui/components/SearchBox.tsx b/packages/vector-search/src/ui/components/SearchBox.tsx new file mode 100644 index 00000000..dc9c83bd --- /dev/null +++ b/packages/vector-search/src/ui/components/SearchBox.tsx @@ -0,0 +1,73 @@ +import * as React from 'react'; + +interface 
SearchBoxProps { + onSearch: (query: string) => void; + value?: string; + placeholder?: string; + isLoading?: boolean; + autoFocus?: boolean; + className?: string; +} + +export function SearchBox({ + onSearch, + value, + placeholder = 'Search...', + isLoading = false, + autoFocus = false, + className, +}: SearchBoxProps) { + const [internalValue, setInternalValue] = React.useState(''); + const displayValue = value ?? internalValue; + const inputRef = React.useRef(null); + + const handleChange = (e: React.ChangeEvent) => { + const val = e.target.value; + if (value === undefined) setInternalValue(val); + onSearch(val); + }; + + const handleClear = () => { + if (value === undefined) setInternalValue(''); + onSearch(''); + inputRef.current?.focus(); + }; + + const handleKeyDown = (e: React.KeyboardEvent) => { + if (e.key === 'Escape') handleClear(); + }; + + return ( +
+ + + + + {isLoading && ( +
+ )} + {displayValue && !isLoading && ( + + )} +
+ ); +} diff --git a/packages/vector-search/src/ui/components/SearchLoadMore.tsx b/packages/vector-search/src/ui/components/SearchLoadMore.tsx new file mode 100644 index 00000000..a351c247 --- /dev/null +++ b/packages/vector-search/src/ui/components/SearchLoadMore.tsx @@ -0,0 +1,24 @@ +import * as React from 'react'; + +interface SearchLoadMoreProps { + hasMore: boolean; + isLoading: boolean; + onLoadMore: () => void; + className?: string; +} + +export function SearchLoadMore({ hasMore, isLoading, onLoadMore, className }: SearchLoadMoreProps) { + if (!hasMore) return null; + + return ( +
+ +
+ ); +} diff --git a/packages/vector-search/src/ui/components/SearchResultCard.tsx b/packages/vector-search/src/ui/components/SearchResultCard.tsx new file mode 100644 index 00000000..630e6b31 --- /dev/null +++ b/packages/vector-search/src/ui/components/SearchResultCard.tsx @@ -0,0 +1,76 @@ +import * as React from 'react'; +import type { SearchResult } from '../../plugin/types'; + +function escapeRegex(str: string): string { + return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} + +interface SearchResultCardProps> { + result: SearchResult; + titleColumn?: keyof T; + descriptionColumn?: keyof T; + displayColumns?: (keyof T)[]; + showScore?: boolean; + query?: string; +} + +export function SearchResultCard>({ + result, + titleColumn, + descriptionColumn, + displayColumns, + showScore = false, + query, +}: SearchResultCardProps) { + const title = titleColumn ? String(result.data[titleColumn] ?? '') : undefined; + const description = descriptionColumn ? String(result.data[descriptionColumn] ?? '') : undefined; + + const highlight = (text: string): React.ReactNode => { + if (!query) return text; + const words = query.split(/\s+/).filter(w => w.length > 0); + if (words.length === 0) return text; + const regex = new RegExp(`(${words.map(escapeRegex).join('|')})`, 'gi'); + const parts = text.split(regex); + return parts.map((part, i) => + regex.test(part) + ? {part} + : part + ); + }; + + return ( +
+
+
+ {title && ( +

+ {highlight(title)} +

+ )} + {description && ( +

+ {highlight(description)} +

+ )} + {displayColumns && ( +
+ {displayColumns + .filter(col => col !== titleColumn && col !== descriptionColumn) + .map(col => ( + + {String(col)}:{' '} + {String(result.data[col] ?? '—')} + + ))} +
+ )} +
+ {showScore && ( + + {(result.score * 100).toFixed(0)}% + + )} +
+
+ ); +} diff --git a/packages/vector-search/src/ui/components/SearchResults.tsx b/packages/vector-search/src/ui/components/SearchResults.tsx new file mode 100644 index 00000000..64d5fc9d --- /dev/null +++ b/packages/vector-search/src/ui/components/SearchResults.tsx @@ -0,0 +1,93 @@ +import * as React from 'react'; +import type { SearchResult, SearchError } from '../../plugin/types'; +import { SearchResultCard } from './SearchResultCard'; + +interface SearchResultsProps> { + results: SearchResult[]; + isLoading: boolean; + error: SearchError | null; + query: string; + totalCount: number; + queryTimeMs: number; + renderResult?: (result: SearchResult, index: number) => React.ReactNode; + displayColumns?: (keyof T)[]; + titleColumn?: keyof T; + descriptionColumn?: keyof T; + showScores?: boolean; + emptyMessage?: string; + className?: string; +} + +export function SearchResults>({ + results, + isLoading, + error, + query, + totalCount, + queryTimeMs, + renderResult, + displayColumns, + titleColumn, + descriptionColumn, + showScores = false, + emptyMessage = 'No results found.', + className, +}: SearchResultsProps) { + if (error) { + return ( +
+

Search failed

+

{error.message}

+
+ ); + } + + if (isLoading && results.length === 0) { + return ( +
+ {Array.from({ length: 3 }).map((_, i) => ( +
+
+
+
+
+ ))} +
+ ); + } + + if (!query) return null; + + if (results.length === 0) { + return ( +
+ {emptyMessage} +
+ ); + } + + return ( +
+
+ {totalCount} result{totalCount !== 1 ? 's' : ''} in {queryTimeMs}ms +
+
+ {results.map((result, index) => + renderResult + ? renderResult(result, index) + : ( + + ) + )} +
+
+ ); +} diff --git a/packages/vector-search/src/ui/hooks/useVectorSearch.ts b/packages/vector-search/src/ui/hooks/useVectorSearch.ts new file mode 100644 index 00000000..ad43d194 --- /dev/null +++ b/packages/vector-search/src/ui/hooks/useVectorSearch.ts @@ -0,0 +1,192 @@ +import { useState, useCallback, useRef, useEffect } from 'react'; +import type { + SearchResult, + SearchResponse, + SearchError, + SearchFilters, + UseVectorSearchOptions, + UseVectorSearchReturn, +} from '../../plugin/types'; + +/** + * React hook that queries the `/api/vector-search/${alias}` routes; `search` is debounced. + * + * Tracks results, loading, error and pagination state. Each new request aborts the + * previous one, and only the newest request may clear the loading flags. + * + * @param alias - Index alias interpolated into the request URL. + * @param options - Debounce delay, per-query overrides, initial filters and callbacks. + * @returns Search state plus the `search`, `setFilters`, `loadMore` and `clear` actions. + */ +export function useVectorSearch = Record>( + alias: string, + options: UseVectorSearchOptions = {}, +): UseVectorSearchReturn { + const { + debounceMs = 300, + numResults, + queryType, + reranker, + initialFilters = {}, + onResults, + onError, + minQueryLength = 1, + } = options; + + const [results, setResults] = useState[]>([]); + const [isLoading, setIsLoading] = useState(false); + const [error, setError] = useState(null); + const [query, setQuery] = useState(''); + const [totalCount, setTotalCount] = useState(0); + const [queryTimeMs, setQueryTimeMs] = useState(0); + const [fromCache, setFromCache] = useState(false); + const [activeFilters, setActiveFilters] = useState(initialFilters); + const [hasMore, setHasMore] = useState(false); + const [isLoadingMore, setIsLoadingMore] = useState(false); + + const nextPageTokenRef = useRef(null); + const abortRef = useRef(null); + const debounceRef = useRef | null>(null); + + const executeSearch = useCallback(async ( + searchQuery: string, + filters: SearchFilters, + isLoadMore = false, + ) => { + if (abortRef.current) abortRef.current.abort(); + // Keep a local handle so this call can tell later whether it is still the newest request. + const controller = new AbortController(); + abortRef.current = controller; + + if (!isLoadMore) { + setIsLoading(true); + setError(null); + } else { + setIsLoadingMore(true); + } + + try { + const url = isLoadMore + ? `/api/vector-search/${alias}/next-page` + : `/api/vector-search/${alias}/query`; + + const body: Record = isLoadMore + ? 
{ pageToken: nextPageTokenRef.current } + : { + queryText: searchQuery, + ...(Object.keys(filters).length > 0 ? { filters } : {}), + ...(numResults !== undefined ? { numResults } : {}), + ...(queryType !== undefined ? { queryType } : {}), + ...(reranker !== undefined ? { reranker } : {}), + }; + + const response = await fetch(url, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(body), + signal: controller.signal, + }); + + if (!response.ok) { + const err = await response.json(); + throw err as SearchError; + } + + const data: SearchResponse = await response.json(); + + if (isLoadMore) { + setResults(prev => [...prev, ...data.results]); + } else { + setResults(data.results); + } + + setTotalCount(data.totalCount); + setQueryTimeMs(data.queryTimeMs); + setFromCache(data.fromCache); + setHasMore(!!data.nextPageToken); + nextPageTokenRef.current = data.nextPageToken; + + onResults?.(data as SearchResponse); + } catch (err: unknown) { + if (err instanceof DOMException && err.name === 'AbortError') return; + const searchError = err as SearchError; + setError(searchError); + onError?.(searchError); + } finally { + // An aborted call also reaches this block; only the newest request may clear the flags. + if (abortRef.current === controller) { + setIsLoading(false); + setIsLoadingMore(false); + } + } + }, [alias, numResults, queryType, reranker, onResults, onError]); + + const search = useCallback((searchQuery: string) => { + setQuery(searchQuery); + + if (debounceRef.current) clearTimeout(debounceRef.current); + + if (searchQuery.length < minQueryLength) { + // Cancel any in-flight request so a late response cannot repopulate the cleared results. + if (abortRef.current) abortRef.current.abort(); + setResults([]); + setTotalCount(0); + setHasMore(false); + return; + } + + debounceRef.current = setTimeout(() => { + executeSearch(searchQuery, activeFilters); + }, debounceMs); + }, [debounceMs, minQueryLength, activeFilters, executeSearch]); + + const setFilters = useCallback((filters: SearchFilters) => { + setActiveFilters(filters); + if (query.length >= minQueryLength) { + executeSearch(query, filters); + } + }, [query, minQueryLength, executeSearch]); + + const loadMore = useCallback(() => { +
if (hasMore && !isLoadingMore && nextPageTokenRef.current) { + executeSearch(query, activeFilters, true); + } + }, [hasMore, isLoadingMore, query, activeFilters, executeSearch]); + + const clear = useCallback(() => { + if (debounceRef.current) clearTimeout(debounceRef.current); + if (abortRef.current) abortRef.current.abort(); + setQuery(''); + setResults([]); + setError(null); + setTotalCount(0); + setQueryTimeMs(0); + setFromCache(false); + setHasMore(false); + nextPageTokenRef.current = null; + }, []); + + useEffect(() => { + return () => { + if (debounceRef.current) clearTimeout(debounceRef.current); + if (abortRef.current) abortRef.current.abort(); + }; + }, []); + + return { + search, + results, + isLoading, + error, + totalCount, + queryTimeMs, + fromCache, + query, + setFilters, + activeFilters, + clear, + hasMore, + loadMore, + isLoadingMore, + }; +} diff --git a/packages/vector-search/src/ui/index.ts b/packages/vector-search/src/ui/index.ts new file mode 100644 index 00000000..47797d97 --- /dev/null +++ b/packages/vector-search/src/ui/index.ts @@ -0,0 +1,6 @@ +export { useVectorSearch } from './hooks/useVectorSearch'; +export { SearchBox } from './components/SearchBox'; +export { SearchResults } from './components/SearchResults'; +export { SearchResultCard } from './components/SearchResultCard'; +export { SearchLoadMore } from './components/SearchLoadMore'; +export type { UseVectorSearchOptions, UseVectorSearchReturn } from '../plugin/types'; diff --git a/packages/vector-search/tests/integration/dogfood.test.ts b/packages/vector-search/tests/integration/dogfood.test.ts new file mode 100644 index 00000000..f0aa017e --- /dev/null +++ b/packages/vector-search/tests/integration/dogfood.test.ts @@ -0,0 +1,97 @@ +import { describe, it, expect, beforeAll } from 'vitest'; +import { VectorSearchClient } from '../../src/plugin/VectorSearchClient'; + +const DOGFOOD_HOST = 'e2-dogfood.staging.cloud.databricks.com'; +const TEST_INDEX = 
'gurary_catalog.vector-search-brickfood.retrieval_perf_cuj_index_1'; + +// Skip unless DOGFOOD_TOKEN is set +describe.skipIf(!process.env.DOGFOOD_TOKEN)('Integration: VectorSearchClient → dogfood', () => { + let client: VectorSearchClient; + + beforeAll(() => { + client = new VectorSearchClient({ + host: DOGFOOD_HOST, + tokenProvider: { + getToken: async () => process.env.DOGFOOD_TOKEN!, + }, + }); + }); + + it('returns results for a valid hybrid query', async () => { + const response = await client.query({ + indexName: TEST_INDEX, + queryText: 'aircraft instruments', + columns: ['chunk_id', 'text'], + numResults: 5, + queryType: 'hybrid', + }); + expect(response.results.length).toBeGreaterThan(0); + expect(response.results[0].score).toBeGreaterThan(0); + expect(response.results[0].data).toHaveProperty('text'); + expect(response.results[0].data).toHaveProperty('chunk_id'); + expect(response.queryTimeMs).toBeGreaterThan(0); + }, 30000); + + it('returns results for ANN query', async () => { + const response = await client.query({ + indexName: TEST_INDEX, + queryText: 'navigation systems', + columns: ['chunk_id', 'text'], + numResults: 3, + queryType: 'ann', + }); + expect(response.results.length).toBeGreaterThan(0); + expect(response.results[0].score).toBeGreaterThan(0); + }, 30000); + + it('respects numResults limit', async () => { + const response = await client.query({ + indexName: TEST_INDEX, + queryText: 'flight', + columns: ['chunk_id', 'text'], + numResults: 2, + queryType: 'hybrid', + }); + expect(response.results.length).toBeLessThanOrEqual(2); + }, 30000); + + it('returns scores between 0 and 1', async () => { + const response = await client.query({ + indexName: TEST_INDEX, + queryText: 'altitude', + columns: ['chunk_id', 'text'], + numResults: 5, + queryType: 'hybrid', + }); + response.results.forEach(r => { + expect(r.score).toBeGreaterThanOrEqual(0); + expect(r.score).toBeLessThanOrEqual(1); + }); + }, 30000); + + it('handles empty results gracefully', 
async () => { + const response = await client.query({ + indexName: TEST_INDEX, + queryText: 'xyzzy_absolutely_no_match_12345_qwerty', + columns: ['chunk_id', 'text'], + numResults: 5, + queryType: 'ann', + }); + // May still return results due to embedding similarity, but should have low scores + // If no results, that's fine too + expect(response.results).toBeDefined(); + expect(Array.isArray(response.results)).toBe(true); + }, 30000); + + it('response includes queryTimeMs from debug_info', async () => { + const response = await client.query({ + indexName: TEST_INDEX, + queryText: 'weather radar', + columns: ['chunk_id', 'text'], + numResults: 3, + queryType: 'hybrid', + }); + expect(response.queryTimeMs).toBeGreaterThan(0); + expect(response.fromCache).toBe(false); + }, 30000); +}); diff --git a/packages/vector-search/tests/plugin/VectorSearchClient.test.ts b/packages/vector-search/tests/plugin/VectorSearchClient.test.ts new file mode 100644 index 00000000..8335a4f5 --- /dev/null +++ b/packages/vector-search/tests/plugin/VectorSearchClient.test.ts @@ -0,0 +1,233 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { VectorSearchClient } from '../../src/plugin/VectorSearchClient'; + +const mockFetch = vi.fn(); +vi.stubGlobal('fetch', mockFetch); + +const mockTokenProvider = { getToken: vi.fn().mockResolvedValue('sp-token-123') }; + +describe('VectorSearchClient', () => { + let client: VectorSearchClient; + + beforeEach(() => { + client = new VectorSearchClient({ + host: 'test-workspace.databricks.com', + tokenProvider: mockTokenProvider, + }); + mockFetch.mockReset(); + mockTokenProvider.getToken.mockClear(); + }); + + const validResponse = { + manifest: { column_count: 3, columns: [{ name: 'id' }, { name: 'title' }, { name: 'score' }] }, + result: { row_count: 2, data_array: [[1, 'ML Guide', 0.95], [2, 'AI Primer', 0.87]] }, + next_page_token: null, + debug_info: { response_time: 35 }, + }; + + describe('query()', () => { + it('constructs 
correct REST API URL and request body for hybrid search', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + await client.query({ + indexName: 'cat.sch.idx', queryText: 'machine learning', + columns: ['id', 'title'], numResults: 10, queryType: 'hybrid', + }); + const [url, opts] = mockFetch.mock.calls[0]; + expect(url).toBe('https://test-workspace.databricks.com/api/2.0/vector-search/indexes/cat.sch.idx/query'); + const body = JSON.parse(opts.body); + expect(body.query_text).toBe('machine learning'); + expect(body.query_type).toBe('HYBRID'); + expect(body.num_results).toBe(10); + expect(body.columns).toEqual(['id', 'title']); + expect(body.debug_level).toBe(1); + }); + + it('includes filters when provided', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id'], + numResults: 5, queryType: 'ann', filters: { category: ['books'] }, + }); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.filters).toEqual({ category: ['books'] }); + }); + + it('omits filters when empty object', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id'], + numResults: 5, queryType: 'ann', filters: {}, + }); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.filters).toBeUndefined(); + }); + + it('includes reranker config when boolean true', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id', 'title'], + numResults: 5, queryType: 'hybrid', reranker: true, + }); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.reranker.model).toBe('databricks_reranker'); + // Default: all 
non-id columns + expect(body.reranker.parameters.columns_to_rerank).toEqual(['title']); + }); + + it('includes custom reranker columnsToRerank', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id', 'title', 'desc'], + numResults: 5, queryType: 'hybrid', reranker: { columnsToRerank: ['desc'] }, + }); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.reranker.parameters.columns_to_rerank).toEqual(['desc']); + }); + + it('parses VS data_array response into typed SearchResult[]', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + const result = await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id', 'title'], + numResults: 10, queryType: 'hybrid', + }); + expect(result.results).toHaveLength(2); + expect(result.results[0].score).toBe(0.95); + expect(result.results[0].data).toEqual({ id: 1, title: 'ML Guide' }); + expect(result.results[1].score).toBe(0.87); + expect(result.results[1].data).toEqual({ id: 2, title: 'AI Primer' }); + expect(result.totalCount).toBe(2); + expect(result.queryTimeMs).toBe(35); + expect(result.fromCache).toBe(false); + expect(result.nextPageToken).toBeNull(); + }); + + it('handles next_page_token in response', async () => { + const responseWithToken = { ...validResponse, next_page_token: 'abc123' }; + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(responseWithToken) }); + const result = await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id', 'title'], + numResults: 10, queryType: 'hybrid', + }); + expect(result.nextPageToken).toBe('abc123'); + }); + + it('uses SP token when no userToken provided', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + await client.query({ + indexName: 'cat.sch.idx', queryText: 
'test', columns: ['id'], + numResults: 5, queryType: 'ann', + }); + expect(mockTokenProvider.getToken).toHaveBeenCalled(); + expect(mockFetch.mock.calls[0][1].headers['Authorization']).toBe('Bearer sp-token-123'); + }); + + it('uses userToken when provided (OBO)', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id'], + numResults: 5, queryType: 'ann', userToken: 'user-token-456', + }); + expect(mockTokenProvider.getToken).not.toHaveBeenCalled(); + expect(mockFetch.mock.calls[0][1].headers['Authorization']).toBe('Bearer user-token-456'); + }); + + it('calls embeddingFn and sends query_vector for self-managed indexes', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + const mockEmbeddingFn = vi.fn().mockResolvedValue([0.1, 0.2, 0.3]); + await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id', 'title'], + numResults: 5, queryType: 'ann', embeddingFn: mockEmbeddingFn, + }); + expect(mockEmbeddingFn).toHaveBeenCalledWith('test'); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.query_vector).toEqual([0.1, 0.2, 0.3]); + expect(body.query_text).toBeUndefined(); + }); + + it('sends query_text when no embeddingFn (managed embeddings)', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id'], + numResults: 5, queryType: 'ann', + }); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.query_text).toBe('test'); + expect(body.query_vector).toBeUndefined(); + }); + + it('throws INVALID_QUERY when neither queryText nor queryVector provided', async () => { + await expect(client.query({ + indexName: 'x', columns: ['id'], numResults: 1, queryType: 'ann', + } as any)).rejects.toMatchObject({ 
code: 'INVALID_QUERY' }); + }); + + it('maps 401 → UNAUTHORIZED', async () => { + mockFetch.mockResolvedValue({ ok: false, status: 401 }); + await expect(client.query({ + indexName: 'x', queryText: 't', columns: ['id'], numResults: 1, queryType: 'ann', + })).rejects.toMatchObject({ code: 'UNAUTHORIZED', statusCode: 401 }); + }); + + it('maps 404 → INDEX_NOT_FOUND', async () => { + mockFetch.mockResolvedValue({ ok: false, status: 404 }); + await expect(client.query({ + indexName: 'x', queryText: 't', columns: ['id'], numResults: 1, queryType: 'ann', + })).rejects.toMatchObject({ code: 'INDEX_NOT_FOUND', statusCode: 404 }); + }); + + it('maps 429 → RATE_LIMITED and retries', async () => { + mockFetch + .mockResolvedValueOnce({ ok: false, status: 429 }) + .mockResolvedValueOnce({ ok: true, json: () => Promise.resolve(validResponse) }); + const result = await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id', 'title'], + numResults: 5, queryType: 'ann', + }); + expect(mockFetch).toHaveBeenCalledTimes(2); + expect(result.results).toHaveLength(2); + }); + + it('does not retry 400 errors', async () => { + mockFetch.mockResolvedValue({ ok: false, status: 400 }); + await expect(client.query({ + indexName: 'x', queryText: 't', columns: ['id'], numResults: 1, queryType: 'ann', + })).rejects.toMatchObject({ code: 'INVALID_QUERY' }); + expect(mockFetch).toHaveBeenCalledTimes(1); + }); + + it('retries 500 errors up to 3 times', async () => { + mockFetch.mockResolvedValue({ ok: false, status: 500 }); + await expect(client.query({ + indexName: 'x', queryText: 't', columns: ['id'], numResults: 1, queryType: 'ann', + })).rejects.toMatchObject({ code: 'INTERNAL', statusCode: 500 }); + expect(mockFetch).toHaveBeenCalledTimes(4); // 1 initial + 3 retries + }); + + it('retries network errors', async () => { + mockFetch + .mockRejectedValueOnce(new Error('ECONNRESET')) + .mockResolvedValueOnce({ ok: true, json: () => Promise.resolve(validResponse) }); + const 
result = await client.query({ + indexName: 'cat.sch.idx', queryText: 'test', columns: ['id', 'title'], + numResults: 5, queryType: 'ann', + }); + expect(mockFetch).toHaveBeenCalledTimes(2); + expect(result.results).toHaveLength(2); + }); + }); + + describe('queryNextPage()', () => { + it('calls the query-next-page endpoint with page token', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); + await client.queryNextPage({ + indexName: 'cat.sch.idx', endpointName: 'my-endpoint', + pageToken: 'token123', + }); + const [url, opts] = mockFetch.mock.calls[0]; + expect(url).toBe('https://test-workspace.databricks.com/api/2.0/vector-search/indexes/cat.sch.idx/query-next-page'); + const body = JSON.parse(opts.body); + expect(body.endpoint_name).toBe('my-endpoint'); + expect(body.page_token).toBe('token123'); + }); + }); +}); diff --git a/packages/vector-search/tests/plugin/VectorSearchPlugin.test.ts b/packages/vector-search/tests/plugin/VectorSearchPlugin.test.ts new file mode 100644 index 00000000..479723fa --- /dev/null +++ b/packages/vector-search/tests/plugin/VectorSearchPlugin.test.ts @@ -0,0 +1,115 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { VectorSearchPlugin } from '../../src/plugin/VectorSearchPlugin'; + +describe('VectorSearchPlugin', () => { + beforeEach(() => { + vi.stubEnv('DATABRICKS_HOST', 'test-host.databricks.com'); + vi.stubEnv('DATABRICKS_CLIENT_ID', 'test-client'); + vi.stubEnv('DATABRICKS_CLIENT_SECRET', 'test-secret'); + }); + + describe('setup()', () => { + it('throws if DATABRICKS_HOST is not set', async () => { + vi.stubEnv('DATABRICKS_HOST', ''); + const plugin = new VectorSearchPlugin({ + indexes: { + test: { indexName: 'cat.sch.idx', columns: ['id'] }, + }, + }); + await expect(plugin.setup()).rejects.toThrow('DATABRICKS_HOST'); + }); + + it('throws if any index is missing indexName', async () => { + const plugin = new VectorSearchPlugin({ + indexes: { + test: { 
indexName: '', columns: ['id'] }, + }, + }); + await expect(plugin.setup()).rejects.toThrow('indexName'); + }); + + it('throws if any index is missing columns', async () => { + const plugin = new VectorSearchPlugin({ + indexes: { + test: { indexName: 'cat.sch.idx', columns: [] }, + }, + }); + await expect(plugin.setup()).rejects.toThrow('columns'); + }); + + it('throws if pagination enabled but no endpointName', async () => { + const plugin = new VectorSearchPlugin({ + indexes: { + test: { indexName: 'cat.sch.idx', columns: ['id'], pagination: true }, + }, + }); + await expect(plugin.setup()).rejects.toThrow('endpointName'); + }); + + it('succeeds with valid config', async () => { + const plugin = new VectorSearchPlugin({ + indexes: { + products: { + indexName: 'cat.sch.products_idx', + columns: ['id', 'name', 'description'], + queryType: 'hybrid', + numResults: 20, + }, + docs: { + indexName: 'cat.sch.docs_idx', + columns: ['id', 'title', 'content'], + reranker: true, + auth: 'on-behalf-of-user', + }, + }, + }); + await expect(plugin.setup()).resolves.not.toThrow(); + }); + }); + + describe('exports()', () => { + it('returns object with query function', async () => { + const plugin = new VectorSearchPlugin({ + indexes: { + test: { indexName: 'cat.sch.idx', columns: ['id'] }, + }, + }); + await plugin.setup(); + const exports = plugin.exports(); + expect(exports).toHaveProperty('query'); + expect(typeof exports.query).toBe('function'); + }); + }); + + describe('getResourceRequirements()', () => { + it('returns resource entry for each configured index', () => { + const plugin = new VectorSearchPlugin({ + indexes: { + products: { indexName: 'cat.sch.products', columns: ['id'] }, + docs: { indexName: 'cat.sch.docs', columns: ['id'] }, + }, + }); + const resources = plugin.getResourceRequirements(); + expect(resources).toHaveLength(2); + expect(resources[0]).toEqual({ + type: 'vector-search-index', + name: 'cat.sch.products', + permission: 'SELECT', + }); + 
expect(resources[1]).toEqual({ + type: 'vector-search-index', + name: 'cat.sch.docs', + permission: 'SELECT', + }); + }); + }); + + describe('manifest', () => { + it('has correct name and env declarations', () => { + expect(VectorSearchPlugin.manifest.name).toBe('vector-search'); + expect(VectorSearchPlugin.manifest.env).toContainEqual( + expect.objectContaining({ name: 'DATABRICKS_HOST' }) + ); + }); + }); +}); diff --git a/packages/vector-search/tests/plugin/auth.test.ts b/packages/vector-search/tests/plugin/auth.test.ts new file mode 100644 index 00000000..925b4ef4 --- /dev/null +++ b/packages/vector-search/tests/plugin/auth.test.ts @@ -0,0 +1,108 @@ +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import { ServicePrincipalTokenProvider, OboTokenExtractor } from '../../src/plugin/auth'; + +const mockFetch = vi.fn(); +vi.stubGlobal('fetch', mockFetch); + +describe('ServicePrincipalTokenProvider', () => { + let provider: ServicePrincipalTokenProvider; + + beforeEach(() => { + vi.stubEnv('DATABRICKS_CLIENT_ID', 'test-client-id'); + vi.stubEnv('DATABRICKS_CLIENT_SECRET', 'test-client-secret'); + provider = new ServicePrincipalTokenProvider('test-host.databricks.com'); + mockFetch.mockReset(); + vi.useRealTimers(); + }); + + afterEach(() => { + vi.unstubAllEnvs(); + }); + + it('fetches token from OIDC endpoint', async () => { + mockFetch.mockResolvedValue({ + ok: true, + json: () => Promise.resolve({ access_token: 'token-abc', expires_in: 3600 }), + }); + + const token = await provider.getToken(); + + expect(token).toBe('token-abc'); + const [url, opts] = mockFetch.mock.calls[0]; + expect(url).toBe('https://test-host.databricks.com/oidc/v1/token'); + expect(opts.method).toBe('POST'); + expect(opts.headers['Content-Type']).toBe('application/x-www-form-urlencoded'); + const body = new URLSearchParams(opts.body); + expect(body.get('grant_type')).toBe('client_credentials'); + expect(body.get('client_id')).toBe('test-client-id'); + 
expect(body.get('client_secret')).toBe('test-client-secret'); + expect(body.get('scope')).toBe('all-apis'); + }); + + it('returns cached token on subsequent calls within expiry', async () => { + mockFetch.mockResolvedValue({ + ok: true, + json: () => Promise.resolve({ access_token: 'token-abc', expires_in: 3600 }), + }); + + await provider.getToken(); + await provider.getToken(); + + expect(mockFetch).toHaveBeenCalledTimes(1); + }); + + it('refreshes token when within 2-minute expiry buffer', async () => { + vi.useFakeTimers(); + + mockFetch + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve({ access_token: 'token-1', expires_in: 3600 }), + }) + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve({ access_token: 'token-2', expires_in: 3600 }), + }); + + const token1 = await provider.getToken(); + expect(token1).toBe('token-1'); + + // Advance to within 2 minutes of expiry (3600s - 120s = 3480s) + vi.advanceTimersByTime(3481 * 1000); + + const token2 = await provider.getToken(); + expect(token2).toBe('token-2'); + expect(mockFetch).toHaveBeenCalledTimes(2); + }); +}); + +describe('OboTokenExtractor', () => { + it('extracts token from x-forwarded-access-token header', () => { + const req = { + headers: { 'x-forwarded-access-token': 'user-token-xyz' }, + } as any; + + const token = OboTokenExtractor.extractFromRequest(req); + expect(token).toBe('user-token-xyz'); + }); + + it('throws UNAUTHORIZED when header is missing', () => { + const req = { headers: {} } as any; + + expect(() => OboTokenExtractor.extractFromRequest(req)).toThrow(); + try { + OboTokenExtractor.extractFromRequest(req); + } catch (err: any) { + expect(err.code).toBe('UNAUTHORIZED'); + expect(err.statusCode).toBe(401); + } + }); + + it('throws UNAUTHORIZED when header is empty string', () => { + const req = { + headers: { 'x-forwarded-access-token': '' }, + } as any; + + expect(() => OboTokenExtractor.extractFromRequest(req)).toThrow(); + }); +}); diff --git 
a/packages/vector-search/tests/plugin/routes.test.ts b/packages/vector-search/tests/plugin/routes.test.ts new file mode 100644 index 00000000..ef043061 --- /dev/null +++ b/packages/vector-search/tests/plugin/routes.test.ts @@ -0,0 +1,206 @@ +import { describe, it, expect, vi, beforeEach, beforeAll } from 'vitest'; +import express from 'express'; +import request from 'supertest'; +import { createVectorSearchRouter } from '../../src/plugin/routes'; +import { VectorSearchPlugin } from '../../src/plugin/VectorSearchPlugin'; + +// Mock fetch for the VectorSearchClient +const mockFetch = vi.fn(); +vi.stubGlobal('fetch', mockFetch); + +describe('Vector Search Routes', () => { + let app: express.Express; + let plugin: VectorSearchPlugin; + + const validVsResponse = { + manifest: { column_count: 3, columns: [{ name: 'id' }, { name: 'title' }, { name: 'score' }] }, + result: { row_count: 2, data_array: [[1, 'ML Guide', 0.95], [2, 'AI Primer', 0.87]] }, + next_page_token: null, + debug_info: { latency_ms: 35 }, + }; + + beforeAll(async () => { + vi.stubEnv('DATABRICKS_HOST', 'test-host.databricks.com'); + vi.stubEnv('DATABRICKS_CLIENT_ID', 'test-client'); + vi.stubEnv('DATABRICKS_CLIENT_SECRET', 'test-secret'); + + plugin = new VectorSearchPlugin({ + indexes: { + products: { + indexName: 'cat.sch.products', + columns: ['id', 'title', 'description', 'category'], + queryType: 'hybrid', + numResults: 20, + }, + cached: { + indexName: 'cat.sch.cached', + columns: ['id', 'text'], + cache: { enabled: true, ttlSeconds: 60 }, + }, + paginated: { + indexName: 'cat.sch.paginated', + columns: ['id', 'text'], + pagination: true, + endpointName: 'my-endpoint', + }, + obo: { + indexName: 'cat.sch.obo', + columns: ['id', 'text'], + auth: 'on-behalf-of-user', + }, + }, + }); + await plugin.setup(); + + app = express(); + app.use(express.json()); + app.use('/api/vector-search', createVectorSearchRouter(plugin)); + }); + + beforeEach(() => { + mockFetch.mockReset(); + // Mock the OIDC token 
fetch that happens on first query + mockFetch.mockImplementation((url: string) => { + if (typeof url === 'string' && url.includes('/oidc/v1/token')) { + return Promise.resolve({ + ok: true, + json: () => Promise.resolve({ access_token: 'sp-token', expires_in: 3600 }), + }); + } + return Promise.resolve({ + ok: true, + json: () => Promise.resolve(validVsResponse), + }); + }); + }); + + describe('POST /:alias/query', () => { + it('returns results for valid query', async () => { + const res = await request(app) + .post('/api/vector-search/products/query') + .send({ queryText: 'machine learning' }) + .expect(200); + + expect(res.body.results).toHaveLength(2); + expect(res.body.results[0].score).toBe(0.95); + expect(res.body.results[0].data.title).toBe('ML Guide'); + expect(res.body.totalCount).toBe(2); + expect(res.body.queryTimeMs).toBe(35); + }); + + it('returns 404 for unknown alias', async () => { + const res = await request(app) + .post('/api/vector-search/unknown/query') + .send({ queryText: 'test' }) + .expect(404); + + expect(res.body.code).toBe('INDEX_NOT_FOUND'); + }); + + it('returns 400 for missing queryText and queryVector', async () => { + const res = await request(app) + .post('/api/vector-search/products/query') + .send({}) + .expect(400); + + expect(res.body.code).toBe('INVALID_QUERY'); + }); + + it('passes filters to VS client', async () => { + await request(app) + .post('/api/vector-search/products/query') + .send({ queryText: 'test', filters: { category: 'books' } }) + .expect(200); + + // Verify the VS API call included filters + const vsCall = mockFetch.mock.calls.find( + (c: any[]) => typeof c[0] === 'string' && c[0].includes('/query') + ); + expect(vsCall).toBeDefined(); + const body = JSON.parse(vsCall![1].body); + expect(body.filters).toEqual({ category: 'books' }); + }); + + it('uses OBO token when auth is on-behalf-of-user', async () => { + await request(app) + .post('/api/vector-search/obo/query') + .set('x-forwarded-access-token', 
'user-token-123') + .send({ queryText: 'test' }) + .expect(200); + + const vsCall = mockFetch.mock.calls.find( + (c: any[]) => typeof c[0] === 'string' && c[0].includes('/query') + ); + expect(vsCall![1].headers['Authorization']).toBe('Bearer user-token-123'); + }); + + it('returns 401 when OBO index has no user token', async () => { + const res = await request(app) + .post('/api/vector-search/obo/query') + .send({ queryText: 'test' }) + .expect(401); + + expect(res.body.code).toBe('UNAUTHORIZED'); + }); + }); + + describe('POST /:alias/next-page', () => { + it('returns 400 when pagination not enabled', async () => { + const res = await request(app) + .post('/api/vector-search/products/next-page') + .send({ pageToken: 'abc' }) + .expect(400); + + expect(res.body.code).toBe('INVALID_QUERY'); + expect(res.body.message).toContain('Pagination'); + }); + + it('returns 400 when pageToken missing', async () => { + const res = await request(app) + .post('/api/vector-search/paginated/next-page') + .send({}) + .expect(400); + + expect(res.body.code).toBe('INVALID_QUERY'); + expect(res.body.message).toContain('pageToken'); + }); + + it('calls query-next-page endpoint when valid', async () => { + await request(app) + .post('/api/vector-search/paginated/next-page') + .send({ pageToken: 'token123' }) + .expect(200); + + const nextPageCall = mockFetch.mock.calls.find( + (c: any[]) => typeof c[0] === 'string' && c[0].includes('/query-next-page') + ); + expect(nextPageCall).toBeDefined(); + const body = JSON.parse(nextPageCall![1].body); + expect(body.page_token).toBe('token123'); + expect(body.endpoint_name).toBe('my-endpoint'); + }); + }); + + describe('GET /:alias/config', () => { + it('returns public config for valid alias', async () => { + const res = await request(app) + .get('/api/vector-search/products/config') + .expect(200); + + expect(res.body.alias).toBe('products'); + expect(res.body.columns).toEqual(['id', 'title', 'description', 'category']); + 
expect(res.body.queryType).toBe('hybrid'); + expect(res.body.numResults).toBe(20); + expect(res.body.reranker).toBe(false); + expect(res.body.pagination).toBe(false); + }); + + it('returns 404 for unknown alias', async () => { + const res = await request(app) + .get('/api/vector-search/unknown/config') + .expect(404); + + expect(res.body.code).toBe('INDEX_NOT_FOUND'); + }); + }); +}); diff --git a/packages/vector-search/tests/ui/components/components.test.tsx b/packages/vector-search/tests/ui/components/components.test.tsx new file mode 100644 index 00000000..681f8069 --- /dev/null +++ b/packages/vector-search/tests/ui/components/components.test.tsx @@ -0,0 +1,158 @@ +import { describe, it, expect, vi } from 'vitest'; +import { render, screen, fireEvent } from '@testing-library/react'; +import { SearchBox } from '../../../src/ui/components/SearchBox'; +import { SearchResultCard } from '../../../src/ui/components/SearchResultCard'; +import { SearchResults } from '../../../src/ui/components/SearchResults'; +import { SearchLoadMore } from '../../../src/ui/components/SearchLoadMore'; + +describe('SearchBox', () => { + it('renders input with placeholder', () => { + render( {}} placeholder="Search products..." 
/>); + expect(screen.getByPlaceholderText('Search products...')).toBeInTheDocument(); + }); + + it('calls onSearch on input change', () => { + const onSearch = vi.fn(); + render(); + fireEvent.change(screen.getByRole('searchbox'), { target: { value: 'test' } }); + expect(onSearch).toHaveBeenCalledWith('test'); + }); + + it('shows clear button when value present', () => { + render( {}} value="test" />); + expect(screen.getByLabelText('Clear search')).toBeInTheDocument(); + }); + + it('hides clear button when value empty', () => { + render( {}} value="" />); + expect(screen.queryByLabelText('Clear search')).not.toBeInTheDocument(); + }); + + it('calls onSearch with empty string on clear', () => { + const onSearch = vi.fn(); + render(); + fireEvent.click(screen.getByLabelText('Clear search')); + expect(onSearch).toHaveBeenCalledWith(''); + }); + + it('clears on Escape key', () => { + const onSearch = vi.fn(); + render(); + fireEvent.keyDown(screen.getByRole('searchbox'), { key: 'Escape' }); + expect(onSearch).toHaveBeenCalledWith(''); + }); + + it('shows loading spinner when isLoading', () => { + render( {}} isLoading />); + expect(screen.getByTestId('loading-spinner')).toBeInTheDocument(); + }); +}); + +describe('SearchResultCard', () => { + const result = { + score: 0.95, + data: { id: 1, title: 'Machine Learning Guide', description: 'A guide to ML algorithms', category: 'books' }, + }; + + it('renders title and description', () => { + render(); + expect(screen.getByText('Machine Learning Guide')).toBeInTheDocument(); + expect(screen.getByText('A guide to ML algorithms')).toBeInTheDocument(); + }); + + it('highlights query words with mark tags', () => { + const { container } = render( + + ); + const marks = container.querySelectorAll('mark'); + expect(marks.length).toBeGreaterThan(0); + expect(marks[0].textContent).toBe('Machine'); + }); + + it('shows score badge when showScore is true', () => { + render(); + expect(screen.getByText('95%')).toBeInTheDocument(); + 
}); + + it('hides score badge by default', () => { + render(); + expect(screen.queryByText('95%')).not.toBeInTheDocument(); + }); + + it('renders display columns as metadata', () => { + render( + + ); + expect(screen.getByText('category:')).toBeInTheDocument(); + expect(screen.getByText('books')).toBeInTheDocument(); + }); +}); + +describe('SearchResults', () => { + const results = [ + { score: 0.95, data: { id: 1, title: 'Result 1' } }, + { score: 0.87, data: { id: 2, title: 'Result 2' } }, + ]; + + it('shows loading skeleton when loading with no results', () => { + render(); + expect(screen.getByTestId('loading-skeleton')).toBeInTheDocument(); + }); + + it('shows empty message when no results', () => { + render(); + expect(screen.getByText('No results found.')).toBeInTheDocument(); + }); + + it('shows custom empty message', () => { + render(); + expect(screen.getByText('Nothing here')).toBeInTheDocument(); + }); + + it('shows error banner', () => { + const error = { code: 'INTERNAL' as const, message: 'Server error', statusCode: 500 }; + render(); + expect(screen.getByText('Search failed')).toBeInTheDocument(); + expect(screen.getByText('Server error')).toBeInTheDocument(); + }); + + it('renders results with summary', () => { + render(); + expect(screen.getByText('2 results in 35ms')).toBeInTheDocument(); + expect(screen.getByText('Result 1')).toBeInTheDocument(); + expect(screen.getByText('Result 2')).toBeInTheDocument(); + }); + + it('returns null when no query', () => { + const { container } = render(); + expect(container.firstChild).toBeNull(); + }); +}); + +describe('SearchLoadMore', () => { + it('renders button when hasMore is true', () => { + render( {}} />); + expect(screen.getByText('Load more results')).toBeInTheDocument(); + }); + + it('renders nothing when hasMore is false', () => { + const { container } = render( {}} />); + expect(container.firstChild).toBeNull(); + }); + + it('shows Loading... 
when isLoading', () => { + render( {}} />); + expect(screen.getByText('Loading...')).toBeInTheDocument(); + }); + + it('calls onLoadMore on click', () => { + const onLoadMore = vi.fn(); + render(); + fireEvent.click(screen.getByText('Load more results')); + expect(onLoadMore).toHaveBeenCalledTimes(1); + }); +}); diff --git a/packages/vector-search/tests/ui/hooks/useVectorSearch.test.ts b/packages/vector-search/tests/ui/hooks/useVectorSearch.test.ts new file mode 100644 index 00000000..c663c75e --- /dev/null +++ b/packages/vector-search/tests/ui/hooks/useVectorSearch.test.ts @@ -0,0 +1,201 @@ +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import { renderHook, act } from '@testing-library/react'; +import { useVectorSearch } from '../../../src/ui/hooks/useVectorSearch'; + +const mockFetch = vi.fn(); +vi.stubGlobal('fetch', mockFetch); + +const mockResponse = { + results: [{ score: 0.95, data: { id: 1, title: 'Test Result' } }], + totalCount: 1, + queryTimeMs: 20, + queryType: 'hybrid', + fromCache: false, + nextPageToken: null, +}; + +/** Flush all pending microtasks (promise callbacks) */ +const flushPromises = () => act(() => Promise.resolve()); + +describe('useVectorSearch', () => { + beforeEach(() => { + mockFetch.mockReset(); + vi.useFakeTimers(); + }); + + afterEach(() => { + vi.useRealTimers(); + }); + + it('debounces search calls (300ms default)', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(mockResponse) }); + const { result } = renderHook(() => useVectorSearch('products')); + + act(() => { result.current.search('a'); }); + act(() => { result.current.search('ab'); }); + act(() => { result.current.search('abc'); }); + + // Before debounce fires + expect(mockFetch).not.toHaveBeenCalled(); + + // After debounce — advance timers then flush promises for fetch resolution + await act(async () => { vi.advanceTimersByTime(300); }); + await flushPromises(); + + 
expect(mockFetch).toHaveBeenCalledTimes(1); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.queryText).toBe('abc'); + }); + + it('does not search below minQueryLength', async () => { + const { result } = renderHook(() => + useVectorSearch('products', { minQueryLength: 3 }) + ); + + act(() => { result.current.search('ab'); }); + await act(async () => { vi.advanceTimersByTime(400); }); + + expect(mockFetch).not.toHaveBeenCalled(); + expect(result.current.results).toEqual([]); + }); + + it('sets isLoading true during search', async () => { + let resolveJson!: (v: unknown) => void; + mockFetch.mockReturnValue( + Promise.resolve({ + ok: true, + json: () => new Promise((r) => { resolveJson = r; }), + }) + ); + + const { result } = renderHook(() => useVectorSearch('products')); + + act(() => { result.current.search('test'); }); + await act(async () => { vi.advanceTimersByTime(300); }); + // fetch was called, but json() hasn't resolved yet + await flushPromises(); + + expect(result.current.isLoading).toBe(true); + + await act(async () => { resolveJson(mockResponse); }); + expect(result.current.isLoading).toBe(false); + expect(result.current.results).toHaveLength(1); + }); + + it('populates results after successful search', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(mockResponse) }); + const { result } = renderHook(() => useVectorSearch('products')); + + act(() => { result.current.search('test'); }); + await act(async () => { vi.advanceTimersByTime(300); }); + await flushPromises(); + + expect(result.current.results).toHaveLength(1); + expect(result.current.results[0].score).toBe(0.95); + expect(result.current.results[0].data).toEqual({ id: 1, title: 'Test Result' }); + expect(result.current.totalCount).toBe(1); + expect(result.current.queryTimeMs).toBe(20); + expect(result.current.fromCache).toBe(false); + expect(result.current.query).toBe('test'); + }); + + it('sets error on failed search', async () => { + 
mockFetch.mockResolvedValue({ + ok: false, + status: 404, + json: () => Promise.resolve({ code: 'INDEX_NOT_FOUND', message: 'Not found', statusCode: 404 }), + }); + + const { result } = renderHook(() => useVectorSearch('products')); + + act(() => { result.current.search('test'); }); + await act(async () => { vi.advanceTimersByTime(300); }); + await flushPromises(); + + expect(result.current.error).not.toBeNull(); + expect(result.current.error!.code).toBe('INDEX_NOT_FOUND'); + expect(result.current.isLoading).toBe(false); + }); + + it('clears everything on clear()', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(mockResponse) }); + const { result } = renderHook(() => useVectorSearch('products')); + + act(() => { result.current.search('test'); }); + await act(async () => { vi.advanceTimersByTime(300); }); + await flushPromises(); + + expect(result.current.results).toHaveLength(1); + + act(() => { result.current.clear(); }); + + expect(result.current.results).toEqual([]); + expect(result.current.query).toBe(''); + expect(result.current.totalCount).toBe(0); + expect(result.current.error).toBeNull(); + }); + + it('re-executes search when filters change', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(mockResponse) }); + const { result } = renderHook(() => useVectorSearch('products')); + + act(() => { result.current.search('test'); }); + await act(async () => { vi.advanceTimersByTime(300); }); + await flushPromises(); + + expect(mockFetch).toHaveBeenCalledTimes(1); + + await act(async () => { result.current.setFilters({ category: 'books' }); }); + await flushPromises(); + + expect(mockFetch).toHaveBeenCalledTimes(2); + const body = JSON.parse(mockFetch.mock.calls[1][1].body); + expect(body.filters).toEqual({ category: 'books' }); + }); + + it('calls onResults callback on success', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(mockResponse) }); + const onResults 
= vi.fn(); + const { result } = renderHook(() => + useVectorSearch('products', { onResults }) + ); + + act(() => { result.current.search('test'); }); + await act(async () => { vi.advanceTimersByTime(300); }); + await flushPromises(); + + expect(onResults).toHaveBeenCalledTimes(1); + expect(onResults).toHaveBeenCalledWith(mockResponse); + }); + + it('calls onError callback on failure', async () => { + const errorResponse = { code: 'INTERNAL', message: 'Server error', statusCode: 500 }; + mockFetch.mockResolvedValue({ + ok: false, + status: 500, + json: () => Promise.resolve(errorResponse), + }); + const onError = vi.fn(); + const { result } = renderHook(() => + useVectorSearch('products', { onError }) + ); + + act(() => { result.current.search('test'); }); + await act(async () => { vi.advanceTimersByTime(300); }); + await flushPromises(); + + expect(onError).toHaveBeenCalledTimes(1); + }); + + it('sends request to correct API endpoint', async () => { + mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(mockResponse) }); + const { result } = renderHook(() => useVectorSearch('products')); + + act(() => { result.current.search('test'); }); + await act(async () => { vi.advanceTimersByTime(300); }); + await flushPromises(); + + expect(mockFetch).toHaveBeenCalled(); + expect(mockFetch.mock.calls[0][0]).toBe('/api/vector-search/products/query'); + }); +}); diff --git a/packages/vector-search/tsconfig.json b/packages/vector-search/tsconfig.json new file mode 100644 index 00000000..c2c6364a --- /dev/null +++ b/packages/vector-search/tsconfig.json @@ -0,0 +1,16 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "ESNext", + "moduleResolution": "bundler", + "jsx": "react-jsx", + "declaration": true, + "outDir": "./dist", + "rootDir": "./src", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist", "**/*.test.ts", "**/*.test.tsx"] +} diff --git 
a/packages/vector-search/vitest.config.ts b/packages/vector-search/vitest.config.ts new file mode 100644 index 00000000..6ccbfd4c --- /dev/null +++ b/packages/vector-search/vitest.config.ts @@ -0,0 +1,9 @@ +import { defineConfig } from 'vitest/config'; + +export default defineConfig({ + test: { + environment: 'jsdom', + globals: true, + setupFiles: ['./vitest.setup.ts'], + }, +}); diff --git a/packages/vector-search/vitest.setup.ts b/packages/vector-search/vitest.setup.ts new file mode 100644 index 00000000..bb02c60c --- /dev/null +++ b/packages/vector-search/vitest.setup.ts @@ -0,0 +1 @@ +import '@testing-library/jest-dom/vitest'; From a34d57856d5841d37b7e6700c68f75c28296cb33 Mon Sep 17 00:00:00 2001 From: Adam Gurary Date: Tue, 24 Mar 2026 12:15:24 -0700 Subject: [PATCH 2/3] refactor: move vector-search from separate package to core plugin MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Move from packages/vector-search/ into packages/appkit/src/plugins/vector-search/ - Replace custom auth (ServicePrincipalTokenProvider, OboTokenExtractor) with AppKit's built-in asUser(req) and getWorkspaceClient() context - Add VectorSearchConnector using workspaceClient.apiClient.request() instead of raw fetch with manual token management - Plugin now extends Plugin base class with proper manifest.json, defaults.ts, this.route(), this.execute(), and toPlugin() factory - Remove standalone package.json, tsconfig.json, and vitest config - Register plugin and connector in index barrel exports Addresses review feedback: - Plugin lives under plugins/ folder alongside analytics, genie, files - No custom auth handling — uses AppKit's built-in mechanisms - Follows create-core-plugin patterns (manifest, defaults, connector) Signed-off-by: Adam Gurary --- packages/appkit/src/connectors/index.ts | 1 + .../src/connectors/vector-search/client.ts | 85 ++++ .../src/connectors/vector-search/index.ts | 2 + .../src/connectors/vector-search/types.ts | 39 
++ packages/appkit/src/plugins/index.ts | 1 + .../src/plugins/vector-search/defaults.ts | 7 + .../appkit/src/plugins/vector-search/index.ts | 2 + .../src/plugins/vector-search/manifest.json | 22 + .../vector-search/tests/vector-search.test.ts | 288 +++++++++++++ .../appkit/src/plugins/vector-search/types.ts | 79 ++++ .../plugins/vector-search/vector-search.ts | 363 +++++++++++++++++ packages/vector-search/README.md | 377 ------------------ packages/vector-search/package.json | 47 --- packages/vector-search/src/index.ts | 13 - .../src/plugin/VectorSearchClient.ts | 200 ---------- .../src/plugin/VectorSearchPlugin.ts | 105 ----- packages/vector-search/src/plugin/auth.ts | 48 --- packages/vector-search/src/plugin/routes.ts | 136 ------- packages/vector-search/src/plugin/types.ts | 198 --------- .../src/ui/components/SearchBox.tsx | 73 ---- .../src/ui/components/SearchLoadMore.tsx | 24 -- .../src/ui/components/SearchResultCard.tsx | 76 ---- .../src/ui/components/SearchResults.tsx | 93 ----- .../src/ui/hooks/useVectorSearch.ts | 175 -------- packages/vector-search/src/ui/index.ts | 6 - .../tests/integration/dogfood.test.ts | 97 ----- .../tests/plugin/VectorSearchClient.test.ts | 233 ----------- .../tests/plugin/VectorSearchPlugin.test.ts | 115 ------ .../vector-search/tests/plugin/auth.test.ts | 108 ----- .../vector-search/tests/plugin/routes.test.ts | 206 ---------- .../tests/ui/components/components.test.tsx | 158 -------- .../tests/ui/hooks/useVectorSearch.test.ts | 201 ---------- packages/vector-search/tsconfig.json | 16 - packages/vector-search/vitest.config.ts | 9 - packages/vector-search/vitest.setup.ts | 1 - 35 files changed, 889 insertions(+), 2715 deletions(-) create mode 100644 packages/appkit/src/connectors/vector-search/client.ts create mode 100644 packages/appkit/src/connectors/vector-search/index.ts create mode 100644 packages/appkit/src/connectors/vector-search/types.ts create mode 100644 packages/appkit/src/plugins/vector-search/defaults.ts create mode 
100644 packages/appkit/src/plugins/vector-search/index.ts create mode 100644 packages/appkit/src/plugins/vector-search/manifest.json create mode 100644 packages/appkit/src/plugins/vector-search/tests/vector-search.test.ts create mode 100644 packages/appkit/src/plugins/vector-search/types.ts create mode 100644 packages/appkit/src/plugins/vector-search/vector-search.ts delete mode 100644 packages/vector-search/README.md delete mode 100644 packages/vector-search/package.json delete mode 100644 packages/vector-search/src/index.ts delete mode 100644 packages/vector-search/src/plugin/VectorSearchClient.ts delete mode 100644 packages/vector-search/src/plugin/VectorSearchPlugin.ts delete mode 100644 packages/vector-search/src/plugin/auth.ts delete mode 100644 packages/vector-search/src/plugin/routes.ts delete mode 100644 packages/vector-search/src/plugin/types.ts delete mode 100644 packages/vector-search/src/ui/components/SearchBox.tsx delete mode 100644 packages/vector-search/src/ui/components/SearchLoadMore.tsx delete mode 100644 packages/vector-search/src/ui/components/SearchResultCard.tsx delete mode 100644 packages/vector-search/src/ui/components/SearchResults.tsx delete mode 100644 packages/vector-search/src/ui/hooks/useVectorSearch.ts delete mode 100644 packages/vector-search/src/ui/index.ts delete mode 100644 packages/vector-search/tests/integration/dogfood.test.ts delete mode 100644 packages/vector-search/tests/plugin/VectorSearchClient.test.ts delete mode 100644 packages/vector-search/tests/plugin/VectorSearchPlugin.test.ts delete mode 100644 packages/vector-search/tests/plugin/auth.test.ts delete mode 100644 packages/vector-search/tests/plugin/routes.test.ts delete mode 100644 packages/vector-search/tests/ui/components/components.test.tsx delete mode 100644 packages/vector-search/tests/ui/hooks/useVectorSearch.test.ts delete mode 100644 packages/vector-search/tsconfig.json delete mode 100644 packages/vector-search/vitest.config.ts delete mode 100644 
packages/vector-search/vitest.setup.ts diff --git a/packages/appkit/src/connectors/index.ts b/packages/appkit/src/connectors/index.ts index 41e7748c..54a24fa4 100644 --- a/packages/appkit/src/connectors/index.ts +++ b/packages/appkit/src/connectors/index.ts @@ -3,3 +3,4 @@ export * from "./genie"; export * from "./lakebase"; export * from "./lakebase-v1"; export * from "./sql-warehouse"; +export * from "./vector-search"; diff --git a/packages/appkit/src/connectors/vector-search/client.ts b/packages/appkit/src/connectors/vector-search/client.ts new file mode 100644 index 00000000..f3345061 --- /dev/null +++ b/packages/appkit/src/connectors/vector-search/client.ts @@ -0,0 +1,85 @@ +import type { WorkspaceClient } from "@databricks/sdk-experimental"; +import { createLogger } from "../../logging/logger"; +import type { + VectorSearchConnectorConfig, + VsNextPageParams, + VsQueryParams, + VsRawResponse, +} from "./types"; + +const logger = createLogger("connectors:vector-search"); + +export class VectorSearchConnector { + private readonly config: Required; + + constructor(config: VectorSearchConnectorConfig = {}) { + this.config = { + timeout: config.timeout ?? 
30_000, + }; + } + + async query( + workspaceClient: WorkspaceClient, + params: VsQueryParams, + signal?: AbortSignal, + ): Promise { + const body: Record = { + columns: params.columns, + num_results: params.numResults, + query_type: params.queryType.toUpperCase(), + debug_level: 1, + }; + + if (params.queryText) body.query_text = params.queryText; + if (params.queryVector) body.query_vector = params.queryVector; + if (params.filters && Object.keys(params.filters).length > 0) { + body.filters = params.filters; + } + if (params.reranker) { + body.reranker = { + model: "databricks_reranker", + parameters: { columns_to_rerank: params.reranker.columnsToRerank }, + }; + } + + logger.debug( + "Querying VS index %s (type=%s, num_results=%d)", + params.indexName, + params.queryType, + params.numResults, + ); + + return (await workspaceClient.apiClient.request({ + method: "POST", + path: `/api/2.0/vector-search/indexes/${params.indexName}/query`, + body, + headers: new Headers({ "Content-Type": "application/json" }), + raw: false, + query: {}, + })) as VsRawResponse; + } + + async queryNextPage( + workspaceClient: WorkspaceClient, + params: VsNextPageParams, + signal?: AbortSignal, + ): Promise { + logger.debug( + "Fetching next page for index %s (endpoint=%s)", + params.indexName, + params.endpointName, + ); + + return (await workspaceClient.apiClient.request({ + method: "POST", + path: `/api/2.0/vector-search/indexes/${params.indexName}/query-next-page`, + body: { + endpoint_name: params.endpointName, + page_token: params.pageToken, + }, + headers: new Headers({ "Content-Type": "application/json" }), + raw: false, + query: {}, + })) as VsRawResponse; + } +} diff --git a/packages/appkit/src/connectors/vector-search/index.ts b/packages/appkit/src/connectors/vector-search/index.ts new file mode 100644 index 00000000..d2ec2302 --- /dev/null +++ b/packages/appkit/src/connectors/vector-search/index.ts @@ -0,0 +1,2 @@ +export * from "./client"; +export * from "./types"; diff 
--git a/packages/appkit/src/connectors/vector-search/types.ts b/packages/appkit/src/connectors/vector-search/types.ts new file mode 100644 index 00000000..8e9f6c39 --- /dev/null +++ b/packages/appkit/src/connectors/vector-search/types.ts @@ -0,0 +1,39 @@ +export interface VectorSearchConnectorConfig { + timeout?: number; +} + +export interface VsQueryParams { + indexName: string; + queryText?: string; + queryVector?: number[]; + columns: string[]; + numResults: number; + queryType: "ann" | "hybrid" | "full_text"; + filters?: Record; + reranker?: { columnsToRerank: string[] }; +} + +export interface VsNextPageParams { + indexName: string; + endpointName: string; + pageToken: string; +} + +export interface VsRawResponse { + manifest: { + column_count: number; + columns: Array<{ name: string; type?: string }>; + }; + result: { + row_count: number; + data_array: unknown[][]; + }; + next_page_token?: string | null; + debug_info?: { + response_time?: number; + ann_time?: number; + embedding_gen_time?: number; + latency_ms?: number; + [key: string]: unknown; + }; +} diff --git a/packages/appkit/src/plugins/index.ts b/packages/appkit/src/plugins/index.ts index 7caa040f..9a1819b3 100644 --- a/packages/appkit/src/plugins/index.ts +++ b/packages/appkit/src/plugins/index.ts @@ -3,3 +3,4 @@ export * from "./files"; export * from "./genie"; export * from "./lakebase"; export * from "./server"; +export * from "./vector-search"; diff --git a/packages/appkit/src/plugins/vector-search/defaults.ts b/packages/appkit/src/plugins/vector-search/defaults.ts new file mode 100644 index 00000000..c02b6e80 --- /dev/null +++ b/packages/appkit/src/plugins/vector-search/defaults.ts @@ -0,0 +1,7 @@ +import type { PluginExecuteConfig } from "shared"; + +export const vectorSearchDefaults: PluginExecuteConfig = { + cache: { enabled: false }, + retry: { enabled: true, initialDelay: 1000, attempts: 3 }, + timeout: 30_000, +}; diff --git a/packages/appkit/src/plugins/vector-search/index.ts 
import { beforeEach, describe, expect, it, vi } from "vitest";

// Module mocks: replace everything the plugin pulls in besides the connector,
// so the only observable side effect is the call to `apiClient.request`.

vi.mock("../../../context", () => ({
  getWorkspaceClient: vi.fn(() => mockWorkspaceClient),
  getCurrentUserId: vi.fn(() => "test-user"),
}));

vi.mock("../../../logging/logger", () => ({
  createLogger: () => ({
    debug: vi.fn(),
    info: vi.fn(),
    warn: vi.fn(),
    error: vi.fn(),
    event: () => ({
      setComponent: vi.fn().mockReturnThis(),
      setContext: vi.fn().mockReturnThis(),
    }),
  }),
}));

vi.mock("../../../telemetry", () => ({
  TelemetryManager: {
    getProvider: () => ({
      getTracer: () => ({}),
      getMeter: () => ({
        createCounter: () => ({ add: vi.fn() }),
        createHistogram: () => ({ record: vi.fn() }),
      }),
    }),
  },
  normalizeTelemetryOptions: () => ({ traces: false, metrics: false }),
}));

vi.mock("../../../cache", () => ({
  CacheManager: {
    getInstanceSync: () => ({ get: vi.fn(), set: vi.fn() }),
  },
}));

vi.mock("../../../app", () => ({
  AppManager: vi.fn().mockImplementation(() => ({})),
}));

vi.mock("../../../plugin/dev-reader", () => ({
  DevFileReader: {
    getInstance: () => ({}),
  },
}));

vi.mock("../../../stream", () => ({
  StreamManager: vi.fn().mockImplementation(() => ({
    abortAll: vi.fn(),
    stream: vi.fn(),
  })),
}));

// Canned Vector Search payload: two rows, with the score in the last column.
const validVsResponse = {
  manifest: {
    column_count: 3,
    columns: [{ name: "id" }, { name: "title" }, { name: "score" }],
  },
  result: {
    row_count: 2,
    data_array: [
      [1, "ML Guide", 0.95],
      [2, "AI Primer", 0.87],
    ],
  },
  next_page_token: null,
  debug_info: { response_time: 35 },
};

const mockRequest = vi.fn().mockResolvedValue(validVsResponse);
const mockWorkspaceClient = {
  apiClient: { request: mockRequest },
};

import { VectorSearchPlugin } from "../vector-search";

type IndexMap = ConstructorParameters<typeof VectorSearchPlugin>[0]["indexes"];

/** Builds a plugin for the given alias → index configuration map. */
const createPlugin = (indexes: IndexMap) => new VectorSearchPlugin({ indexes });

/** Builds a plugin and runs `setup()` on it. */
const createReadyPlugin = async (indexes: IndexMap) => {
  const plugin = createPlugin(indexes);
  await plugin.setup();
  return plugin;
};

/** Body of the first request sent to the mocked API client in this test. */
const firstRequestBody = () => mockRequest.mock.calls[0][0].body;

describe("VectorSearchPlugin", () => {
  beforeEach(() => {
    mockRequest.mockClear();
    mockRequest.mockResolvedValue(validVsResponse);
  });

  describe("setup()", () => {
    it("throws if any index is missing indexName", async () => {
      const plugin = createPlugin({
        test: { indexName: "", columns: ["id"] },
      });
      await expect(plugin.setup()).rejects.toThrow("indexName");
    });

    it("throws if any index is missing columns", async () => {
      const plugin = createPlugin({
        test: { indexName: "cat.sch.idx", columns: [] },
      });
      await expect(plugin.setup()).rejects.toThrow("columns");
    });

    it("throws if pagination enabled but no endpointName", async () => {
      const plugin = createPlugin({
        test: {
          indexName: "cat.sch.idx",
          columns: ["id"],
          pagination: true,
        },
      });
      await expect(plugin.setup()).rejects.toThrow("endpointName");
    });

    it("succeeds with valid config", async () => {
      const plugin = createPlugin({
        products: {
          indexName: "cat.sch.products_idx",
          columns: ["id", "name", "description"],
          queryType: "hybrid",
          numResults: 20,
        },
      });
      await expect(plugin.setup()).resolves.not.toThrow();
    });
  });

  describe("manifest", () => {
    it("has correct name", () => {
      expect(VectorSearchPlugin.manifest.name).toBe("vector-search");
    });
  });

  describe("exports()", () => {
    it("returns object with query function", () => {
      const exports = createPlugin({
        test: { indexName: "cat.sch.idx", columns: ["id"] },
      }).exports();
      expect(exports).toHaveProperty("query");
      expect(typeof exports.query).toBe("function");
    });
  });

  describe("query()", () => {
    it("calls VS API via connector and parses response", async () => {
      const plugin = await createReadyPlugin({
        products: {
          indexName: "cat.sch.products",
          columns: ["id", "title"],
          queryType: "hybrid",
        },
      });

      const { results, totalCount, queryTimeMs } = await plugin.query(
        "products",
        { queryText: "machine learning" },
      );

      expect(results).toHaveLength(2);
      expect(results[0].score).toBe(0.95);
      expect(results[0].data).toEqual({ id: 1, title: "ML Guide" });
      expect(results[1].score).toBe(0.87);
      expect(totalCount).toBe(2);
      expect(queryTimeMs).toBe(35);
    });

    it("constructs correct API request", async () => {
      const plugin = await createReadyPlugin({
        test: {
          indexName: "cat.sch.idx",
          columns: ["id", "title"],
          queryType: "hybrid",
          numResults: 10,
        },
      });
      await plugin.query("test", { queryText: "test query" });

      expect(mockRequest).toHaveBeenCalledWith(
        expect.objectContaining({
          method: "POST",
          path: "/api/2.0/vector-search/indexes/cat.sch.idx/query",
        }),
      );

      const sent = firstRequestBody();
      expect(sent.query_text).toBe("test query");
      expect(sent.query_type).toBe("HYBRID");
      expect(sent.num_results).toBe(10);
      expect(sent.columns).toEqual(["id", "title"]);
    });

    it("throws INDEX_NOT_FOUND for unknown alias", async () => {
      const plugin = await createReadyPlugin({
        test: { indexName: "cat.sch.idx", columns: ["id"] },
      });

      await expect(
        plugin.query("unknown", { queryText: "test" }),
      ).rejects.toMatchObject({ code: "INDEX_NOT_FOUND" });
    });

    it("includes filters when provided", async () => {
      const plugin = await createReadyPlugin({
        test: {
          indexName: "cat.sch.idx",
          columns: ["id", "title"],
        },
      });
      await plugin.query("test", {
        queryText: "test",
        filters: { category: ["books"] },
      });

      expect(firstRequestBody().filters).toEqual({ category: ["books"] });
    });

    it("includes reranker config when enabled on index", async () => {
      const plugin = await createReadyPlugin({
        test: {
          indexName: "cat.sch.idx",
          columns: ["id", "title", "desc"],
          reranker: true,
        },
      });
      await plugin.query("test", { queryText: "test" });

      const { reranker } = firstRequestBody();
      expect(reranker.model).toBe("databricks_reranker");
      expect(reranker.parameters.columns_to_rerank).toEqual(["title", "desc"]);
    });

    it("calls embeddingFn for self-managed indexes", async () => {
      const mockEmbeddingFn = vi.fn().mockResolvedValue([0.1, 0.2, 0.3]);
      const plugin = await createReadyPlugin({
        test: {
          indexName: "cat.sch.idx",
          columns: ["id", "title"],
          embeddingFn: mockEmbeddingFn,
        },
      });
      await plugin.query("test", { queryText: "test" });

      expect(mockEmbeddingFn).toHaveBeenCalledWith("test");
      const sent = firstRequestBody();
      expect(sent.query_vector).toEqual([0.1, 0.2, 0.3]);
      expect(sent.query_text).toBeUndefined();
    });
  });

  describe("shutdown()", () => {
    it("does not throw", async () => {
      const plugin = createPlugin({
        test: { indexName: "cat.sch.idx", columns: ["id"] },
      });
      await expect(plugin.shutdown()).resolves.not.toThrow();
    });
  });
});
import type express from "express";
import type { IAppRouter } from "shared";
import { VectorSearchConnector } from "../../connectors";
import { getWorkspaceClient } from "../../context";
import { createLogger } from "../../logging/logger";
import { Plugin, toPlugin } from "../../plugin";
import type { PluginManifest } from "../../registry";
import type {
  VsQueryParams,
  VsRawResponse,
} from "../../connectors/vector-search/types";
import { vectorSearchDefaults } from "./defaults";
import manifest from "./manifest.json";
import type {
  IVectorSearchConfig,
  IndexConfig,
  SearchError,
  SearchFilters,
  SearchRequest,
  SearchResponse,
} from "./types";

const logger = createLogger("vector-search");

const QUERY_TYPES = ["ann", "hybrid", "full_text"] as const;
type QueryType = (typeof QUERY_TYPES)[number];

/** Used when neither the request nor the index configuration picks a mode. */
const DEFAULT_QUERY_TYPE: QueryType = "hybrid";
/** Used when neither the request nor the index configuration sets a limit. */
const DEFAULT_NUM_RESULTS = 20;

/**
 * Error thrown by the programmatic API. It exposes the same `code`,
 * `message` and `statusCode` fields as the `SearchError` payload the HTTP
 * routes return, while being a real `Error` (stack trace, `instanceof`).
 */
class VectorSearchRequestError extends Error implements SearchError {
  constructor(
    readonly code: SearchError["code"],
    message: string,
    readonly statusCode: number,
  ) {
    super(message);
    this.name = "VectorSearchRequestError";
  }

  /** `message` is not enumerable on Error; keep it when JSON-serialized. */
  toJSON(): SearchError {
    return {
      code: this.code,
      message: this.message,
      statusCode: this.statusCode,
    };
  }
}

/**
 * AppKit plugin exposing configured Vector Search indexes.
 *
 * Each key of `config.indexes` is an alias. The plugin registers
 * `POST /:alias/query`, `POST /:alias/next-page` and `GET /:alias/config`,
 * and exports a programmatic `query(alias, request)`.
 */
export class VectorSearchPlugin extends Plugin {
  static manifest = manifest as PluginManifest<"vector-search">;

  protected static description =
    "Query Databricks Vector Search indexes with hybrid search, reranking, and pagination";
  protected declare config: IVectorSearchConfig;

  private connector: VectorSearchConnector;

  constructor(config: IVectorSearchConfig) {
    super(config);
    this.config = config;
    this.connector = new VectorSearchConnector({ timeout: config.timeout });
  }

  /**
   * Validates every configured index.
   *
   * @throws Error if an index has no `indexName`, no `columns`, or enables
   *   `pagination` without an `endpointName`.
   */
  async setup(): Promise<void> {
    for (const [alias, idx] of Object.entries(this.config.indexes)) {
      if (!idx.indexName) {
        throw new Error(
          `Index "${alias}" is missing required field "indexName"`,
        );
      }
      if (!idx.columns || idx.columns.length === 0) {
        throw new Error(
          `Index "${alias}" is missing required field "columns"`,
        );
      }
      if (idx.pagination && !idx.endpointName) {
        throw new Error(
          `Index "${alias}" has pagination enabled but is missing "endpointName"`,
        );
      }
    }
    logger.debug(
      "Vector Search plugin configured with %d index(es)",
      Object.keys(this.config.indexes).length,
    );
  }

  /** Registers the query, next-page and config routes on the plugin router. */
  injectRoutes(router: IAppRouter) {
    this.route(router, {
      name: "query",
      method: "post",
      path: "/:alias/query",
      handler: async (req: express.Request, res: express.Response) => {
        const indexConfig = this._requireIndex(req, res);
        if (!indexConfig) return;

        if (indexConfig.auth === "on-behalf-of-user") {
          await this.asUser(req)._handleQuery(req, res, indexConfig);
        } else {
          await this._handleQuery(req, res, indexConfig);
        }
      },
    });

    this.route(router, {
      name: "queryNextPage",
      method: "post",
      path: "/:alias/next-page",
      handler: async (req: express.Request, res: express.Response) => {
        const indexConfig = this._requireIndex(req, res);
        if (!indexConfig) return;

        if (indexConfig.auth === "on-behalf-of-user") {
          await this.asUser(req)._handleNextPage(req, res, indexConfig);
        } else {
          await this._handleNextPage(req, res, indexConfig);
        }
      },
    });

    this.route(router, {
      name: "getConfig",
      method: "get",
      path: "/:alias/config",
      handler: (req: express.Request, res: express.Response) => {
        const indexConfig = this._requireIndex(req, res);
        if (!indexConfig) return;

        res.json({
          alias: req.params.alias,
          columns: indexConfig.columns,
          queryType: indexConfig.queryType ?? DEFAULT_QUERY_TYPE,
          numResults: indexConfig.numResults ?? DEFAULT_NUM_RESULTS,
          reranker: !!indexConfig.reranker,
          pagination: !!indexConfig.pagination,
        });
      },
    });
  }

  /**
   * Handles `POST /:alias/query`.
   *
   * Responds 400 `INVALID_QUERY` when the body fails validation, 500
   * `INTERNAL` when execution yields no result, otherwise the parsed
   * {@link SearchResponse}.
   */
  async _handleQuery(
    req: express.Request,
    res: express.Response,
    indexConfig: IndexConfig,
  ): Promise<void> {
    // req.body is undefined when no body parser ran or no body was sent.
    const body: SearchRequest = req.body ?? {};

    const problem = this._validateRequest(body);
    if (problem) {
      res.status(400).json({
        code: "INVALID_QUERY",
        message: problem,
        statusCode: 400,
      });
      return;
    }

    // NOTE(review): body.columns is forwarded as-is, so a client can request
    // columns outside indexConfig.columns — confirm that this is intended.
    const params = await this._buildQueryParams(indexConfig, body);

    const event = logger.event(req);
    event
      ?.setComponent("vector-search", "query")
      .setContext("vector-search", {
        index: indexConfig.indexName,
        query_type: params.queryType,
        plugin: this.name,
      });

    const result = await this.execute(
      async (signal) => {
        const workspaceClient = getWorkspaceClient();
        const raw = await this.connector.query(workspaceClient, params, signal);
        return this._parseResponse(raw, params.queryType);
      },
      { default: vectorSearchDefaults },
    );

    if (!result) {
      res.status(500).json({
        code: "INTERNAL",
        message: "Query execution failed",
        statusCode: 500,
      });
      return;
    }

    res.json(result);
  }

  /**
   * Handles `POST /:alias/next-page`.
   *
   * Responds 400 `INVALID_QUERY` when pagination is disabled for the index or
   * `pageToken` is missing, and 500 `INTERNAL` when the index has no endpoint
   * name or execution yields no result.
   */
  async _handleNextPage(
    req: express.Request,
    res: express.Response,
    indexConfig: IndexConfig,
  ): Promise<void> {
    if (!indexConfig.pagination) {
      res.status(400).json({
        code: "INVALID_QUERY",
        message: `Pagination is not enabled for index "${req.params.alias}"`,
        statusCode: 400,
      });
      return;
    }

    const { pageToken } = (req.body ?? {}) as { pageToken?: unknown };
    if (typeof pageToken !== "string" || pageToken.length === 0) {
      res.status(400).json({
        code: "INVALID_QUERY",
        message: "pageToken is required",
        statusCode: 400,
      });
      return;
    }

    // setup() rejects pagination without an endpoint name, but it may not
    // have run; fail explicitly instead of asserting the value is present.
    const { endpointName } = indexConfig;
    if (!endpointName) {
      res.status(500).json({
        code: "INTERNAL",
        message: `Index "${req.params.alias}" has pagination enabled but is missing "endpointName"`,
        statusCode: 500,
      });
      return;
    }

    // The next-page request does not carry a query type, so report the
    // index's configured mode rather than a hard-coded one.
    const queryType = indexConfig.queryType ?? DEFAULT_QUERY_TYPE;

    const result = await this.execute(
      async (signal) => {
        const workspaceClient = getWorkspaceClient();
        const raw = await this.connector.queryNextPage(
          workspaceClient,
          { indexName: indexConfig.indexName, endpointName, pageToken },
          signal,
        );
        return this._parseResponse(raw, queryType);
      },
      { default: vectorSearchDefaults },
    );

    if (!result) {
      res.status(500).json({
        code: "INTERNAL",
        message: "Next-page query failed",
        statusCode: 500,
      });
      return;
    }

    res.json(result);
  }

  /**
   * Programmatic query API — available as `appkit.vectorSearch.query()`.
   * When called through `asUser(req)`, executes with the user's credentials.
   *
   * @param alias - Key of the index in `config.indexes`.
   * @param request - Query payload; omitted options fall back to the index
   *   configuration.
   * @throws An error with `code: "INDEX_NOT_FOUND"` and `statusCode: 404`
   *   when no index is configured under `alias`.
   */
  async query(alias: string, request: SearchRequest): Promise<SearchResponse> {
    const indexConfig = this._resolveIndex(alias);
    if (!indexConfig) {
      throw new VectorSearchRequestError(
        "INDEX_NOT_FOUND",
        `No index configured with alias "${alias}"`,
        404,
      );
    }

    const params = await this._buildQueryParams(indexConfig, request);
    const workspaceClient = getWorkspaceClient();
    const raw = await this.connector.query(workspaceClient, params);

    return this._parseResponse(raw, params.queryType);
  }

  /** Aborts every stream owned by this plugin. */
  async shutdown(): Promise<void> {
    this.streamManager.abortAll();
  }

  /** Public surface exposed to application code. */
  exports() {
    return {
      query: this.query.bind(this),
    };
  }

  /**
   * Looks up the index for the route's `:alias`; writes a 404
   * `INDEX_NOT_FOUND` response and returns `undefined` when there is none.
   */
  private _requireIndex(
    req: express.Request,
    res: express.Response,
  ): IndexConfig | undefined {
    const { alias } = req.params;
    const indexConfig = this._resolveIndex(alias);
    if (!indexConfig) {
      res.status(404).json({
        code: "INDEX_NOT_FOUND",
        message: `No index configured with alias "${alias}"`,
        statusCode: 404,
      });
    }
    return indexConfig;
  }

  /**
   * Returns the configuration registered under `alias`, if any.
   *
   * The alias comes from the URL, so only own properties count: a plain
   * property read would resolve names such as "constructor" or "toString"
   * through Object.prototype and treat them as configured indexes.
   */
  private _resolveIndex(alias: string): IndexConfig | undefined {
    const { indexes } = this.config;
    return Object.prototype.hasOwnProperty.call(indexes, alias)
      ? indexes[alias]
      : undefined;
  }

  /**
   * Checks a request body received over HTTP.
   *
   * @returns A message describing the first problem found, or `undefined`
   *   when the body is acceptable.
   */
  private _validateRequest(body: SearchRequest): string | undefined {
    if (!body.queryText && !body.queryVector) {
      return "queryText or queryVector is required";
    }
    // The connector calls queryType.toUpperCase(); reject anything that is
    // not one of the supported modes before it gets that far.
    if (body.queryType != null && !QUERY_TYPES.includes(body.queryType)) {
      return `queryType must be one of: ${QUERY_TYPES.join(", ")}`;
    }
    if (
      body.numResults != null &&
      (!Number.isInteger(body.numResults) || body.numResults <= 0)
    ) {
      return "numResults must be a positive integer";
    }
    return undefined;
  }

  /**
   * Merges a request with the index defaults into connector parameters.
   *
   * When the index defines `embeddingFn` and the request has text but no
   * vector, the text is converted to a vector and only the vector is sent.
   */
  private async _buildQueryParams(
    indexConfig: IndexConfig,
    request: SearchRequest,
  ): Promise<VsQueryParams> {
    const columns = request.columns ?? indexConfig.columns;
    let queryText = request.queryText;
    let queryVector = request.queryVector;

    if (indexConfig.embeddingFn && queryText && !queryVector) {
      queryVector = await indexConfig.embeddingFn(queryText);
      queryText = undefined;
    }

    return {
      indexName: indexConfig.indexName,
      queryText,
      queryVector,
      columns,
      numResults:
        request.numResults ?? indexConfig.numResults ?? DEFAULT_NUM_RESULTS,
      queryType:
        request.queryType ?? indexConfig.queryType ?? DEFAULT_QUERY_TYPE,
      filters: request.filters,
      reranker: this._resolveReranker(request.reranker, indexConfig, columns),
    };
  }

  /**
   * Decides whether to rerank and over which columns.
   *
   * The request flag wins over the index setting. An object-valued index
   * setting supplies the columns; otherwise every requested column except
   * "id" is reranked.
   */
  private _resolveReranker(
    requestReranker: boolean | undefined,
    indexConfig: IndexConfig,
    columns: string[],
  ): { columnsToRerank: string[] } | undefined {
    const shouldRerank = requestReranker ?? indexConfig.reranker;
    if (!shouldRerank) return undefined;

    if (typeof indexConfig.reranker === "object") {
      return indexConfig.reranker;
    }
    return { columnsToRerank: columns.filter((c) => c !== "id") };
  }

  /**
   * Converts the positional rows of a raw response into `{ score, data }`
   * results keyed by column name. The "score" column is lifted out of `data`;
   * when there is no such column the score is reported as 0.
   */
  private _parseResponse(
    raw: VsRawResponse,
    queryType: QueryType,
  ): SearchResponse {
    const columnNames = raw.manifest.columns.map((c) => c.name);
    const scoreIndex = columnNames.indexOf("score");

    // NOTE(review): the fallback guards against a response without
    // data_array (e.g. zero rows) — confirm whether the API ever omits it.
    const rows = raw.result.data_array ?? [];

    const results = rows.map((row) => {
      const data: Record<string, unknown> = {};
      columnNames.forEach((name, i) => {
        if (name !== "score") data[name] = row[i];
      });
      return {
        score: scoreIndex >= 0 ? (row[scoreIndex] as number) : 0,
        data,
      };
    });

    return {
      results,
      totalCount: raw.result.row_count,
      queryTimeMs:
        raw.debug_info?.response_time ?? raw.debug_info?.latency_ms ?? 0,
      queryType,
      fromCache: false,
      nextPageToken: raw.next_page_token ?? null,
    };
  }
}

export const vectorSearch = toPlugin(VectorSearchPlugin);
- -## Quick Start - -**Backend** (`app.ts`): - -```typescript -import { createApp } from '@databricks/appkit'; -import { VectorSearchPlugin } from '@databricks/appkit-vector-search'; - -createApp({ - plugins: [ - new VectorSearchPlugin({ - indexes: { - products: { - indexName: 'catalog.schema.product_index', - columns: ['id', 'name', 'description', 'price', 'category'], - }, - }, - }), - ], -}); -``` - -**Frontend** (`ProductSearch.tsx`): - -```tsx -import { useVectorSearch, SearchBox, SearchResults } from '@databricks/appkit-vector-search'; - -function ProductSearch() { - const vs = useVectorSearch<{ id: string; name: string; description: string; price: number; category: string }>('products'); - - return ( -
- - -
- ); -} -``` - -That's it — hybrid search with debouncing, loading states, keyword highlighting, and error handling. - -## Installation - -```bash -npm install @databricks/appkit-vector-search -``` - -Peer dependencies: `react ^18.x`, `@databricks/appkit ^0.x`. - -## Backend Setup - -Register the plugin with `createApp`. Each key in `indexes` is an **alias** used by the frontend hook and API routes. - -```typescript -new VectorSearchPlugin({ - indexes: { - products: { - indexName: 'catalog.schema.product_index', // required — three-level UC name - columns: ['id', 'name', 'description'], // required — columns to return - queryType: 'hybrid', // 'ann' | 'hybrid' | 'full_text' (default: 'hybrid') - numResults: 20, // max results per query (default: 20) - reranker: false, // enable Databricks reranker (default: false) - auth: 'service-principal', // 'service-principal' | 'on-behalf-of-user' (default: 'service-principal') - cache: { enabled: false }, // see Caching section - pagination: false, // see Pagination section - endpointName: 'my-endpoint', // required when pagination: true - embeddingFn: undefined, // see Self-Managed Embeddings section - }, - }, -}) -``` - -### IndexConfig Reference - -| Property | Type | Default | Description | -|----------|------|---------|-------------| -| `indexName` | `string` | *required* | Three-level UC name (`catalog.schema.index`) | -| `columns` | `string[]` | *required* | Columns to return in results | -| `queryType` | `'ann' \| 'hybrid' \| 'full_text'` | `'hybrid'` | Default search mode | -| `numResults` | `number` | `20` | Max results per query | -| `reranker` | `boolean \| { columnsToRerank: string[] }` | `false` | Enable built-in reranker | -| `auth` | `'service-principal' \| 'on-behalf-of-user'` | `'service-principal'` | Auth mode | -| `cache` | `CacheConfig` | `undefined` | Optional result caching | -| `pagination` | `boolean` | `false` | Enable cursor pagination | -| `endpointName` | `string` | `undefined` | VS endpoint name 
(required if `pagination: true`) | -| `embeddingFn` | `(text: string) => Promise` | `undefined` | Custom embedding function for self-managed indexes | - -## Frontend - -### `useVectorSearch` Hook - -```typescript -const vs = useVectorSearch('products', { - debounceMs: 300, // debounce delay (default: 300) - numResults: 10, // override server default - queryType: 'ann', // override server default - reranker: true, // override server default - minQueryLength: 2, // minimum chars before searching (default: 1) - initialFilters: { category: 'electronics' }, - onResults: (response) => console.log(response), - onError: (error) => console.error(error), -}); -``` - -**Returns:** - -| Property | Type | Description | -|----------|------|-------------| -| `search` | `(query: string) => void` | Execute a search (debounced) | -| `results` | `SearchResult[]` | Current results (each has `.score` and `.data`) | -| `isLoading` | `boolean` | Whether a search is in flight | -| `error` | `SearchError \| null` | Error from last search | -| `query` | `string` | Current query text | -| `totalCount` | `number` | Total result count | -| `queryTimeMs` | `number` | Query execution time in ms | -| `fromCache` | `boolean` | Whether results came from cache | -| `setFilters` | `(filters) => void` | Set filters and re-execute search | -| `activeFilters` | `SearchFilters` | Current active filters | -| `clear` | `() => void` | Clear query, results, and filters | -| `hasMore` | `boolean` | More results available (pagination) | -| `loadMore` | `() => void` | Fetch next page, append to results | -| `isLoadingMore` | `boolean` | Whether loadMore is in flight | - -The hook handles debouncing, request cancellation (AbortController), filter reactivity, and cleanup on unmount. - -### Components - -#### `` - -```tsx - -``` - -Includes search icon, clear button (appears when input has value), Escape key to clear, and loading spinner. 
- -#### `` - -```tsx - ...} // fully custom result rendering (overrides default card) - className="mt-4" -/> -``` - -States: loading skeleton (3 animated cards), error banner, empty message, results with count + timing. - -#### `` - -Used internally by `SearchResults`, but can be used standalone: - -```tsx - -``` - -#### `` - -```tsx - -``` - -### Filters - -Use `setFilters` from the hook to apply VS filter syntax: - -```typescript -// IN list -vs.setFilters({ category: ['electronics', 'books'] }); - -// Comparison operators -vs.setFilters({ 'price >=': 10, 'price <=': 100 }); - -// NOT -vs.setFilters({ 'title NOT': 'test' }); - -// LIKE -vs.setFilters({ 'name LIKE': 'data%' }); - -// OR across columns -vs.setFilters({ 'color1 OR color2': ['red', 'blue'] }); -``` - -Calling `setFilters` immediately re-executes the current search with the new filters. - -## Auth - -### Service Principal (default) - -The plugin uses `DATABRICKS_CLIENT_ID` and `DATABRICKS_CLIENT_SECRET` from the environment. When deployed to Databricks Apps, these are set automatically. OAuth tokens are cached and refreshed with a 2-minute buffer before expiry. - -No configuration needed — this is the default. - -### On-Behalf-of-User - -For indexes with row-level security or Unity Catalog permissions: - -```typescript -indexes: { - docs: { - indexName: 'catalog.schema.docs_index', - columns: ['id', 'title', 'content'], - auth: 'on-behalf-of-user', // uses the logged-in user's token - }, -} -``` - -The plugin extracts the user's OAuth token from the `x-forwarded-access-token` header (set by Databricks Apps proxy). Queries run with the user's identity and UC permissions. 
- -## Self-Managed Embeddings - -For indexes that don't use Databricks-managed embeddings, provide an `embeddingFn` that converts query text to a vector: - -```typescript -indexes: { - custom: { - indexName: 'catalog.schema.custom_index', - columns: ['id', 'title', 'content'], - queryType: 'ann', - embeddingFn: async (text) => { - const resp = await fetch( - `https://${process.env.DATABRICKS_HOST}/serving-endpoints/my-embedding-model/invocations`, - { - method: 'POST', - headers: { 'Authorization': `Bearer ${token}`, 'Content-Type': 'application/json' }, - body: JSON.stringify({ input: [text] }), - }, - ); - const data = await resp.json(); - return data.data[0].embedding; - }, - }, -} -``` - -When `embeddingFn` is set, the plugin calls it to convert `queryText` into `queryVector` before sending to VS. The frontend hook works identically — users type text, the backend handles the conversion. - -If omitted, the plugin sends `queryText` directly and VS computes embeddings server-side (managed mode). - -## Caching - -Optional LRU cache for search results. Off by default (freeform search has low cache hit rates). - -```typescript -indexes: { - products: { - indexName: 'catalog.schema.product_index', - columns: ['id', 'name', 'description'], - cache: { - enabled: true, - ttlSeconds: 120, // time-to-live per entry (default: 60) - maxEntries: 1000, // max cached queries (default: 1000) - }, - }, -} -``` - -Cached responses include `fromCache: true` in the response. The hook exposes this via `vs.fromCache`. - -## Pagination - -Cursor-based pagination for large result sets. Off by default — VS typically returns results in 20-40ms, so most apps don't need it. 
- -```typescript -indexes: { - products: { - indexName: 'catalog.schema.product_index', - columns: ['id', 'name', 'description'], - pagination: true, - endpointName: 'my-vs-endpoint', // required when pagination is enabled - }, -} -``` - -Frontend usage: - -```tsx -const vs = useVectorSearch('products'); - -return ( - <> - - - - -); -``` - -`loadMore` fetches the next page and appends results to the existing array. - -## API Reference - -The plugin registers these Express routes automatically: - -| Method | Path | Body | Description | -|--------|------|------|-------------| -| `POST` | `/api/vector-search/:alias/query` | `SearchRequest` | Execute a search | -| `POST` | `/api/vector-search/:alias/next-page` | `{ pageToken: string }` | Fetch next page (requires `pagination: true`) | -| `GET` | `/api/vector-search/:alias/config` | — | Returns index config (columns, queryType, numResults, etc.) | - -### SearchRequest Body - -```json -{ - "queryText": "wireless headphones", - "filters": { "category": ["electronics"] }, - "numResults": 10, - "queryType": "hybrid", - "reranker": true -} -``` - -### SearchResponse - -```json -{ - "results": [ - { "score": 0.92, "data": { "id": "1", "name": "...", "description": "..." } } - ], - "totalCount": 47, - "queryTimeMs": 35, - "queryType": "hybrid", - "fromCache": false, - "nextPageToken": null -} -``` - -### Error Response - -```json -{ - "code": "INVALID_QUERY", - "message": "queryText or queryVector is required", - "statusCode": 400 -} -``` - -Error codes: `UNAUTHORIZED`, `INDEX_NOT_FOUND`, `INVALID_QUERY`, `RATE_LIMITED`, `INTERNAL`. 
diff --git a/packages/vector-search/package.json b/packages/vector-search/package.json deleted file mode 100644 index efdeb804..00000000 --- a/packages/vector-search/package.json +++ /dev/null @@ -1,47 +0,0 @@ -{ - "name": "@databricks/appkit-vector-search", - "version": "0.1.0", - "description": "Vector Search plugin for Databricks Appkit", - "main": "dist/index.js", - "types": "dist/index.d.ts", - "exports": { - ".": { - "import": "./dist/index.js", - "types": "./dist/index.d.ts" - }, - "./ui": { - "import": "./dist/ui/index.js", - "types": "./dist/ui/index.d.ts" - } - }, - "scripts": { - "build": "tsc", - "dev": "tsc --watch", - "test": "vitest run", - "test:watch": "vitest" - }, - "peerDependencies": { - "@databricks/appkit": "^0.1.0", - "react": "^18.0.0" - }, - "dependencies": { - "@radix-ui/react-icons": "^1.3.0", - "@radix-ui/react-select": "^2.1.0", - "@radix-ui/react-slider": "^1.2.0", - "express": "^5.2.1" - }, - "devDependencies": { - "@testing-library/jest-dom": "^6.4.0", - "@testing-library/react": "^15.0.0", - "@types/express": "^4.17.0", - "@types/react": "^18.3.0", - "@types/react-dom": "^19.2.3", - "@types/supertest": "^7.2.0", - "jsdom": "^24.0.0", - "react": "^19.2.4", - "react-dom": "^19.2.4", - "supertest": "^7.2.2", - "typescript": "^5.4.0", - "vitest": "^1.6.0" - } -} diff --git a/packages/vector-search/src/index.ts b/packages/vector-search/src/index.ts deleted file mode 100644 index dbe386e9..00000000 --- a/packages/vector-search/src/index.ts +++ /dev/null @@ -1,13 +0,0 @@ -export { VectorSearchPlugin } from './plugin/VectorSearchPlugin'; -export { createVectorSearchRouter } from './plugin/routes'; -export type { - VectorSearchPluginConfig, - IndexConfig, - RerankerConfig, - CacheConfig, - SearchRequest, - SearchResponse, - SearchResult, - SearchFilters, - SearchError, -} from './plugin/types'; diff --git a/packages/vector-search/src/plugin/VectorSearchClient.ts b/packages/vector-search/src/plugin/VectorSearchClient.ts deleted file mode 
100644 index d80ceb86..00000000 --- a/packages/vector-search/src/plugin/VectorSearchClient.ts +++ /dev/null @@ -1,200 +0,0 @@ -import type { SearchResponse, SearchFilters, SearchError, RerankerConfig, TokenProvider, VsRawResponse } from './types'; - -export class VectorSearchClient { - private host: string; - private tokenProvider: TokenProvider; - - constructor(config: { host: string; tokenProvider: TokenProvider }) { - this.host = config.host; - this.tokenProvider = config.tokenProvider; - } - - async query(params: { - indexName: string; - queryText?: string; - queryVector?: number[]; - columns: string[]; - numResults: number; - queryType: 'ann' | 'hybrid' | 'full_text'; - filters?: SearchFilters; - reranker?: boolean | RerankerConfig; - userToken?: string; - embeddingFn?: (text: string) => Promise; - }): Promise { - const token = params.userToken ?? await this.tokenProvider.getToken(); - - // Resolve query: managed (query_text) vs self-managed (query_vector) - let queryText = params.queryText; - let queryVector = params.queryVector; - - if (params.embeddingFn && queryText && !queryVector) { - queryVector = await params.embeddingFn(queryText); - queryText = undefined; - } - - if (!queryText && !queryVector) { - throw { - code: 'INVALID_QUERY' as const, - message: 'Either queryText or queryVector is required', - statusCode: 400, - }; - } - - const body: Record = { - columns: params.columns, - num_results: params.numResults, - query_type: params.queryType.toUpperCase(), - debug_level: 1, - }; - - if (queryText) body.query_text = queryText; - if (queryVector) body.query_vector = queryVector; - - if (params.filters && Object.keys(params.filters).length > 0) { - body.filters = params.filters; - } - - if (params.reranker) { - const columnsToRerank = typeof params.reranker === 'object' - ? 
params.reranker.columnsToRerank - : params.columns.filter(c => c !== 'id'); - body.reranker = { - model: 'databricks_reranker', - parameters: { columns_to_rerank: columnsToRerank }, - }; - } - - const response = await this.fetchWithRetry( - `https://${this.host}/api/2.0/vector-search/indexes/${params.indexName}/query`, - { - method: 'POST', - headers: { - 'Authorization': `Bearer ${token}`, - 'Content-Type': 'application/json', - }, - body: JSON.stringify(body), - }, - ); - - if (!response.ok) throw this.mapError(response); - const raw = await response.json() as VsRawResponse; - return this.parseResponse(raw, params.queryType); - } - - async queryNextPage(params: { - indexName: string; - endpointName: string; - pageToken: string; - userToken?: string; - }): Promise { - const token = params.userToken ?? await this.tokenProvider.getToken(); - - const response = await this.fetchWithRetry( - `https://${this.host}/api/2.0/vector-search/indexes/${params.indexName}/query-next-page`, - { - method: 'POST', - headers: { - 'Authorization': `Bearer ${token}`, - 'Content-Type': 'application/json', - }, - body: JSON.stringify({ - endpoint_name: params.endpointName, - page_token: params.pageToken, - }), - }, - ); - - if (!response.ok) throw this.mapError(response); - const raw = await response.json() as VsRawResponse; - return this.parseResponse(raw, 'hybrid'); - } - - private parseResponse(raw: VsRawResponse, queryType: 'ann' | 'hybrid' | 'full_text'): SearchResponse { - const columnNames = raw.manifest.columns.map(c => c.name); - const scoreIndex = columnNames.indexOf('score'); - - const results = raw.result.data_array.map(row => { - const data: Record = {}; - for (let i = 0; i < columnNames.length; i++) { - if (columnNames[i] !== 'score') data[columnNames[i]] = row[i]; - } - return { - score: scoreIndex >= 0 ? (row[scoreIndex] as number) : 0, - data, - }; - }); - - return { - results, - totalCount: raw.result.row_count, - queryTimeMs: raw.debug_info?.response_time ?? 
raw.debug_info?.latency_ms ?? 0, - queryType, - fromCache: false, - nextPageToken: raw.next_page_token ?? null, - }; - } - - private mapError(response: { status: number }): SearchError { - const codeMap: Record = { - 401: 'UNAUTHORIZED', - 403: 'UNAUTHORIZED', - 404: 'INDEX_NOT_FOUND', - 400: 'INVALID_QUERY', - 429: 'RATE_LIMITED', - }; - return { - code: codeMap[response.status] ?? 'INTERNAL', - message: `VS query failed with status ${response.status}`, - statusCode: response.status, - }; - } - - private async fetchWithRetry( - url: string, - options: RequestInit, - maxRetries = 3, - backoffMs = 1, - ): Promise { - let lastError: Error | null = null; - - for (let attempt = 0; attempt <= maxRetries; attempt++) { - try { - const response = await fetch(url, options); - - // Don't retry client errors (4xx except 429) - if (response.status >= 400 && response.status < 500 && response.status !== 429) { - return response; - } - - if (response.ok) { - return response; - } - - // Retry 429 and 5xx - lastError = new Error(`HTTP ${response.status}`); - if (attempt < maxRetries) { - await new Promise(r => setTimeout(r, backoffMs)); - continue; - } - return response; - } catch (err) { - lastError = err as Error; - if (attempt < maxRetries) { - await new Promise(r => setTimeout(r, backoffMs)); - continue; - } - throw { - code: 'INTERNAL' as const, - message: `Network error: ${lastError.message}`, - statusCode: 500, - }; - } - } - - throw { - code: 'INTERNAL' as const, - message: 'Failed after retries', - statusCode: 500, - }; - } -} diff --git a/packages/vector-search/src/plugin/VectorSearchPlugin.ts b/packages/vector-search/src/plugin/VectorSearchPlugin.ts deleted file mode 100644 index 97d4de92..00000000 --- a/packages/vector-search/src/plugin/VectorSearchPlugin.ts +++ /dev/null @@ -1,105 +0,0 @@ -import type { VectorSearchPluginConfig, IndexConfig, SearchRequest, SearchResponse } from './types'; -import { VectorSearchClient } from './VectorSearchClient'; -import { 
ServicePrincipalTokenProvider, OboTokenExtractor } from './auth'; - -export class VectorSearchPlugin { - static manifest = { - name: 'vector-search', - description: 'Query Databricks Vector Search indexes from your app', - resources: { required: [] as any[], optional: [] as any[] }, - env: [ - { name: 'DATABRICKS_HOST', description: 'Databricks workspace hostname', source: 'auto' }, - { name: 'DATABRICKS_CLIENT_ID', description: 'Service principal client ID', source: 'auto' }, - { name: 'DATABRICKS_CLIENT_SECRET', description: 'Service principal client secret', source: 'auto' }, - ], - }; - - private config: VectorSearchPluginConfig; - private client!: VectorSearchClient; - private spTokenProvider!: ServicePrincipalTokenProvider; - - constructor(config: VectorSearchPluginConfig) { - this.config = config; - } - - async setup(): Promise { - const host = process.env.DATABRICKS_HOST; - if (!host) { - throw new Error( - 'DATABRICKS_HOST is not set. Ensure the app is deployed to Databricks Apps or set the environment variable manually.', - ); - } - - // Fail-fast config validation - for (const [alias, idx] of Object.entries(this.config.indexes)) { - if (!idx.indexName) { - throw new Error(`Index "${alias}" is missing required field "indexName"`); - } - if (!idx.columns || idx.columns.length === 0) { - throw new Error(`Index "${alias}" is missing required field "columns"`); - } - if (idx.pagination && !idx.endpointName) { - throw new Error(`Index "${alias}" has pagination enabled but is missing "endpointName"`); - } - } - - this.spTokenProvider = new ServicePrincipalTokenProvider(host); - this.client = new VectorSearchClient({ host, tokenProvider: this.spTokenProvider }); - } - - async shutdown(): Promise { - // No cleanup needed currently - } - - getResourceRequirements() { - return Object.values(this.config.indexes).map((idx) => ({ - type: 'vector-search-index' as const, - name: idx.indexName, - permission: 'SELECT' as const, - })); - } - - exports() { - return { - 
query: (alias: string, request: SearchRequest) => this.executeQuery(alias, request), - }; - } - - /** Resolve an index alias to its config. Throws if not found. */ - resolveIndex(alias: string): IndexConfig { - const config = this.config.indexes[alias]; - if (!config) { - throw { - code: 'INDEX_NOT_FOUND' as const, - message: `No index configured with alias "${alias}"`, - statusCode: 404, - }; - } - return config; - } - - /** Get the VS client instance (used by route handlers) */ - getClient(): VectorSearchClient { - return this.client; - } - - /** Get the full plugin config (used by route handlers) */ - getConfig(): VectorSearchPluginConfig { - return this.config; - } - - private async executeQuery(alias: string, request: SearchRequest): Promise { - const indexConfig = this.resolveIndex(alias); - return this.client.query({ - indexName: indexConfig.indexName, - queryText: request.queryText, - queryVector: request.queryVector, - columns: request.columns ?? indexConfig.columns, - numResults: request.numResults ?? indexConfig.numResults ?? 20, - queryType: request.queryType ?? indexConfig.queryType ?? 'hybrid', - filters: request.filters, - reranker: request.reranker ?? indexConfig.reranker ?? 
false, - embeddingFn: indexConfig.embeddingFn, - }); - } -} diff --git a/packages/vector-search/src/plugin/auth.ts b/packages/vector-search/src/plugin/auth.ts deleted file mode 100644 index 62211a8e..00000000 --- a/packages/vector-search/src/plugin/auth.ts +++ /dev/null @@ -1,48 +0,0 @@ -import type { TokenProvider, SearchError } from './types'; - -export class ServicePrincipalTokenProvider implements TokenProvider { - private token: string | null = null; - private expiresAt = 0; - private host: string; - - constructor(host: string) { - this.host = host; - } - - async getToken(): Promise { - if (this.token && Date.now() < this.expiresAt - 120_000) { - return this.token; - } - - const response = await fetch(`https://${this.host}/oidc/v1/token`, { - method: 'POST', - headers: { 'Content-Type': 'application/x-www-form-urlencoded' }, - body: new URLSearchParams({ - grant_type: 'client_credentials', - client_id: process.env.DATABRICKS_CLIENT_ID!, - client_secret: process.env.DATABRICKS_CLIENT_SECRET!, - scope: 'all-apis', - }).toString(), - }); - - const data = await response.json(); - this.token = data.access_token; - this.expiresAt = Date.now() + data.expires_in * 1000; - return this.token!; - } -} - -export class OboTokenExtractor { - static extractFromRequest(req: { headers: Record }): string { - const token = req.headers['x-forwarded-access-token']; - if (!token) { - const error: SearchError = { - code: 'UNAUTHORIZED', - message: 'No user token found. 
Ensure app is configured for user authorization.', - statusCode: 401, - }; - throw error; - } - return token; - } -} diff --git a/packages/vector-search/src/plugin/routes.ts b/packages/vector-search/src/plugin/routes.ts deleted file mode 100644 index cd58e354..00000000 --- a/packages/vector-search/src/plugin/routes.ts +++ /dev/null @@ -1,136 +0,0 @@ -import { Router } from 'express'; -import type { Request, Response } from 'express'; -import type { VectorSearchPlugin } from './VectorSearchPlugin'; -import { OboTokenExtractor } from './auth'; -import type { SearchRequest } from './types'; - -export function createVectorSearchRouter(plugin: VectorSearchPlugin): Router { - const router = Router(); - - // POST /:alias/query - router.post('/:alias/query', async (req: Request, res: Response) => { - const { alias } = req.params; - - let indexConfig; - try { - indexConfig = plugin.resolveIndex(alias); - } catch (err: any) { - return res.status(err.statusCode ?? 404).json(err); - } - - const body: SearchRequest = req.body; - - if (!body.queryText && !body.queryVector) { - return res.status(400).json({ - code: 'INVALID_QUERY', - message: 'queryText or queryVector is required', - statusCode: 400, - }); - } - - // Resolve auth - let userToken: string | undefined; - if (indexConfig.auth === 'on-behalf-of-user') { - try { - userToken = OboTokenExtractor.extractFromRequest(req); - } catch (err: any) { - return res.status(401).json(err); - } - } - - try { - const client = plugin.getClient(); - const response = await client.query({ - indexName: indexConfig.indexName, - queryText: body.queryText, - queryVector: body.queryVector, - columns: body.columns ?? indexConfig.columns, - numResults: body.numResults ?? indexConfig.numResults ?? 20, - queryType: body.queryType ?? indexConfig.queryType ?? 'hybrid', - filters: body.filters, - reranker: body.reranker ?? indexConfig.reranker ?? 
false, - userToken, - embeddingFn: indexConfig.embeddingFn, - }); - - return res.json(response); - } catch (err: any) { - return res.status(err.statusCode ?? 500).json(err); - } - }); - - // POST /:alias/next-page - router.post('/:alias/next-page', async (req: Request, res: Response) => { - const { alias } = req.params; - - let indexConfig; - try { - indexConfig = plugin.resolveIndex(alias); - } catch (err: any) { - return res.status(err.statusCode ?? 404).json(err); - } - - if (!indexConfig.pagination) { - return res.status(400).json({ - code: 'INVALID_QUERY', - message: `Pagination is not enabled for index "${alias}"`, - statusCode: 400, - }); - } - - const { pageToken } = req.body; - if (!pageToken) { - return res.status(400).json({ - code: 'INVALID_QUERY', - message: 'pageToken is required', - statusCode: 400, - }); - } - - let userToken: string | undefined; - if (indexConfig.auth === 'on-behalf-of-user') { - try { - userToken = OboTokenExtractor.extractFromRequest(req); - } catch (err: any) { - return res.status(401).json(err); - } - } - - try { - const client = plugin.getClient(); - const response = await client.queryNextPage({ - indexName: indexConfig.indexName, - endpointName: indexConfig.endpointName!, - pageToken, - userToken, - }); - - return res.json(response); - } catch (err: any) { - return res.status(err.statusCode ?? 500).json(err); - } - }); - - // GET /:alias/config - router.get('/:alias/config', (req: Request, res: Response) => { - const { alias } = req.params; - - let indexConfig; - try { - indexConfig = plugin.resolveIndex(alias); - } catch (err: any) { - return res.status(err.statusCode ?? 404).json(err); - } - - return res.json({ - alias, - columns: indexConfig.columns, - queryType: indexConfig.queryType ?? 'hybrid', - numResults: indexConfig.numResults ?? 
20, - reranker: !!indexConfig.reranker, - pagination: !!indexConfig.pagination, - }); - }); - - return router; -} diff --git a/packages/vector-search/src/plugin/types.ts b/packages/vector-search/src/plugin/types.ts deleted file mode 100644 index f8ad1e84..00000000 --- a/packages/vector-search/src/plugin/types.ts +++ /dev/null @@ -1,198 +0,0 @@ -// ============================================ -// Plugin Configuration Types -// ============================================ - -export interface VectorSearchPluginConfig { - indexes: Record; -} - -export interface IndexConfig { - /** Three-level UC name: catalog.schema.index_name */ - indexName: string; - /** Columns to return in results */ - columns: string[]; - /** Default search mode */ - queryType?: 'ann' | 'hybrid' | 'full_text'; // default: 'hybrid' - /** Max results per query */ - numResults?: number; // default: 20 - /** Enable built-in reranker */ - reranker?: boolean | RerankerConfig; // default: false - /** Auth mode */ - auth?: 'service-principal' | 'on-behalf-of-user'; // default: 'service-principal' - /** Result caching */ - cache?: CacheConfig; - /** Enable cursor pagination */ - pagination?: boolean; // default: false - /** VS endpoint name (required if pagination: true) */ - endpointName?: string; - /** - * For self-managed embedding indexes: converts query text to embedding vector. - * If provided, the plugin calls this function and sends query_vector to VS. - * If omitted, the plugin sends query_text and VS computes embeddings (managed mode). - */ - embeddingFn?: (text: string) => Promise; -} - -export interface RerankerConfig { - columnsToRerank: string[]; -} - -export interface CacheConfig { - enabled: boolean; - ttlSeconds?: number; // default: 60 - maxEntries?: number; // default: 1000 -} - -// ============================================ -// Query Types (frontend → backend) -// ============================================ - -export interface SearchRequest { - /** Text query. 
Required for managed embedding indexes. */ - queryText?: string; - /** Pre-computed embedding vector. Required for self-managed indexes without embeddingFn. */ - queryVector?: number[]; - /** Override default columns for this query */ - columns?: string[]; - /** Override default numResults for this query */ - numResults?: number; - /** Override default queryType for this query */ - queryType?: 'ann' | 'hybrid' | 'full_text'; - /** Metadata filters */ - filters?: SearchFilters; - /** Override reranker for this query */ - reranker?: boolean; -} - -/** - * Filters use the VS REST API filter format. - * Keys are column names with optional operators. - * - * Examples: - * { category: ['electronics', 'books'] } // IN list - * { 'price >=': 10 } // comparison - * { 'title NOT': 'test' } // NOT - * { 'name LIKE': 'data%' } // LIKE - * { 'color1 OR color2': ['red', 'blue'] } // OR across columns - */ -export type SearchFilters = Record; - -// ============================================ -// Result Types (backend → frontend) -// ============================================ - -export interface SearchResponse = Record> { - /** Search results */ - results: SearchResult[]; - /** Total number of results */ - totalCount: number; - /** Query execution time in ms (from VS debug info) */ - queryTimeMs: number; - /** The query type that was actually used */ - queryType: 'ann' | 'hybrid' | 'full_text'; - /** Whether results were served from cache */ - fromCache: boolean; - /** Token for fetching next page. Null if no more results. 
*/ - nextPageToken: string | null; -} - -export interface SearchResult = Record> { - /** Similarity score (0-1, higher = more similar) */ - score: number; - /** The result data — keys match the columns requested */ - data: T; -} - -// ============================================ -// Error Types -// ============================================ - -export interface SearchError { - code: 'UNAUTHORIZED' | 'INDEX_NOT_FOUND' | 'INVALID_QUERY' | 'RATE_LIMITED' | 'INTERNAL'; - message: string; - /** HTTP status from VS API */ - statusCode: number; -} - -// ============================================ -// Hook Types -// ============================================ - -export interface UseVectorSearchOptions { - /** Debounce delay in ms. Default: 300 */ - debounceMs?: number; - /** Override default numResults from server config */ - numResults?: number; - /** Override default queryType from server config */ - queryType?: 'ann' | 'hybrid' | 'full_text'; - /** Override reranker from server config */ - reranker?: boolean; - /** Initial filters */ - initialFilters?: SearchFilters; - /** Callback when search completes */ - onResults?: (response: SearchResponse) => void; - /** Callback on error */ - onError?: (error: SearchError) => void; - /** Minimum query length before searching. 
Default: 1 */ - minQueryLength?: number; -} - -export interface UseVectorSearchReturn = Record> { - /** Execute a search */ - search: (query: string) => void; - /** Current results */ - results: SearchResult[]; - /** Whether a search is in flight */ - isLoading: boolean; - /** Error from the last search, if any */ - error: SearchError | null; - /** Total result count */ - totalCount: number; - /** Query time in ms */ - queryTimeMs: number; - /** Whether results came from cache */ - fromCache: boolean; - /** Current query text */ - query: string; - /** Set filters programmatically */ - setFilters: (filters: SearchFilters) => void; - /** Current active filters */ - activeFilters: SearchFilters; - /** Clear all filters and results */ - clear: () => void; - /** Whether more results are available (pagination) */ - hasMore?: boolean; - /** Fetch next page and append to results (pagination) */ - loadMore?: () => void; - /** Whether a loadMore is in flight (pagination) */ - isLoadingMore?: boolean; -} - -// ============================================ -// Internal Types (not exported from package) -// ============================================ - -/** Raw response from VS REST API */ -export interface VsRawResponse { - manifest: { - column_count: number; - columns: Array<{ name: string; type?: string }>; - }; - result: { - row_count: number; - data_array: unknown[][]; - }; - next_page_token?: string | null; - debug_info?: { - response_time?: number; - ann_time?: number; - embedding_gen_time?: number; - latency_ms?: number; - [key: string]: unknown; - }; -} - -/** Token provider interface for auth */ -export interface TokenProvider { - getToken(): Promise; -} diff --git a/packages/vector-search/src/ui/components/SearchBox.tsx b/packages/vector-search/src/ui/components/SearchBox.tsx deleted file mode 100644 index dc9c83bd..00000000 --- a/packages/vector-search/src/ui/components/SearchBox.tsx +++ /dev/null @@ -1,73 +0,0 @@ -import * as React from 'react'; - -interface 
SearchBoxProps { - onSearch: (query: string) => void; - value?: string; - placeholder?: string; - isLoading?: boolean; - autoFocus?: boolean; - className?: string; -} - -export function SearchBox({ - onSearch, - value, - placeholder = 'Search...', - isLoading = false, - autoFocus = false, - className, -}: SearchBoxProps) { - const [internalValue, setInternalValue] = React.useState(''); - const displayValue = value ?? internalValue; - const inputRef = React.useRef(null); - - const handleChange = (e: React.ChangeEvent) => { - const val = e.target.value; - if (value === undefined) setInternalValue(val); - onSearch(val); - }; - - const handleClear = () => { - if (value === undefined) setInternalValue(''); - onSearch(''); - inputRef.current?.focus(); - }; - - const handleKeyDown = (e: React.KeyboardEvent) => { - if (e.key === 'Escape') handleClear(); - }; - - return ( -
- - - - - {isLoading && ( -
- )} - {displayValue && !isLoading && ( - - )} -
- ); -} diff --git a/packages/vector-search/src/ui/components/SearchLoadMore.tsx b/packages/vector-search/src/ui/components/SearchLoadMore.tsx deleted file mode 100644 index a351c247..00000000 --- a/packages/vector-search/src/ui/components/SearchLoadMore.tsx +++ /dev/null @@ -1,24 +0,0 @@ -import * as React from 'react'; - -interface SearchLoadMoreProps { - hasMore: boolean; - isLoading: boolean; - onLoadMore: () => void; - className?: string; -} - -export function SearchLoadMore({ hasMore, isLoading, onLoadMore, className }: SearchLoadMoreProps) { - if (!hasMore) return null; - - return ( -
- -
- ); -} diff --git a/packages/vector-search/src/ui/components/SearchResultCard.tsx b/packages/vector-search/src/ui/components/SearchResultCard.tsx deleted file mode 100644 index 630e6b31..00000000 --- a/packages/vector-search/src/ui/components/SearchResultCard.tsx +++ /dev/null @@ -1,76 +0,0 @@ -import * as React from 'react'; -import type { SearchResult } from '../../plugin/types'; - -function escapeRegex(str: string): string { - return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); -} - -interface SearchResultCardProps> { - result: SearchResult; - titleColumn?: keyof T; - descriptionColumn?: keyof T; - displayColumns?: (keyof T)[]; - showScore?: boolean; - query?: string; -} - -export function SearchResultCard>({ - result, - titleColumn, - descriptionColumn, - displayColumns, - showScore = false, - query, -}: SearchResultCardProps) { - const title = titleColumn ? String(result.data[titleColumn] ?? '') : undefined; - const description = descriptionColumn ? String(result.data[descriptionColumn] ?? '') : undefined; - - const highlight = (text: string): React.ReactNode => { - if (!query) return text; - const words = query.split(/\s+/).filter(w => w.length > 0); - if (words.length === 0) return text; - const regex = new RegExp(`(${words.map(escapeRegex).join('|')})`, 'gi'); - const parts = text.split(regex); - return parts.map((part, i) => - regex.test(part) - ? {part} - : part - ); - }; - - return ( -
-
-
- {title && ( -

- {highlight(title)} -

- )} - {description && ( -

- {highlight(description)} -

- )} - {displayColumns && ( -
- {displayColumns - .filter(col => col !== titleColumn && col !== descriptionColumn) - .map(col => ( - - {String(col)}:{' '} - {String(result.data[col] ?? '—')} - - ))} -
- )} -
- {showScore && ( - - {(result.score * 100).toFixed(0)}% - - )} -
-
- ); -} diff --git a/packages/vector-search/src/ui/components/SearchResults.tsx b/packages/vector-search/src/ui/components/SearchResults.tsx deleted file mode 100644 index 64d5fc9d..00000000 --- a/packages/vector-search/src/ui/components/SearchResults.tsx +++ /dev/null @@ -1,93 +0,0 @@ -import * as React from 'react'; -import type { SearchResult, SearchError } from '../../plugin/types'; -import { SearchResultCard } from './SearchResultCard'; - -interface SearchResultsProps> { - results: SearchResult[]; - isLoading: boolean; - error: SearchError | null; - query: string; - totalCount: number; - queryTimeMs: number; - renderResult?: (result: SearchResult, index: number) => React.ReactNode; - displayColumns?: (keyof T)[]; - titleColumn?: keyof T; - descriptionColumn?: keyof T; - showScores?: boolean; - emptyMessage?: string; - className?: string; -} - -export function SearchResults>({ - results, - isLoading, - error, - query, - totalCount, - queryTimeMs, - renderResult, - displayColumns, - titleColumn, - descriptionColumn, - showScores = false, - emptyMessage = 'No results found.', - className, -}: SearchResultsProps) { - if (error) { - return ( -
-

Search failed

-

{error.message}

-
- ); - } - - if (isLoading && results.length === 0) { - return ( -
- {Array.from({ length: 3 }).map((_, i) => ( -
-
-
-
-
- ))} -
- ); - } - - if (!query) return null; - - if (results.length === 0) { - return ( -
- {emptyMessage} -
- ); - } - - return ( -
-
- {totalCount} result{totalCount !== 1 ? 's' : ''} in {queryTimeMs}ms -
-
- {results.map((result, index) => - renderResult - ? renderResult(result, index) - : ( - - ) - )} -
-
- ); -} diff --git a/packages/vector-search/src/ui/hooks/useVectorSearch.ts b/packages/vector-search/src/ui/hooks/useVectorSearch.ts deleted file mode 100644 index ad43d194..00000000 --- a/packages/vector-search/src/ui/hooks/useVectorSearch.ts +++ /dev/null @@ -1,175 +0,0 @@ -import { useState, useCallback, useRef, useEffect } from 'react'; -import type { - SearchResult, - SearchResponse, - SearchError, - SearchFilters, - UseVectorSearchOptions, - UseVectorSearchReturn, -} from '../../plugin/types'; - -export function useVectorSearch = Record>( - alias: string, - options: UseVectorSearchOptions = {}, -): UseVectorSearchReturn { - const { - debounceMs = 300, - numResults, - queryType, - reranker, - initialFilters = {}, - onResults, - onError, - minQueryLength = 1, - } = options; - - const [results, setResults] = useState[]>([]); - const [isLoading, setIsLoading] = useState(false); - const [error, setError] = useState(null); - const [query, setQuery] = useState(''); - const [totalCount, setTotalCount] = useState(0); - const [queryTimeMs, setQueryTimeMs] = useState(0); - const [fromCache, setFromCache] = useState(false); - const [activeFilters, setActiveFilters] = useState(initialFilters); - const [hasMore, setHasMore] = useState(false); - const [isLoadingMore, setIsLoadingMore] = useState(false); - - const nextPageTokenRef = useRef(null); - const abortRef = useRef(null); - const debounceRef = useRef | null>(null); - - const executeSearch = useCallback(async ( - searchQuery: string, - filters: SearchFilters, - isLoadMore = false, - ) => { - if (abortRef.current) abortRef.current.abort(); - abortRef.current = new AbortController(); - - if (!isLoadMore) { - setIsLoading(true); - setError(null); - } else { - setIsLoadingMore(true); - } - - try { - const url = isLoadMore - ? `/api/vector-search/${alias}/next-page` - : `/api/vector-search/${alias}/query`; - - const body: Record = isLoadMore - ? 
{ pageToken: nextPageTokenRef.current } - : { - queryText: searchQuery, - ...(Object.keys(filters).length > 0 ? { filters } : {}), - ...(numResults !== undefined ? { numResults } : {}), - ...(queryType !== undefined ? { queryType } : {}), - ...(reranker !== undefined ? { reranker } : {}), - }; - - const response = await fetch(url, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify(body), - signal: abortRef.current.signal, - }); - - if (!response.ok) { - const err = await response.json(); - throw err as SearchError; - } - - const data: SearchResponse = await response.json(); - - if (isLoadMore) { - setResults(prev => [...prev, ...data.results]); - } else { - setResults(data.results); - } - - setTotalCount(data.totalCount); - setQueryTimeMs(data.queryTimeMs); - setFromCache(data.fromCache); - setHasMore(!!data.nextPageToken); - nextPageTokenRef.current = data.nextPageToken; - - onResults?.(data as SearchResponse); - } catch (err: unknown) { - if (err instanceof DOMException && err.name === 'AbortError') return; - const searchError = err as SearchError; - setError(searchError); - onError?.(searchError); - } finally { - setIsLoading(false); - setIsLoadingMore(false); - } - }, [alias, numResults, queryType, reranker, onResults, onError]); - - const search = useCallback((searchQuery: string) => { - setQuery(searchQuery); - - if (debounceRef.current) clearTimeout(debounceRef.current); - - if (searchQuery.length < minQueryLength) { - setResults([]); - setTotalCount(0); - setHasMore(false); - return; - } - - debounceRef.current = setTimeout(() => { - executeSearch(searchQuery, activeFilters); - }, debounceMs); - }, [debounceMs, minQueryLength, activeFilters, executeSearch]); - - const setFilters = useCallback((filters: SearchFilters) => { - setActiveFilters(filters); - if (query.length >= minQueryLength) { - executeSearch(query, filters); - } - }, [query, minQueryLength, executeSearch]); - - const loadMore = useCallback(() => { - 
if (hasMore && !isLoadingMore && nextPageTokenRef.current) { - executeSearch(query, activeFilters, true); - } - }, [hasMore, isLoadingMore, query, activeFilters, executeSearch]); - - const clear = useCallback(() => { - if (debounceRef.current) clearTimeout(debounceRef.current); - if (abortRef.current) abortRef.current.abort(); - setQuery(''); - setResults([]); - setError(null); - setTotalCount(0); - setQueryTimeMs(0); - setFromCache(false); - setHasMore(false); - nextPageTokenRef.current = null; - }, []); - - useEffect(() => { - return () => { - if (debounceRef.current) clearTimeout(debounceRef.current); - if (abortRef.current) abortRef.current.abort(); - }; - }, []); - - return { - search, - results, - isLoading, - error, - totalCount, - queryTimeMs, - fromCache, - query, - setFilters, - activeFilters, - clear, - hasMore, - loadMore, - isLoadingMore, - }; -} diff --git a/packages/vector-search/src/ui/index.ts b/packages/vector-search/src/ui/index.ts deleted file mode 100644 index 47797d97..00000000 --- a/packages/vector-search/src/ui/index.ts +++ /dev/null @@ -1,6 +0,0 @@ -export { useVectorSearch } from './hooks/useVectorSearch'; -export { SearchBox } from './components/SearchBox'; -export { SearchResults } from './components/SearchResults'; -export { SearchResultCard } from './components/SearchResultCard'; -export { SearchLoadMore } from './components/SearchLoadMore'; -export type { UseVectorSearchOptions, UseVectorSearchReturn } from '../plugin/types'; diff --git a/packages/vector-search/tests/integration/dogfood.test.ts b/packages/vector-search/tests/integration/dogfood.test.ts deleted file mode 100644 index f0aa017e..00000000 --- a/packages/vector-search/tests/integration/dogfood.test.ts +++ /dev/null @@ -1,97 +0,0 @@ -import { describe, it, expect, beforeAll } from 'vitest'; -import { VectorSearchClient } from '../../src/plugin/VectorSearchClient'; - -const DOGFOOD_HOST = 'e2-dogfood.staging.cloud.databricks.com'; -const TEST_INDEX = 
'gurary_catalog.vector-search-brickfood.retrieval_perf_cuj_index_1'; - -// Skip unless DOGFOOD_TOKEN is set -describe.skipIf(!process.env.DOGFOOD_TOKEN)('Integration: VectorSearchClient → dogfood', () => { - let client: VectorSearchClient; - - beforeAll(() => { - client = new VectorSearchClient({ - host: DOGFOOD_HOST, - tokenProvider: { - getToken: async () => process.env.DOGFOOD_TOKEN!, - }, - }); - }); - - it('returns results for a valid hybrid query', async () => { - const response = await client.query({ - indexName: TEST_INDEX, - queryText: 'aircraft instruments', - columns: ['chunk_id', 'text'], - numResults: 5, - queryType: 'hybrid', - }); - expect(response.results.length).toBeGreaterThan(0); - expect(response.results[0].score).toBeGreaterThan(0); - expect(response.results[0].data).toHaveProperty('text'); - expect(response.results[0].data).toHaveProperty('chunk_id'); - expect(response.queryTimeMs).toBeGreaterThan(0); - }, 30000); - - it('returns results for ANN query', async () => { - const response = await client.query({ - indexName: TEST_INDEX, - queryText: 'navigation systems', - columns: ['chunk_id', 'text'], - numResults: 3, - queryType: 'ann', - }); - expect(response.results.length).toBeGreaterThan(0); - expect(response.results[0].score).toBeGreaterThan(0); - }, 30000); - - it('respects numResults limit', async () => { - const response = await client.query({ - indexName: TEST_INDEX, - queryText: 'flight', - columns: ['chunk_id', 'text'], - numResults: 2, - queryType: 'hybrid', - }); - expect(response.results.length).toBeLessThanOrEqual(2); - }, 30000); - - it('returns scores between 0 and 1', async () => { - const response = await client.query({ - indexName: TEST_INDEX, - queryText: 'altitude', - columns: ['chunk_id', 'text'], - numResults: 5, - queryType: 'hybrid', - }); - response.results.forEach(r => { - expect(r.score).toBeGreaterThanOrEqual(0); - expect(r.score).toBeLessThanOrEqual(1); - }); - }, 30000); - - it('handles empty results gracefully', 
async () => { - const response = await client.query({ - indexName: TEST_INDEX, - queryText: 'xyzzy_absolutely_no_match_12345_qwerty', - columns: ['chunk_id', 'text'], - numResults: 5, - queryType: 'ann', - }); - // May still return results due to embedding similarity, but should have low scores - // If no results, that's fine too - expect(response.results).toBeDefined(); - expect(Array.isArray(response.results)).toBe(true); - }, 30000); - - it('response includes queryTimeMs from debug_info', async () => { - const response = await client.query({ - indexName: TEST_INDEX, - queryText: 'weather radar', - columns: ['chunk_id', 'text'], - numResults: 3, - queryType: 'hybrid', - }); - expect(response.queryTimeMs).toBeGreaterThan(0); - expect(response.fromCache).toBe(false); - }, 30000); -}); diff --git a/packages/vector-search/tests/plugin/VectorSearchClient.test.ts b/packages/vector-search/tests/plugin/VectorSearchClient.test.ts deleted file mode 100644 index 8335a4f5..00000000 --- a/packages/vector-search/tests/plugin/VectorSearchClient.test.ts +++ /dev/null @@ -1,233 +0,0 @@ -import { describe, it, expect, vi, beforeEach } from 'vitest'; -import { VectorSearchClient } from '../../src/plugin/VectorSearchClient'; - -const mockFetch = vi.fn(); -vi.stubGlobal('fetch', mockFetch); - -const mockTokenProvider = { getToken: vi.fn().mockResolvedValue('sp-token-123') }; - -describe('VectorSearchClient', () => { - let client: VectorSearchClient; - - beforeEach(() => { - client = new VectorSearchClient({ - host: 'test-workspace.databricks.com', - tokenProvider: mockTokenProvider, - }); - mockFetch.mockReset(); - mockTokenProvider.getToken.mockClear(); - }); - - const validResponse = { - manifest: { column_count: 3, columns: [{ name: 'id' }, { name: 'title' }, { name: 'score' }] }, - result: { row_count: 2, data_array: [[1, 'ML Guide', 0.95], [2, 'AI Primer', 0.87]] }, - next_page_token: null, - debug_info: { response_time: 35 }, - }; - - describe('query()', () => { - 
it('constructs correct REST API URL and request body for hybrid search', async () => { - mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); - await client.query({ - indexName: 'cat.sch.idx', queryText: 'machine learning', - columns: ['id', 'title'], numResults: 10, queryType: 'hybrid', - }); - const [url, opts] = mockFetch.mock.calls[0]; - expect(url).toBe('https://test-workspace.databricks.com/api/2.0/vector-search/indexes/cat.sch.idx/query'); - const body = JSON.parse(opts.body); - expect(body.query_text).toBe('machine learning'); - expect(body.query_type).toBe('HYBRID'); - expect(body.num_results).toBe(10); - expect(body.columns).toEqual(['id', 'title']); - expect(body.debug_level).toBe(1); - }); - - it('includes filters when provided', async () => { - mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); - await client.query({ - indexName: 'cat.sch.idx', queryText: 'test', columns: ['id'], - numResults: 5, queryType: 'ann', filters: { category: ['books'] }, - }); - const body = JSON.parse(mockFetch.mock.calls[0][1].body); - expect(body.filters).toEqual({ category: ['books'] }); - }); - - it('omits filters when empty object', async () => { - mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); - await client.query({ - indexName: 'cat.sch.idx', queryText: 'test', columns: ['id'], - numResults: 5, queryType: 'ann', filters: {}, - }); - const body = JSON.parse(mockFetch.mock.calls[0][1].body); - expect(body.filters).toBeUndefined(); - }); - - it('includes reranker config when boolean true', async () => { - mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); - await client.query({ - indexName: 'cat.sch.idx', queryText: 'test', columns: ['id', 'title'], - numResults: 5, queryType: 'hybrid', reranker: true, - }); - const body = JSON.parse(mockFetch.mock.calls[0][1].body); - expect(body.reranker.model).toBe('databricks_reranker'); - 
// Default: all non-id columns - expect(body.reranker.parameters.columns_to_rerank).toEqual(['title']); - }); - - it('includes custom reranker columnsToRerank', async () => { - mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); - await client.query({ - indexName: 'cat.sch.idx', queryText: 'test', columns: ['id', 'title', 'desc'], - numResults: 5, queryType: 'hybrid', reranker: { columnsToRerank: ['desc'] }, - }); - const body = JSON.parse(mockFetch.mock.calls[0][1].body); - expect(body.reranker.parameters.columns_to_rerank).toEqual(['desc']); - }); - - it('parses VS data_array response into typed SearchResult[]', async () => { - mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); - const result = await client.query({ - indexName: 'cat.sch.idx', queryText: 'test', columns: ['id', 'title'], - numResults: 10, queryType: 'hybrid', - }); - expect(result.results).toHaveLength(2); - expect(result.results[0].score).toBe(0.95); - expect(result.results[0].data).toEqual({ id: 1, title: 'ML Guide' }); - expect(result.results[1].score).toBe(0.87); - expect(result.results[1].data).toEqual({ id: 2, title: 'AI Primer' }); - expect(result.totalCount).toBe(2); - expect(result.queryTimeMs).toBe(35); - expect(result.fromCache).toBe(false); - expect(result.nextPageToken).toBeNull(); - }); - - it('handles next_page_token in response', async () => { - const responseWithToken = { ...validResponse, next_page_token: 'abc123' }; - mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(responseWithToken) }); - const result = await client.query({ - indexName: 'cat.sch.idx', queryText: 'test', columns: ['id', 'title'], - numResults: 10, queryType: 'hybrid', - }); - expect(result.nextPageToken).toBe('abc123'); - }); - - it('uses SP token when no userToken provided', async () => { - mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); - await client.query({ - indexName: 
'cat.sch.idx', queryText: 'test', columns: ['id'], - numResults: 5, queryType: 'ann', - }); - expect(mockTokenProvider.getToken).toHaveBeenCalled(); - expect(mockFetch.mock.calls[0][1].headers['Authorization']).toBe('Bearer sp-token-123'); - }); - - it('uses userToken when provided (OBO)', async () => { - mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); - await client.query({ - indexName: 'cat.sch.idx', queryText: 'test', columns: ['id'], - numResults: 5, queryType: 'ann', userToken: 'user-token-456', - }); - expect(mockTokenProvider.getToken).not.toHaveBeenCalled(); - expect(mockFetch.mock.calls[0][1].headers['Authorization']).toBe('Bearer user-token-456'); - }); - - it('calls embeddingFn and sends query_vector for self-managed indexes', async () => { - mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); - const mockEmbeddingFn = vi.fn().mockResolvedValue([0.1, 0.2, 0.3]); - await client.query({ - indexName: 'cat.sch.idx', queryText: 'test', columns: ['id', 'title'], - numResults: 5, queryType: 'ann', embeddingFn: mockEmbeddingFn, - }); - expect(mockEmbeddingFn).toHaveBeenCalledWith('test'); - const body = JSON.parse(mockFetch.mock.calls[0][1].body); - expect(body.query_vector).toEqual([0.1, 0.2, 0.3]); - expect(body.query_text).toBeUndefined(); - }); - - it('sends query_text when no embeddingFn (managed embeddings)', async () => { - mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); - await client.query({ - indexName: 'cat.sch.idx', queryText: 'test', columns: ['id'], - numResults: 5, queryType: 'ann', - }); - const body = JSON.parse(mockFetch.mock.calls[0][1].body); - expect(body.query_text).toBe('test'); - expect(body.query_vector).toBeUndefined(); - }); - - it('throws INVALID_QUERY when neither queryText nor queryVector provided', async () => { - await expect(client.query({ - indexName: 'x', columns: ['id'], numResults: 1, queryType: 'ann', - } as 
any)).rejects.toMatchObject({ code: 'INVALID_QUERY' }); - }); - - it('maps 401 → UNAUTHORIZED', async () => { - mockFetch.mockResolvedValue({ ok: false, status: 401 }); - await expect(client.query({ - indexName: 'x', queryText: 't', columns: ['id'], numResults: 1, queryType: 'ann', - })).rejects.toMatchObject({ code: 'UNAUTHORIZED', statusCode: 401 }); - }); - - it('maps 404 → INDEX_NOT_FOUND', async () => { - mockFetch.mockResolvedValue({ ok: false, status: 404 }); - await expect(client.query({ - indexName: 'x', queryText: 't', columns: ['id'], numResults: 1, queryType: 'ann', - })).rejects.toMatchObject({ code: 'INDEX_NOT_FOUND', statusCode: 404 }); - }); - - it('maps 429 → RATE_LIMITED and retries', async () => { - mockFetch - .mockResolvedValueOnce({ ok: false, status: 429 }) - .mockResolvedValueOnce({ ok: true, json: () => Promise.resolve(validResponse) }); - const result = await client.query({ - indexName: 'cat.sch.idx', queryText: 'test', columns: ['id', 'title'], - numResults: 5, queryType: 'ann', - }); - expect(mockFetch).toHaveBeenCalledTimes(2); - expect(result.results).toHaveLength(2); - }); - - it('does not retry 400 errors', async () => { - mockFetch.mockResolvedValue({ ok: false, status: 400 }); - await expect(client.query({ - indexName: 'x', queryText: 't', columns: ['id'], numResults: 1, queryType: 'ann', - })).rejects.toMatchObject({ code: 'INVALID_QUERY' }); - expect(mockFetch).toHaveBeenCalledTimes(1); - }); - - it('retries 500 errors up to 3 times', async () => { - mockFetch.mockResolvedValue({ ok: false, status: 500 }); - await expect(client.query({ - indexName: 'x', queryText: 't', columns: ['id'], numResults: 1, queryType: 'ann', - })).rejects.toMatchObject({ code: 'INTERNAL', statusCode: 500 }); - expect(mockFetch).toHaveBeenCalledTimes(4); // 1 initial + 3 retries - }); - - it('retries network errors', async () => { - mockFetch - .mockRejectedValueOnce(new Error('ECONNRESET')) - .mockResolvedValueOnce({ ok: true, json: () => 
Promise.resolve(validResponse) }); - const result = await client.query({ - indexName: 'cat.sch.idx', queryText: 'test', columns: ['id', 'title'], - numResults: 5, queryType: 'ann', - }); - expect(mockFetch).toHaveBeenCalledTimes(2); - expect(result.results).toHaveLength(2); - }); - }); - - describe('queryNextPage()', () => { - it('calls the query-next-page endpoint with page token', async () => { - mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(validResponse) }); - await client.queryNextPage({ - indexName: 'cat.sch.idx', endpointName: 'my-endpoint', - pageToken: 'token123', - }); - const [url, opts] = mockFetch.mock.calls[0]; - expect(url).toBe('https://test-workspace.databricks.com/api/2.0/vector-search/indexes/cat.sch.idx/query-next-page'); - const body = JSON.parse(opts.body); - expect(body.endpoint_name).toBe('my-endpoint'); - expect(body.page_token).toBe('token123'); - }); - }); -}); diff --git a/packages/vector-search/tests/plugin/VectorSearchPlugin.test.ts b/packages/vector-search/tests/plugin/VectorSearchPlugin.test.ts deleted file mode 100644 index 479723fa..00000000 --- a/packages/vector-search/tests/plugin/VectorSearchPlugin.test.ts +++ /dev/null @@ -1,115 +0,0 @@ -import { describe, it, expect, vi, beforeEach } from 'vitest'; -import { VectorSearchPlugin } from '../../src/plugin/VectorSearchPlugin'; - -describe('VectorSearchPlugin', () => { - beforeEach(() => { - vi.stubEnv('DATABRICKS_HOST', 'test-host.databricks.com'); - vi.stubEnv('DATABRICKS_CLIENT_ID', 'test-client'); - vi.stubEnv('DATABRICKS_CLIENT_SECRET', 'test-secret'); - }); - - describe('setup()', () => { - it('throws if DATABRICKS_HOST is not set', async () => { - vi.stubEnv('DATABRICKS_HOST', ''); - const plugin = new VectorSearchPlugin({ - indexes: { - test: { indexName: 'cat.sch.idx', columns: ['id'] }, - }, - }); - await expect(plugin.setup()).rejects.toThrow('DATABRICKS_HOST'); - }); - - it('throws if any index is missing indexName', async () => { - const plugin = 
new VectorSearchPlugin({ - indexes: { - test: { indexName: '', columns: ['id'] }, - }, - }); - await expect(plugin.setup()).rejects.toThrow('indexName'); - }); - - it('throws if any index is missing columns', async () => { - const plugin = new VectorSearchPlugin({ - indexes: { - test: { indexName: 'cat.sch.idx', columns: [] }, - }, - }); - await expect(plugin.setup()).rejects.toThrow('columns'); - }); - - it('throws if pagination enabled but no endpointName', async () => { - const plugin = new VectorSearchPlugin({ - indexes: { - test: { indexName: 'cat.sch.idx', columns: ['id'], pagination: true }, - }, - }); - await expect(plugin.setup()).rejects.toThrow('endpointName'); - }); - - it('succeeds with valid config', async () => { - const plugin = new VectorSearchPlugin({ - indexes: { - products: { - indexName: 'cat.sch.products_idx', - columns: ['id', 'name', 'description'], - queryType: 'hybrid', - numResults: 20, - }, - docs: { - indexName: 'cat.sch.docs_idx', - columns: ['id', 'title', 'content'], - reranker: true, - auth: 'on-behalf-of-user', - }, - }, - }); - await expect(plugin.setup()).resolves.not.toThrow(); - }); - }); - - describe('exports()', () => { - it('returns object with query function', async () => { - const plugin = new VectorSearchPlugin({ - indexes: { - test: { indexName: 'cat.sch.idx', columns: ['id'] }, - }, - }); - await plugin.setup(); - const exports = plugin.exports(); - expect(exports).toHaveProperty('query'); - expect(typeof exports.query).toBe('function'); - }); - }); - - describe('getResourceRequirements()', () => { - it('returns resource entry for each configured index', () => { - const plugin = new VectorSearchPlugin({ - indexes: { - products: { indexName: 'cat.sch.products', columns: ['id'] }, - docs: { indexName: 'cat.sch.docs', columns: ['id'] }, - }, - }); - const resources = plugin.getResourceRequirements(); - expect(resources).toHaveLength(2); - expect(resources[0]).toEqual({ - type: 'vector-search-index', - name: 
'cat.sch.products', - permission: 'SELECT', - }); - expect(resources[1]).toEqual({ - type: 'vector-search-index', - name: 'cat.sch.docs', - permission: 'SELECT', - }); - }); - }); - - describe('manifest', () => { - it('has correct name and env declarations', () => { - expect(VectorSearchPlugin.manifest.name).toBe('vector-search'); - expect(VectorSearchPlugin.manifest.env).toContainEqual( - expect.objectContaining({ name: 'DATABRICKS_HOST' }) - ); - }); - }); -}); diff --git a/packages/vector-search/tests/plugin/auth.test.ts b/packages/vector-search/tests/plugin/auth.test.ts deleted file mode 100644 index 925b4ef4..00000000 --- a/packages/vector-search/tests/plugin/auth.test.ts +++ /dev/null @@ -1,108 +0,0 @@ -import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; -import { ServicePrincipalTokenProvider, OboTokenExtractor } from '../../src/plugin/auth'; - -const mockFetch = vi.fn(); -vi.stubGlobal('fetch', mockFetch); - -describe('ServicePrincipalTokenProvider', () => { - let provider: ServicePrincipalTokenProvider; - - beforeEach(() => { - vi.stubEnv('DATABRICKS_CLIENT_ID', 'test-client-id'); - vi.stubEnv('DATABRICKS_CLIENT_SECRET', 'test-client-secret'); - provider = new ServicePrincipalTokenProvider('test-host.databricks.com'); - mockFetch.mockReset(); - vi.useRealTimers(); - }); - - afterEach(() => { - vi.unstubAllEnvs(); - }); - - it('fetches token from OIDC endpoint', async () => { - mockFetch.mockResolvedValue({ - ok: true, - json: () => Promise.resolve({ access_token: 'token-abc', expires_in: 3600 }), - }); - - const token = await provider.getToken(); - - expect(token).toBe('token-abc'); - const [url, opts] = mockFetch.mock.calls[0]; - expect(url).toBe('https://test-host.databricks.com/oidc/v1/token'); - expect(opts.method).toBe('POST'); - expect(opts.headers['Content-Type']).toBe('application/x-www-form-urlencoded'); - const body = new URLSearchParams(opts.body); - expect(body.get('grant_type')).toBe('client_credentials'); - 
expect(body.get('client_id')).toBe('test-client-id'); - expect(body.get('client_secret')).toBe('test-client-secret'); - expect(body.get('scope')).toBe('all-apis'); - }); - - it('returns cached token on subsequent calls within expiry', async () => { - mockFetch.mockResolvedValue({ - ok: true, - json: () => Promise.resolve({ access_token: 'token-abc', expires_in: 3600 }), - }); - - await provider.getToken(); - await provider.getToken(); - - expect(mockFetch).toHaveBeenCalledTimes(1); - }); - - it('refreshes token when within 2-minute expiry buffer', async () => { - vi.useFakeTimers(); - - mockFetch - .mockResolvedValueOnce({ - ok: true, - json: () => Promise.resolve({ access_token: 'token-1', expires_in: 3600 }), - }) - .mockResolvedValueOnce({ - ok: true, - json: () => Promise.resolve({ access_token: 'token-2', expires_in: 3600 }), - }); - - const token1 = await provider.getToken(); - expect(token1).toBe('token-1'); - - // Advance to within 2 minutes of expiry (3600s - 120s = 3480s) - vi.advanceTimersByTime(3481 * 1000); - - const token2 = await provider.getToken(); - expect(token2).toBe('token-2'); - expect(mockFetch).toHaveBeenCalledTimes(2); - }); -}); - -describe('OboTokenExtractor', () => { - it('extracts token from x-forwarded-access-token header', () => { - const req = { - headers: { 'x-forwarded-access-token': 'user-token-xyz' }, - } as any; - - const token = OboTokenExtractor.extractFromRequest(req); - expect(token).toBe('user-token-xyz'); - }); - - it('throws UNAUTHORIZED when header is missing', () => { - const req = { headers: {} } as any; - - expect(() => OboTokenExtractor.extractFromRequest(req)).toThrow(); - try { - OboTokenExtractor.extractFromRequest(req); - } catch (err: any) { - expect(err.code).toBe('UNAUTHORIZED'); - expect(err.statusCode).toBe(401); - } - }); - - it('throws UNAUTHORIZED when header is empty string', () => { - const req = { - headers: { 'x-forwarded-access-token': '' }, - } as any; - - expect(() => 
OboTokenExtractor.extractFromRequest(req)).toThrow(); - }); -}); diff --git a/packages/vector-search/tests/plugin/routes.test.ts b/packages/vector-search/tests/plugin/routes.test.ts deleted file mode 100644 index ef043061..00000000 --- a/packages/vector-search/tests/plugin/routes.test.ts +++ /dev/null @@ -1,206 +0,0 @@ -import { describe, it, expect, vi, beforeEach, beforeAll } from 'vitest'; -import express from 'express'; -import request from 'supertest'; -import { createVectorSearchRouter } from '../../src/plugin/routes'; -import { VectorSearchPlugin } from '../../src/plugin/VectorSearchPlugin'; - -// Mock fetch for the VectorSearchClient -const mockFetch = vi.fn(); -vi.stubGlobal('fetch', mockFetch); - -describe('Vector Search Routes', () => { - let app: express.Express; - let plugin: VectorSearchPlugin; - - const validVsResponse = { - manifest: { column_count: 3, columns: [{ name: 'id' }, { name: 'title' }, { name: 'score' }] }, - result: { row_count: 2, data_array: [[1, 'ML Guide', 0.95], [2, 'AI Primer', 0.87]] }, - next_page_token: null, - debug_info: { latency_ms: 35 }, - }; - - beforeAll(async () => { - vi.stubEnv('DATABRICKS_HOST', 'test-host.databricks.com'); - vi.stubEnv('DATABRICKS_CLIENT_ID', 'test-client'); - vi.stubEnv('DATABRICKS_CLIENT_SECRET', 'test-secret'); - - plugin = new VectorSearchPlugin({ - indexes: { - products: { - indexName: 'cat.sch.products', - columns: ['id', 'title', 'description', 'category'], - queryType: 'hybrid', - numResults: 20, - }, - cached: { - indexName: 'cat.sch.cached', - columns: ['id', 'text'], - cache: { enabled: true, ttlSeconds: 60 }, - }, - paginated: { - indexName: 'cat.sch.paginated', - columns: ['id', 'text'], - pagination: true, - endpointName: 'my-endpoint', - }, - obo: { - indexName: 'cat.sch.obo', - columns: ['id', 'text'], - auth: 'on-behalf-of-user', - }, - }, - }); - await plugin.setup(); - - app = express(); - app.use(express.json()); - app.use('/api/vector-search', createVectorSearchRouter(plugin)); - 
}); - - beforeEach(() => { - mockFetch.mockReset(); - // Mock the OIDC token fetch that happens on first query - mockFetch.mockImplementation((url: string) => { - if (typeof url === 'string' && url.includes('/oidc/v1/token')) { - return Promise.resolve({ - ok: true, - json: () => Promise.resolve({ access_token: 'sp-token', expires_in: 3600 }), - }); - } - return Promise.resolve({ - ok: true, - json: () => Promise.resolve(validVsResponse), - }); - }); - }); - - describe('POST /:alias/query', () => { - it('returns results for valid query', async () => { - const res = await request(app) - .post('/api/vector-search/products/query') - .send({ queryText: 'machine learning' }) - .expect(200); - - expect(res.body.results).toHaveLength(2); - expect(res.body.results[0].score).toBe(0.95); - expect(res.body.results[0].data.title).toBe('ML Guide'); - expect(res.body.totalCount).toBe(2); - expect(res.body.queryTimeMs).toBe(35); - }); - - it('returns 404 for unknown alias', async () => { - const res = await request(app) - .post('/api/vector-search/unknown/query') - .send({ queryText: 'test' }) - .expect(404); - - expect(res.body.code).toBe('INDEX_NOT_FOUND'); - }); - - it('returns 400 for missing queryText and queryVector', async () => { - const res = await request(app) - .post('/api/vector-search/products/query') - .send({}) - .expect(400); - - expect(res.body.code).toBe('INVALID_QUERY'); - }); - - it('passes filters to VS client', async () => { - await request(app) - .post('/api/vector-search/products/query') - .send({ queryText: 'test', filters: { category: 'books' } }) - .expect(200); - - // Verify the VS API call included filters - const vsCall = mockFetch.mock.calls.find( - (c: any[]) => typeof c[0] === 'string' && c[0].includes('/query') - ); - expect(vsCall).toBeDefined(); - const body = JSON.parse(vsCall![1].body); - expect(body.filters).toEqual({ category: 'books' }); - }); - - it('uses OBO token when auth is on-behalf-of-user', async () => { - await request(app) - 
.post('/api/vector-search/obo/query') - .set('x-forwarded-access-token', 'user-token-123') - .send({ queryText: 'test' }) - .expect(200); - - const vsCall = mockFetch.mock.calls.find( - (c: any[]) => typeof c[0] === 'string' && c[0].includes('/query') - ); - expect(vsCall![1].headers['Authorization']).toBe('Bearer user-token-123'); - }); - - it('returns 401 when OBO index has no user token', async () => { - const res = await request(app) - .post('/api/vector-search/obo/query') - .send({ queryText: 'test' }) - .expect(401); - - expect(res.body.code).toBe('UNAUTHORIZED'); - }); - }); - - describe('POST /:alias/next-page', () => { - it('returns 400 when pagination not enabled', async () => { - const res = await request(app) - .post('/api/vector-search/products/next-page') - .send({ pageToken: 'abc' }) - .expect(400); - - expect(res.body.code).toBe('INVALID_QUERY'); - expect(res.body.message).toContain('Pagination'); - }); - - it('returns 400 when pageToken missing', async () => { - const res = await request(app) - .post('/api/vector-search/paginated/next-page') - .send({}) - .expect(400); - - expect(res.body.code).toBe('INVALID_QUERY'); - expect(res.body.message).toContain('pageToken'); - }); - - it('calls query-next-page endpoint when valid', async () => { - await request(app) - .post('/api/vector-search/paginated/next-page') - .send({ pageToken: 'token123' }) - .expect(200); - - const nextPageCall = mockFetch.mock.calls.find( - (c: any[]) => typeof c[0] === 'string' && c[0].includes('/query-next-page') - ); - expect(nextPageCall).toBeDefined(); - const body = JSON.parse(nextPageCall![1].body); - expect(body.page_token).toBe('token123'); - expect(body.endpoint_name).toBe('my-endpoint'); - }); - }); - - describe('GET /:alias/config', () => { - it('returns public config for valid alias', async () => { - const res = await request(app) - .get('/api/vector-search/products/config') - .expect(200); - - expect(res.body.alias).toBe('products'); - 
expect(res.body.columns).toEqual(['id', 'title', 'description', 'category']); - expect(res.body.queryType).toBe('hybrid'); - expect(res.body.numResults).toBe(20); - expect(res.body.reranker).toBe(false); - expect(res.body.pagination).toBe(false); - }); - - it('returns 404 for unknown alias', async () => { - const res = await request(app) - .get('/api/vector-search/unknown/config') - .expect(404); - - expect(res.body.code).toBe('INDEX_NOT_FOUND'); - }); - }); -}); diff --git a/packages/vector-search/tests/ui/components/components.test.tsx b/packages/vector-search/tests/ui/components/components.test.tsx deleted file mode 100644 index 681f8069..00000000 --- a/packages/vector-search/tests/ui/components/components.test.tsx +++ /dev/null @@ -1,158 +0,0 @@ -import { describe, it, expect, vi } from 'vitest'; -import { render, screen, fireEvent } from '@testing-library/react'; -import { SearchBox } from '../../../src/ui/components/SearchBox'; -import { SearchResultCard } from '../../../src/ui/components/SearchResultCard'; -import { SearchResults } from '../../../src/ui/components/SearchResults'; -import { SearchLoadMore } from '../../../src/ui/components/SearchLoadMore'; - -describe('SearchBox', () => { - it('renders input with placeholder', () => { - render( {}} placeholder="Search products..." 
/>); - expect(screen.getByPlaceholderText('Search products...')).toBeInTheDocument(); - }); - - it('calls onSearch on input change', () => { - const onSearch = vi.fn(); - render(); - fireEvent.change(screen.getByRole('searchbox'), { target: { value: 'test' } }); - expect(onSearch).toHaveBeenCalledWith('test'); - }); - - it('shows clear button when value present', () => { - render( {}} value="test" />); - expect(screen.getByLabelText('Clear search')).toBeInTheDocument(); - }); - - it('hides clear button when value empty', () => { - render( {}} value="" />); - expect(screen.queryByLabelText('Clear search')).not.toBeInTheDocument(); - }); - - it('calls onSearch with empty string on clear', () => { - const onSearch = vi.fn(); - render(); - fireEvent.click(screen.getByLabelText('Clear search')); - expect(onSearch).toHaveBeenCalledWith(''); - }); - - it('clears on Escape key', () => { - const onSearch = vi.fn(); - render(); - fireEvent.keyDown(screen.getByRole('searchbox'), { key: 'Escape' }); - expect(onSearch).toHaveBeenCalledWith(''); - }); - - it('shows loading spinner when isLoading', () => { - render( {}} isLoading />); - expect(screen.getByTestId('loading-spinner')).toBeInTheDocument(); - }); -}); - -describe('SearchResultCard', () => { - const result = { - score: 0.95, - data: { id: 1, title: 'Machine Learning Guide', description: 'A guide to ML algorithms', category: 'books' }, - }; - - it('renders title and description', () => { - render(); - expect(screen.getByText('Machine Learning Guide')).toBeInTheDocument(); - expect(screen.getByText('A guide to ML algorithms')).toBeInTheDocument(); - }); - - it('highlights query words with mark tags', () => { - const { container } = render( - - ); - const marks = container.querySelectorAll('mark'); - expect(marks.length).toBeGreaterThan(0); - expect(marks[0].textContent).toBe('Machine'); - }); - - it('shows score badge when showScore is true', () => { - render(); - expect(screen.getByText('95%')).toBeInTheDocument(); - 
}); - - it('hides score badge by default', () => { - render(); - expect(screen.queryByText('95%')).not.toBeInTheDocument(); - }); - - it('renders display columns as metadata', () => { - render( - - ); - expect(screen.getByText('category:')).toBeInTheDocument(); - expect(screen.getByText('books')).toBeInTheDocument(); - }); -}); - -describe('SearchResults', () => { - const results = [ - { score: 0.95, data: { id: 1, title: 'Result 1' } }, - { score: 0.87, data: { id: 2, title: 'Result 2' } }, - ]; - - it('shows loading skeleton when loading with no results', () => { - render(); - expect(screen.getByTestId('loading-skeleton')).toBeInTheDocument(); - }); - - it('shows empty message when no results', () => { - render(); - expect(screen.getByText('No results found.')).toBeInTheDocument(); - }); - - it('shows custom empty message', () => { - render(); - expect(screen.getByText('Nothing here')).toBeInTheDocument(); - }); - - it('shows error banner', () => { - const error = { code: 'INTERNAL' as const, message: 'Server error', statusCode: 500 }; - render(); - expect(screen.getByText('Search failed')).toBeInTheDocument(); - expect(screen.getByText('Server error')).toBeInTheDocument(); - }); - - it('renders results with summary', () => { - render(); - expect(screen.getByText('2 results in 35ms')).toBeInTheDocument(); - expect(screen.getByText('Result 1')).toBeInTheDocument(); - expect(screen.getByText('Result 2')).toBeInTheDocument(); - }); - - it('returns null when no query', () => { - const { container } = render(); - expect(container.firstChild).toBeNull(); - }); -}); - -describe('SearchLoadMore', () => { - it('renders button when hasMore is true', () => { - render( {}} />); - expect(screen.getByText('Load more results')).toBeInTheDocument(); - }); - - it('renders nothing when hasMore is false', () => { - const { container } = render( {}} />); - expect(container.firstChild).toBeNull(); - }); - - it('shows Loading... 
when isLoading', () => { - render( {}} />); - expect(screen.getByText('Loading...')).toBeInTheDocument(); - }); - - it('calls onLoadMore on click', () => { - const onLoadMore = vi.fn(); - render(); - fireEvent.click(screen.getByText('Load more results')); - expect(onLoadMore).toHaveBeenCalledTimes(1); - }); -}); diff --git a/packages/vector-search/tests/ui/hooks/useVectorSearch.test.ts b/packages/vector-search/tests/ui/hooks/useVectorSearch.test.ts deleted file mode 100644 index c663c75e..00000000 --- a/packages/vector-search/tests/ui/hooks/useVectorSearch.test.ts +++ /dev/null @@ -1,201 +0,0 @@ -import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; -import { renderHook, act } from '@testing-library/react'; -import { useVectorSearch } from '../../../src/ui/hooks/useVectorSearch'; - -const mockFetch = vi.fn(); -vi.stubGlobal('fetch', mockFetch); - -const mockResponse = { - results: [{ score: 0.95, data: { id: 1, title: 'Test Result' } }], - totalCount: 1, - queryTimeMs: 20, - queryType: 'hybrid', - fromCache: false, - nextPageToken: null, -}; - -/** Flush all pending microtasks (promise callbacks) */ -const flushPromises = () => act(() => Promise.resolve()); - -describe('useVectorSearch', () => { - beforeEach(() => { - mockFetch.mockReset(); - vi.useFakeTimers(); - }); - - afterEach(() => { - vi.useRealTimers(); - }); - - it('debounces search calls (300ms default)', async () => { - mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(mockResponse) }); - const { result } = renderHook(() => useVectorSearch('products')); - - act(() => { result.current.search('a'); }); - act(() => { result.current.search('ab'); }); - act(() => { result.current.search('abc'); }); - - // Before debounce fires - expect(mockFetch).not.toHaveBeenCalled(); - - // After debounce — advance timers then flush promises for fetch resolution - await act(async () => { vi.advanceTimersByTime(300); }); - await flushPromises(); - - 
expect(mockFetch).toHaveBeenCalledTimes(1); - const body = JSON.parse(mockFetch.mock.calls[0][1].body); - expect(body.queryText).toBe('abc'); - }); - - it('does not search below minQueryLength', async () => { - const { result } = renderHook(() => - useVectorSearch('products', { minQueryLength: 3 }) - ); - - act(() => { result.current.search('ab'); }); - await act(async () => { vi.advanceTimersByTime(400); }); - - expect(mockFetch).not.toHaveBeenCalled(); - expect(result.current.results).toEqual([]); - }); - - it('sets isLoading true during search', async () => { - let resolveJson!: (v: unknown) => void; - mockFetch.mockReturnValue( - Promise.resolve({ - ok: true, - json: () => new Promise((r) => { resolveJson = r; }), - }) - ); - - const { result } = renderHook(() => useVectorSearch('products')); - - act(() => { result.current.search('test'); }); - await act(async () => { vi.advanceTimersByTime(300); }); - // fetch was called, but json() hasn't resolved yet - await flushPromises(); - - expect(result.current.isLoading).toBe(true); - - await act(async () => { resolveJson(mockResponse); }); - expect(result.current.isLoading).toBe(false); - expect(result.current.results).toHaveLength(1); - }); - - it('populates results after successful search', async () => { - mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(mockResponse) }); - const { result } = renderHook(() => useVectorSearch('products')); - - act(() => { result.current.search('test'); }); - await act(async () => { vi.advanceTimersByTime(300); }); - await flushPromises(); - - expect(result.current.results).toHaveLength(1); - expect(result.current.results[0].score).toBe(0.95); - expect(result.current.results[0].data).toEqual({ id: 1, title: 'Test Result' }); - expect(result.current.totalCount).toBe(1); - expect(result.current.queryTimeMs).toBe(20); - expect(result.current.fromCache).toBe(false); - expect(result.current.query).toBe('test'); - }); - - it('sets error on failed search', async () => { - 
mockFetch.mockResolvedValue({ - ok: false, - status: 404, - json: () => Promise.resolve({ code: 'INDEX_NOT_FOUND', message: 'Not found', statusCode: 404 }), - }); - - const { result } = renderHook(() => useVectorSearch('products')); - - act(() => { result.current.search('test'); }); - await act(async () => { vi.advanceTimersByTime(300); }); - await flushPromises(); - - expect(result.current.error).not.toBeNull(); - expect(result.current.error!.code).toBe('INDEX_NOT_FOUND'); - expect(result.current.isLoading).toBe(false); - }); - - it('clears everything on clear()', async () => { - mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(mockResponse) }); - const { result } = renderHook(() => useVectorSearch('products')); - - act(() => { result.current.search('test'); }); - await act(async () => { vi.advanceTimersByTime(300); }); - await flushPromises(); - - expect(result.current.results).toHaveLength(1); - - act(() => { result.current.clear(); }); - - expect(result.current.results).toEqual([]); - expect(result.current.query).toBe(''); - expect(result.current.totalCount).toBe(0); - expect(result.current.error).toBeNull(); - }); - - it('re-executes search when filters change', async () => { - mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(mockResponse) }); - const { result } = renderHook(() => useVectorSearch('products')); - - act(() => { result.current.search('test'); }); - await act(async () => { vi.advanceTimersByTime(300); }); - await flushPromises(); - - expect(mockFetch).toHaveBeenCalledTimes(1); - - await act(async () => { result.current.setFilters({ category: 'books' }); }); - await flushPromises(); - - expect(mockFetch).toHaveBeenCalledTimes(2); - const body = JSON.parse(mockFetch.mock.calls[1][1].body); - expect(body.filters).toEqual({ category: 'books' }); - }); - - it('calls onResults callback on success', async () => { - mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(mockResponse) }); - const onResults 
= vi.fn(); - const { result } = renderHook(() => - useVectorSearch('products', { onResults }) - ); - - act(() => { result.current.search('test'); }); - await act(async () => { vi.advanceTimersByTime(300); }); - await flushPromises(); - - expect(onResults).toHaveBeenCalledTimes(1); - expect(onResults).toHaveBeenCalledWith(mockResponse); - }); - - it('calls onError callback on failure', async () => { - const errorResponse = { code: 'INTERNAL', message: 'Server error', statusCode: 500 }; - mockFetch.mockResolvedValue({ - ok: false, - status: 500, - json: () => Promise.resolve(errorResponse), - }); - const onError = vi.fn(); - const { result } = renderHook(() => - useVectorSearch('products', { onError }) - ); - - act(() => { result.current.search('test'); }); - await act(async () => { vi.advanceTimersByTime(300); }); - await flushPromises(); - - expect(onError).toHaveBeenCalledTimes(1); - }); - - it('sends request to correct API endpoint', async () => { - mockFetch.mockResolvedValue({ ok: true, json: () => Promise.resolve(mockResponse) }); - const { result } = renderHook(() => useVectorSearch('products')); - - act(() => { result.current.search('test'); }); - await act(async () => { vi.advanceTimersByTime(300); }); - await flushPromises(); - - expect(mockFetch).toHaveBeenCalled(); - expect(mockFetch.mock.calls[0][0]).toBe('/api/vector-search/products/query'); - }); -}); diff --git a/packages/vector-search/tsconfig.json b/packages/vector-search/tsconfig.json deleted file mode 100644 index c2c6364a..00000000 --- a/packages/vector-search/tsconfig.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "compilerOptions": { - "target": "ES2022", - "module": "ESNext", - "moduleResolution": "bundler", - "jsx": "react-jsx", - "declaration": true, - "outDir": "./dist", - "rootDir": "./src", - "strict": true, - "esModuleInterop": true, - "skipLibCheck": true - }, - "include": ["src/**/*"], - "exclude": ["node_modules", "dist", "**/*.test.ts", "**/*.test.tsx"] -} diff --git 
a/packages/vector-search/vitest.config.ts b/packages/vector-search/vitest.config.ts deleted file mode 100644 index 6ccbfd4c..00000000 --- a/packages/vector-search/vitest.config.ts +++ /dev/null @@ -1,9 +0,0 @@ -import { defineConfig } from 'vitest/config'; - -export default defineConfig({ - test: { - environment: 'jsdom', - globals: true, - setupFiles: ['./vitest.setup.ts'], - }, -}); diff --git a/packages/vector-search/vitest.setup.ts b/packages/vector-search/vitest.setup.ts deleted file mode 100644 index bb02c60c..00000000 --- a/packages/vector-search/vitest.setup.ts +++ /dev/null @@ -1 +0,0 @@ -import '@testing-library/jest-dom/vitest'; From 475bc60c900cba5c742456238ea0e5f7806f62e6 Mon Sep 17 00:00:00 2001 From: Adam Gurary Date: Tue, 24 Mar 2026 12:40:13 -0700 Subject: [PATCH 3/3] fix: add OpenTelemetry spans and proper error propagation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Connector: wrap VS API calls in telemetry spans with index name, query type, result count, and latency attributes - Connector: check AbortSignal before executing requests - Connector: add WideEvent context logging with query metadata - Plugin: replace this.execute() in route handlers with direct try/catch — preserves actual error details (code, message, status) instead of swallowing them into undefined - Remove unused SearchFilters import Signed-off-by: Adam Gurary --- .../src/connectors/vector-search/client.ts | 131 +++++++++++++++--- .../src/connectors/vector-search/types.ts | 3 + .../plugins/vector-search/vector-search.ts | 107 +++++++------- 3 files changed, 164 insertions(+), 77 deletions(-) diff --git a/packages/appkit/src/connectors/vector-search/client.ts b/packages/appkit/src/connectors/vector-search/client.ts index f3345061..264a2dc6 100644 --- a/packages/appkit/src/connectors/vector-search/client.ts +++ b/packages/appkit/src/connectors/vector-search/client.ts @@ -1,5 +1,13 @@ -import type { WorkspaceClient } from 
"@databricks/sdk-experimental"; +import { Context, type WorkspaceClient } from "@databricks/sdk-experimental"; +import type { TelemetryOptions } from "shared"; import { createLogger } from "../../logging/logger"; +import { + type Span, + SpanKind, + SpanStatusCode, + TelemetryManager, +} from "../../telemetry"; +import type { TelemetryProvider } from "../../telemetry"; import type { VectorSearchConnectorConfig, VsNextPageParams, @@ -11,11 +19,16 @@ const logger = createLogger("connectors:vector-search"); export class VectorSearchConnector { private readonly config: Required; + private readonly telemetry: TelemetryProvider; constructor(config: VectorSearchConnectorConfig = {}) { this.config = { timeout: config.timeout ?? 30_000, }; + this.telemetry = TelemetryManager.getProvider( + "vector-search", + config.telemetry, + ); } async query( @@ -23,6 +36,10 @@ export class VectorSearchConnector { params: VsQueryParams, signal?: AbortSignal, ): Promise { + if (signal?.aborted) { + throw new Error("Query cancelled before execution"); + } + const body: Record = { columns: params.columns, num_results: params.numResults, @@ -49,14 +66,59 @@ export class VectorSearchConnector { params.numResults, ); - return (await workspaceClient.apiClient.request({ - method: "POST", - path: `/api/2.0/vector-search/indexes/${params.indexName}/query`, - body, - headers: new Headers({ "Content-Type": "application/json" }), - raw: false, - query: {}, - })) as VsRawResponse; + return this.telemetry.startActiveSpan( + "vector-search.query", + { + kind: SpanKind.CLIENT, + attributes: { + "db.system": "databricks", + "vs.index_name": params.indexName, + "vs.query_type": params.queryType, + "vs.num_results": params.numResults, + "vs.has_filters": !!( + params.filters && Object.keys(params.filters).length > 0 + ), + "vs.has_reranker": !!params.reranker, + }, + }, + async (span: Span) => { + const startTime = Date.now(); + try { + const response = (await workspaceClient.apiClient.request({ + method: 
"POST", + path: `/api/2.0/vector-search/indexes/${params.indexName}/query`, + body, + headers: new Headers({ "Content-Type": "application/json" }), + raw: false, + query: {}, + })) as VsRawResponse; + + const duration = Date.now() - startTime; + span.setAttribute("vs.result_count", response.result.row_count); + span.setAttribute("vs.query_time_ms", response.debug_info?.response_time ?? 0); + span.setAttribute("vs.duration_ms", duration); + span.setStatus({ code: SpanStatusCode.OK }); + + logger.event()?.setContext("vector-search", { + index_name: params.indexName, + query_type: params.queryType, + result_count: response.result.row_count, + query_time_ms: response.debug_info?.response_time ?? 0, + duration_ms: duration, + }); + + return response; + } catch (error) { + span.recordException(error as Error); + span.setStatus({ + code: SpanStatusCode.ERROR, + message: error instanceof Error ? error.message : String(error), + }); + throw error; + } + }, + { name: "vector-search", includePrefix: true }, + ); } async queryNextPage( @@ -64,22 +126,53 @@ export class VectorSearchConnector { params: VsNextPageParams, signal?: AbortSignal, ): Promise { + if (signal?.aborted) { + throw new Error("Query cancelled before execution"); + } + logger.debug( "Fetching next page for index %s (endpoint=%s)", params.indexName, params.endpointName, ); - return (await workspaceClient.apiClient.request({ - method: "POST", - path: `/api/2.0/vector-search/indexes/${params.indexName}/query-next-page`, - body: { - endpoint_name: params.endpointName, - page_token: params.pageToken, + return this.telemetry.startActiveSpan( + "vector-search.queryNextPage", + { + kind: SpanKind.CLIENT, + attributes: { + "db.system": "databricks", + "vs.index_name": params.indexName, + "vs.endpoint_name": params.endpointName, + }, }, - headers: new Headers({ "Content-Type": "application/json" }), - raw: false, - query: {}, - })) as VsRawResponse; + async (span: Span) => { + try { + const response = (await 
workspaceClient.apiClient.request({ + method: "POST", + path: `/api/2.0/vector-search/indexes/${params.indexName}/query-next-page`, + body: { + endpoint_name: params.endpointName, + page_token: params.pageToken, + }, + headers: new Headers({ "Content-Type": "application/json" }), + raw: false, + query: {}, + })) as VsRawResponse; + + span.setAttribute("vs.result_count", response.result.row_count); + span.setStatus({ code: SpanStatusCode.OK }); + return response; + } catch (error) { + span.recordException(error as Error); + span.setStatus({ + code: SpanStatusCode.ERROR, + message: error instanceof Error ? error.message : String(error), + }); + throw error; + } + }, + { name: "vector-search", includePrefix: true }, + ); } } diff --git a/packages/appkit/src/connectors/vector-search/types.ts b/packages/appkit/src/connectors/vector-search/types.ts index 8e9f6c39..df042e8c 100644 --- a/packages/appkit/src/connectors/vector-search/types.ts +++ b/packages/appkit/src/connectors/vector-search/types.ts @@ -1,5 +1,8 @@ +import type { TelemetryOptions } from "shared"; + export interface VectorSearchConnectorConfig { timeout?: number; + telemetry?: TelemetryOptions; } export interface VsQueryParams { diff --git a/packages/appkit/src/plugins/vector-search/vector-search.ts b/packages/appkit/src/plugins/vector-search/vector-search.ts index b3b20a18..117f3e46 100644 --- a/packages/appkit/src/plugins/vector-search/vector-search.ts +++ b/packages/appkit/src/plugins/vector-search/vector-search.ts @@ -6,12 +6,10 @@ import { createLogger } from "../../logging/logger"; import { Plugin, toPlugin } from "../../plugin"; import type { PluginManifest } from "../../registry"; import type { VsRawResponse } from "../../connectors/vector-search/types"; -import { vectorSearchDefaults } from "./defaults"; import manifest from "./manifest.json"; import type { IVectorSearchConfig, IndexConfig, - SearchFilters, SearchRequest, SearchResponse, } from "./types"; @@ -30,7 +28,10 @@ export class 
VectorSearchPlugin extends Plugin { constructor(config: IVectorSearchConfig) { super(config); this.config = config; - this.connector = new VectorSearchConnector({ timeout: config.timeout }); + this.connector = new VectorSearchConnector({ + timeout: config.timeout, + telemetry: config.telemetry, + }); } async setup(): Promise { @@ -171,38 +172,33 @@ export class VectorSearchPlugin extends Plugin { body.columns ?? indexConfig.columns, ); - const result = await this.execute( - async (signal) => { - const workspaceClient = getWorkspaceClient(); - const raw = await this.connector.query( - workspaceClient, - { - indexName: indexConfig.indexName, - queryText, - queryVector, - columns: body.columns ?? indexConfig.columns, - numResults: body.numResults ?? indexConfig.numResults ?? 20, - queryType, - filters: body.filters, - reranker: rerankerConfig, - }, - signal, - ); - return this._parseResponse(raw, queryType); - }, - { default: vectorSearchDefaults }, - ); - - if (!result) { - res.status(500).json({ - code: "INTERNAL", - message: "Query execution failed", - statusCode: 500, + try { + const workspaceClient = getWorkspaceClient(); + const raw = await this.connector.query( + workspaceClient, + { + indexName: indexConfig.indexName, + queryText, + queryVector, + columns: body.columns ?? indexConfig.columns, + numResults: body.numResults ?? indexConfig.numResults ?? 20, + queryType, + filters: body.filters, + reranker: rerankerConfig, + }, + ); + res.json(this._parseResponse(raw, queryType)); + } catch (error) { + logger.error("Vector search query failed: %O", error); + const statusCode = + (error as { statusCode?: number }).statusCode ?? 500; + res.status(statusCode).json({ + code: (error as { code?: string }).code ?? "INTERNAL", + message: + error instanceof Error ? 
error.message : "Query execution failed", + statusCode, }); - return; } - - res.json(result); } async _handleNextPage( @@ -229,33 +225,28 @@ export class VectorSearchPlugin extends Plugin { return; } - const result = await this.execute( - async (signal) => { - const workspaceClient = getWorkspaceClient(); - const raw = await this.connector.queryNextPage( - workspaceClient, - { - indexName: indexConfig.indexName, - endpointName: indexConfig.endpointName!, - pageToken, - }, - signal, - ); - return this._parseResponse(raw, "hybrid"); - }, - { default: vectorSearchDefaults }, - ); - - if (!result) { - res.status(500).json({ - code: "INTERNAL", - message: "Next-page query failed", - statusCode: 500, + try { + const workspaceClient = getWorkspaceClient(); + const raw = await this.connector.queryNextPage( + workspaceClient, + { + indexName: indexConfig.indexName, + endpointName: indexConfig.endpointName!, + pageToken, + }, + ); + res.json(this._parseResponse(raw, "hybrid")); + } catch (error) { + logger.error("Vector search next-page query failed: %O", error); + const statusCode = + (error as { statusCode?: number }).statusCode ?? 500; + res.status(statusCode).json({ + code: (error as { code?: string }).code ?? "INTERNAL", + message: + error instanceof Error ? error.message : "Next-page query failed", + statusCode, }); - return; } - - res.json(result); } /**