diff --git a/lib/errors/AuthenticationError.ts b/lib/errors/AuthenticationError.ts index 54b3783c..c8588fa0 100644 --- a/lib/errors/AuthenticationError.ts +++ b/lib/errors/AuthenticationError.ts @@ -1,3 +1,8 @@ import HiveDriverError from './HiveDriverError'; -export default class AuthenticationError extends HiveDriverError {} +export default class AuthenticationError extends HiveDriverError { + constructor(message?: string) { + super(message); + this.name = 'AuthenticationError'; + } +} diff --git a/lib/sea/SeaAuth.ts b/lib/sea/SeaAuth.ts index cf16c80f..c6fd5178 100644 --- a/lib/sea/SeaAuth.ts +++ b/lib/sea/SeaAuth.ts @@ -16,19 +16,81 @@ import { ConnectionOptions } from '../contracts/IDBSQLClient'; import AuthenticationError from '../errors/AuthenticationError'; import HiveDriverError from '../errors/HiveDriverError'; +/** + * Default local listener port for the U2M authorization-code callback. + * Hardcoded here so the override of the kernel default (8020) to the + * thrift default (8030) is invariant for SEA callers — preserving parity + * with the existing Node driver. Not exposed on the public + * `ConnectionOptions` (thrift hides `callbackPorts` from its public + * surface too — see nodejs-thrift-expert survey §B.2). + */ +const U2M_DEFAULT_REDIRECT_PORT = 8030; + /** * Shape consumed by the napi-binding's `openSession()` (see - * `native/sea/index.d.ts`). M0 supports PAT only — `token` is required. + * `native/sea/index.d.ts`). Mirrors `ConnectionOptions` in the binding's + * `.d.ts`; declared locally to avoid coupling the JS-side adapter to the + * auto-generated TS file. + * + * Discriminated by `authMode`: + * - `'Pat'` → `token` is the PAT. + * - `'OAuthM2m'` → `oauthClientId` + `oauthClientSecret` drive a + * kernel-side client_credentials exchange. + * - `'OAuthU2m'` → `oauthRedirectPort` overrides the kernel default; + * everything else (client_id, scopes, callback timeout, + * token_url_override) uses kernel defaults. + * + * The `authMode` string literals MUST match the napi-emitted `AuthMode` + * variant names verbatim (`'Pat'`, `'OAuthM2m'`, `'OAuthU2m'` — napi-rs's + * `#[napi(string_enum)]` without an explicit case option emits the + * Rust variant identifier as-is). We duplicate the values here instead + * of importing `AuthMode` from `native/sea/index.d.ts` because that + * file declares `AuthMode` as `export const enum`, which is + * incompatible with `isolatedModules` and a runtime-coupling hazard. + * The Rust source of truth lives at `native/sea/src/database.rs`. + */ +/** + * Session-level defaults shared across all auth-mode variants. + * + * Mirrors `ConnectionOptions.catalog` / `.schema` / `.sessionConf` on + * the napi binding (kernel `Session::builder().defaults(DefaultOpts)` + * and `.session_conf(HashMap)` — the routes that actually populate SEA + * `CreateSession.catalog` / `.schema` / `.session_confs`). * - * Mirrors `ConnectionOptions` in the binding's `.d.ts`; declared locally - * to avoid coupling the JS-side adapter to the auto-generated TS file. + * Per-statement overrides do not exist on the kernel surface; both + * pyo3 and napi expose catalog / schema / sessionConf only at session + * creation. Mirror that here so the adapter doesn't promise a + * capability the binding can't honour. */ -export interface SeaNativeConnectionOptions { - hostName: string; - httpPath: string; - token: string; +export interface SeaSessionDefaults { + catalog?: string; + schema?: string; + sessionConf?: Record; } +export type SeaNativeConnectionOptions = SeaSessionDefaults & + ( + | { + hostName: string; + httpPath: string; + authMode: 'Pat'; + token: string; + } + | { + hostName: string; + httpPath: string; + authMode: 'OAuthM2m'; + oauthClientId: string; + oauthClientSecret: string; + } + | { + hostName: string; + httpPath: string; + authMode: 'OAuthU2m'; + oauthRedirectPort: number; + } + ); + function prependSlash(str: string): string { if (str.length > 0 && str.charAt(0) !== '/') { return `/${str}`; @@ -37,47 +99,188 @@ function prependSlash(str: string): string { } /** - * Validate that the user-supplied `ConnectionOptions` describe a PAT auth - * configuration and build the napi-binding's connection-options shape. + * Reject inputs that pass `typeof === 'string' && length > 0` but are + * structurally useless as credentials: whitespace-only strings, and the + * literal strings `'undefined'` / `'null'` (case-insensitive) that buggy + * shell exports (e.g. `export FOO="$UNSET_VAR"`) produce. Surfacing + * these here means an OAuth flow's `invalid_client` from the workspace + * is always a real credential mismatch, never a malformed-input passthrough. * - * M0 SCOPE: PAT only. - * - Accepts `authType: 'access-token'` and the undefined-authType default - * (which already means PAT throughout the existing driver — see + * Exported so the integration-test env-gate can reuse the same predicate + * and stay in lockstep with production (B-3 fix). + */ +export function isBlankOrReserved(s: string): boolean { + const normalized = s.trim().toLowerCase(); + return normalized.length === 0 || normalized === 'undefined' || normalized === 'null'; +} + +/** + * Validate the user-supplied `ConnectionOptions` and build the + * napi-binding's connection-options shape. + * + * Supported auth modes: + * - PAT: `authType: 'access-token'` (or undefined, which already means + * PAT throughout the existing driver — see * `DBSQLClient.createAuthProvider`). - * - Rejects every other `authType` discriminant with a clear - * "M0 supports only PAT" message so callers know OAuth / Federation / - * custom providers land in M1. + * - OAuth M2M: `authType: 'databricks-oauth'` + `oauthClientId` + + * `oauthClientSecret`. Kernel handles OIDC discovery, client_credentials + * exchange, and re-auth on expiry internally. + * - OAuth U2M: `authType: 'databricks-oauth'` + NO `oauthClientId` and + * NO `oauthClientSecret`. Kernel runs the PKCE auth-code dance (opens + * a browser, listens on localhost:8030, exchanges the code, persists + * to `~/.config/databricks-sql-kernel/oauth/{sha256}.json`). The flow + * selector keys off `oauthClientId` presence: present → M2M, absent → + * U2M. (Round-4 NF3-2 fix; previously secret-keyed — that variant + * routed a typo'd-secret M2M call to the U2M arm and swallowed the + * actionable error.) Mirrors thrift's intent at `DBSQLClient.ts:143`. + * + * Out of scope on the OAuth paths (rejected with a clear error): + * - `azureTenantId` / `useDatabricksOAuthInAzure` → Microsoft Entra + * direct flow. The kernel uses workspace-OIDC discovery (which works + * against Azure workspaces too — they serve `/oidc/.well-known/...`) + * and does not implement the Entra-direct scope-rewrite path. + * - `persistence` on M2M → M2M tokens are not cached (re-issuing is + * cheap; no refresh token). + * - `persistence` on U2M → custom token store is a parity gap; + * requires kernel-side `AuthConfig::External` plumbing. The kernel's + * auto-disk-cache works for the standard flow today. + * + * Ambiguity: + * - PAT path: rejects when OAuth fields (`oauthClientId` / + * `oauthClientSecret`) are simultaneously set. + * - OAuth path: rejects when `token` is set alongside OAuth fields. * * Throws: - * - `AuthenticationError` when the auth mode is PAT but `token` is missing - * or empty. - * - `HiveDriverError` when the auth mode is anything other than PAT. + * - `AuthenticationError` for missing/blank required credentials. + * - `HiveDriverError` for unsupported auth modes / Azure-direct / + * custom persistence / ambiguous combinations. */ export function buildSeaConnectionOptions(options: ConnectionOptions): SeaNativeConnectionOptions { const { authType } = options as { authType?: string }; - if (authType !== undefined && authType !== 'access-token') { - throw new HiveDriverError( - `SEA backend (M0) supports only PAT auth (authType: 'access-token'); ` + - `got authType: '${authType}'. Other auth modes (databricks-oauth, ` + - `token-provider, external-token, static-token, custom) will land in M1.`, - ); + const base = { + hostName: options.host, + httpPath: prependSlash(options.path), + }; + + const oauth = options as { + oauthClientId?: string; + oauthClientSecret?: string; + azureTenantId?: string; + useDatabricksOAuthInAzure?: boolean; + persistence?: unknown; + }; + + if (authType === undefined || authType === 'access-token') { + const { token } = options as { token?: string }; + if (typeof token !== 'string' || isBlankOrReserved(token)) { + throw new AuthenticationError( + 'SEA backend: a non-empty PAT must be supplied via `token` when using `authType: \'access-token\'`.', + ); + } + if (oauth.oauthClientId !== undefined || oauth.oauthClientSecret !== undefined) { + throw new HiveDriverError( + 'SEA backend: cannot supply both `token` and `oauthClientId`/`oauthClientSecret` ' + + "on the same connection. Pick one: 'access-token' (PAT) uses `token`; " + + "'databricks-oauth' uses the OAuth fields.", + ); + } + return { ...base, authMode: 'Pat', token }; } - // PAT path — at this point `options` is structurally the access-token branch - // of `AuthOptions`, which guarantees a `token` field at the type level. We - // still defensively re-check because the public ConnectionOptions type - // permits `authType: undefined` with no token at runtime. - const { token } = options as { token?: string }; - if (typeof token !== 'string' || token.length === 0) { - throw new AuthenticationError( - 'SEA backend: a non-empty PAT must be supplied via `token` when using `authType: \'access-token\'`.', - ); + if (authType === 'databricks-oauth') { + if ((options as { token?: string }).token !== undefined) { + throw new HiveDriverError( + "SEA backend: cannot supply `token` alongside `authType: 'databricks-oauth'`. " + + "Use `authType: 'access-token'` for PAT, or omit `token` to use OAuth.", + ); + } + + if (oauth.azureTenantId !== undefined || oauth.useDatabricksOAuthInAzure === true) { + throw new HiveDriverError( + 'SEA backend: Azure-direct OAuth (azureTenantId / useDatabricksOAuthInAzure) ' + + 'is not supported. The workspace-OIDC discovery path handles Azure workspaces ' + + 'today without these options.', + ); + } + + // Flow selector mirrors thrift's `DBSQLClient.createAuthProvider` + // (`DBSQLClient.ts:143`): presence of `oauthClientId` indicates M2M + // intent, otherwise U2M. Routing decision is based on `oauthClientId` + // (the "do I have an id?" signal) rather than the secret, so a + // user who set an id but typoed/forgot the secret gets the M2M + // "secret is required" error instead of a U2M error that hides + // their actual intent. The U2M arm still defends against an id + // sneaking through: fires only when `oauthClientId` is provided as + // a blank-reserved literal (e.g., whitespace, `"null"`, `"undefined"`) + // alongside an absent/blank secret — both `idIsBlank` and + // `secretIsBlank` are true so U2M wins routing, but the caller's + // intent to use U2M with a partially-set id is ambiguous and + // rejected explicitly. + const idIsBlank = + oauth.oauthClientId === undefined || + (typeof oauth.oauthClientId === 'string' && isBlankOrReserved(oauth.oauthClientId)); + const secretIsBlank = + oauth.oauthClientSecret === undefined || + (typeof oauth.oauthClientSecret === 'string' && isBlankOrReserved(oauth.oauthClientSecret)); + + if (idIsBlank && secretIsBlank) { + // U2M — neither id nor secret supplied. + if (oauth.oauthClientId !== undefined) { + // Defense-in-depth: id was set but blank/reserved literal. + // The kernel hardcodes `client_id = "databricks-cli"` for U2M; + // there's no JS-side override knob. + throw new HiveDriverError( + 'SEA backend: `oauthClientId` is not supported on the OAuth U2M flow; ' + + "the kernel uses the built-in 'databricks-cli' client. " + + 'Omit `oauthClientId` for U2M, or supply `oauthClientSecret` for the M2M flow.', + ); + } + if (oauth.persistence !== undefined) { + throw new HiveDriverError( + 'SEA backend: `persistence` (custom OAuth token store) is not yet wired through ' + + 'to the kernel — requires `AuthConfig::External` plumbing. ' + + 'Today the kernel auto-persists U2M tokens to ' + + '`~/.config/databricks-sql-kernel/oauth/` which works for the standard flow; ' + + "the JS-supplied hook (matching thrift's `OAuthPersistence` interface) lands " + + 'when the kernel exposes it.', + ); + } + return { + ...base, + authMode: 'OAuthU2m', + oauthRedirectPort: U2M_DEFAULT_REDIRECT_PORT, + }; + } + + // M2M. + if (typeof oauth.oauthClientId !== 'string' || isBlankOrReserved(oauth.oauthClientId)) { + throw new AuthenticationError( + 'SEA backend: `oauthClientId` is required (non-empty, non-whitespace) for OAuth M2M.', + ); + } + if (typeof oauth.oauthClientSecret !== 'string' || isBlankOrReserved(oauth.oauthClientSecret)) { + throw new AuthenticationError( + 'SEA backend: `oauthClientSecret` must be a non-empty non-whitespace string for OAuth M2M.', + ); + } + if (oauth.persistence !== undefined) { + throw new HiveDriverError( + 'SEA backend: `persistence` is not supported on OAuth M2M ' + + '(M2M tokens have no refresh token; the kernel re-issues on expiry).', + ); + } + return { + ...base, + authMode: 'OAuthM2m', + oauthClientId: oauth.oauthClientId, + oauthClientSecret: oauth.oauthClientSecret, + }; } - return { - hostName: options.host, - httpPath: prependSlash(options.path), - token, - }; + throw new HiveDriverError( + `SEA backend: unsupported auth mode '${authType}'. ` + + "Supported modes on the SEA backend today: 'access-token' (PAT) and 'databricks-oauth' " + + '(M2M with oauthClientId+oauthClientSecret, or U2M with neither).', + ); } diff --git a/lib/sea/SeaBackend.ts b/lib/sea/SeaBackend.ts index 11c4ee78..61b1a333 100644 --- a/lib/sea/SeaBackend.ts +++ b/lib/sea/SeaBackend.ts @@ -22,35 +22,19 @@ import { SeaNativeBinding, SeaNativeConnection, } from './SeaNativeLoader'; -import { mapKernelErrorToJsError, KernelErrorShape } from './SeaErrorMapping'; +import { decodeNapiKernelError } from './SeaErrorMapping'; import { buildSeaConnectionOptions, SeaNativeConnectionOptions } from './SeaAuth'; import SeaSessionBackend from './SeaSessionBackend'; -/** - * Sentinel string the napi binding uses on `Error.reason` JSON envelopes. - * Keep in sync with `native/sea/src/error.rs` (`SENTINEL`). - */ -const KERNEL_ERROR_SENTINEL = '__databricks_error__:'; - -function rethrowKernelError(err: unknown): never { - if (err && typeof err === 'object' && 'message' in err) { - const reason = (err as { reason?: unknown }).reason; - if (typeof reason === 'string' && reason.startsWith(KERNEL_ERROR_SENTINEL)) { - try { - const payload = JSON.parse(reason.slice(KERNEL_ERROR_SENTINEL.length)) as KernelErrorShape; - throw mapKernelErrorToJsError(payload); - } catch (parseErr) { - if (parseErr !== err) { - throw parseErr; - } - } - } - } - throw err; -} - export interface SeaBackendOptions { - context: IClientContext; + /** + * Optional in the type so unit tests that only exercise the auth- + * routing surface (which doesn't touch context) can pass + * `{ nativeBinding }`. The constructor downcasts undefined to + * `IClientContext` because runtime callers from `DBSQLClient` always + * supply one — see `lib/DBSQLClient.ts` SEA seam. + */ + context?: IClientContext; /** * Optional injection seam for unit tests. When provided, replaces the * default `getSeaNative()` call so tests can swap in a mock napi @@ -69,9 +53,11 @@ export interface SeaBackendOptions { * use. The actual session open happens inside `openSession()`. * * **Auth validation:** delegates to `buildSeaConnectionOptions` from - * `SeaAuth`, which mirrors the existing DBSQLClient PAT validation - * pattern (slash-prepended httpPath, AuthenticationError on missing - * token, HiveDriverError on non-PAT authType naming M1 modes). + * `SeaAuth`, which mirrors the existing DBSQLClient validation pattern + * (slash-prepended httpPath, AuthenticationError on missing token or + * blank OAuth credentials, HiveDriverError on unsupported authType / + * Azure-direct / ambiguous credential combinations). M2M and U2M + * routing key off `oauthClientId` presence; see SeaAuth.ts. * * **Why we don't use IClientContext's connectionProvider here:** that * provider is the Thrift HTTP transport. The kernel owns its own @@ -103,28 +89,36 @@ export default class SeaBackend implements IBackend { throw new HiveDriverError('SeaBackend: not connected. Call connect() first.'); } + // Fold session-level defaults from the OpenSessionRequest into the + // napi `ConnectionOptions`. The kernel routes these through + // `Session::builder().defaults(DefaultOpts)` + `.session_conf(...)` + // so they land on the SEA `CreateSession` wire fields, not on each + // per-statement request. Matches pyo3's `Session.__new__` shape. + // + // Only set the optional keys when present so the napi call shape + // stays minimal — keeps wire snapshots / test assertions stable + // for callers who pass no defaults. + const sessionOptions: SeaNativeConnectionOptions = { ...this.nativeOptions }; + if (request.initialCatalog !== undefined) { + sessionOptions.catalog = request.initialCatalog; + } + if (request.initialSchema !== undefined) { + sessionOptions.schema = request.initialSchema; + } + if (request.configuration !== undefined) { + sessionOptions.sessionConf = { ...request.configuration }; + } + let nativeConnection: SeaNativeConnection; try { - nativeConnection = (await this.binding.openSession(this.nativeOptions)) as SeaNativeConnection; + nativeConnection = (await this.binding.openSession(sessionOptions)) as SeaNativeConnection; } catch (err) { - rethrowKernelError(err); + throw decodeNapiKernelError(err); } - // Merge `request.configuration` (the existing public field for Spark - // conf) with any backend-specific session config. The SEA wire - // protocol applies these per-statement, but we capture them at - // session-open time and forward with every executeStatement to - // preserve session-config semantics. - const sessionConfig = request.configuration ? { ...request.configuration } : undefined; - return new SeaSessionBackend({ connection: nativeConnection!, context: this.context, - defaults: { - initialCatalog: request.initialCatalog, - initialSchema: request.initialSchema, - sessionConfig, - }, }); } diff --git a/lib/sea/SeaErrorMapping.ts b/lib/sea/SeaErrorMapping.ts index 7e8a5534..d7bec2ee 100644 --- a/lib/sea/SeaErrorMapping.ts +++ b/lib/sea/SeaErrorMapping.ts @@ -3,6 +3,15 @@ import AuthenticationError from '../errors/AuthenticationError'; import OperationStateError, { OperationStateErrorCode } from '../errors/OperationStateError'; import ParameterError from '../errors/ParameterError'; +/** + * Sentinel prefix the napi binding's `napi_err_from_kernel` puts on + * `Error.message` when the underlying failure was a structured kernel + * `Error` rather than a plain napi `InvalidArg` from binding-side + * validation. Defined here (and in `native/sea/src/error.rs:44`) — the + * two MUST stay in lockstep. + */ +const ERROR_SENTINEL = '__databricks_error__:'; + /** * Shape of the kernel error surfaced by the napi-binding's `napi_err_from_kernel`. * @@ -43,31 +52,51 @@ export type KernelErrorCode = | 'SqlError'; /** - * An `Error` with a preserved SQLSTATE on the `sqlState` property. Used as the - * narrowed return type of {@link mapKernelErrorToJsError} so callers that need - * the SQLSTATE can `error.sqlState` without an `any` cast. + * Optional metadata fields the kernel may attach via the + * `__databricks_error__:` envelope (per `native/sea/src/error.rs:50-89`). + * + * `errorCode` is namespaced under `kernelMetadata` rather than placed at + * the top level because `OperationStateError` already declares a top-level + * `errorCode: enum` field, and `DBSQLOperation.ts:209` switches on it + * (`err.errorCode === OperationStateErrorCode.Canceled`). Top-level + * defineProperty would clobber that enum with a kernel string and break + * cancel/close detection. + */ +export interface KernelMetadata { + errorCode?: string; + vendorCode?: number; + httpStatus?: number; + retryable?: boolean; + queryId?: string; +} + +/** + * An `Error` carrying optional SEA-side kernel context. `sqlState` is + * exposed at the top level (no collision in the existing driver error + * tree); the remaining envelope fields live under a `kernelMetadata` + * namespace to avoid clobbering pre-existing `errorCode` semantics on + * `OperationStateError`. */ export interface ErrorWithSqlState extends Error { sqlState?: string; + kernelMetadata?: KernelMetadata; } /** - * Attach the kernel's SQLSTATE to the JS error object via the `sqlState` property. - * The driver has no pre-existing `sqlState` convention (no other error class - * sets it today) so this single helper defines it for the SEA path. + * Attach a non-enumerable own-property to the error. The shape matches + * Node's convention for attaching `.code` to system errors: + * non-enumerable (clean `JSON.stringify`) but readable via direct + * property access and `Object.getOwnPropertyDescriptor`. One helper for + * both the top-level `sqlState` and the namespaced `kernelMetadata` + * object so the `defineProperty` flags live in exactly one place. */ -function attachSqlState(error: ErrorWithSqlState, sqlstate?: string): ErrorWithSqlState { - if (sqlstate !== undefined) { - // Using Object.defineProperty so the property is non-enumerable but still - // visible via direct access — matches the way Node attaches `.code` to system errors. - Object.defineProperty(error, 'sqlState', { - value: sqlstate, - writable: true, - enumerable: false, - configurable: true, - }); - } - return error; +function defineErrorMetadata(error: Error, key: K, value: V): void { + Object.defineProperty(error, key, { + value, + writable: true, + enumerable: false, + configurable: true, + }); } /** @@ -137,5 +166,124 @@ export function mapKernelErrorToJsError(kErr: KernelErrorShape): ErrorWithSqlSta break; } - return attachSqlState(error, sqlstate); + if (sqlstate !== undefined) { + defineErrorMetadata(error, 'sqlState', sqlstate); + } + return error; +} + +/** + * Build a {@link KernelMetadata} object from a parsed envelope, applying + * per-field type validation. A kernel-side bug that emits, say, + * `retryable: "true"` (string) instead of `true` (boolean) would + * otherwise leak the wrong-typed value through to JS callers; the + * type-guard discards the malformed field rather than passing it through. + */ +function buildKernelMetadata(parsed: Record): KernelMetadata { + const meta: KernelMetadata = {}; + if (typeof parsed.errorCode === 'string') { + meta.errorCode = parsed.errorCode; + } + if (typeof parsed.vendorCode === 'number') { + meta.vendorCode = parsed.vendorCode; + } + if (typeof parsed.httpStatus === 'number') { + meta.httpStatus = parsed.httpStatus; + } + if (typeof parsed.retryable === 'boolean') { + meta.retryable = parsed.retryable; + } + if (typeof parsed.queryId === 'string') { + meta.queryId = parsed.queryId; + } + return meta; +} + +/** + * Decode a napi-binding error into the typed JS error class. + * + * Two paths: + * - Structured kernel error: `Error.message` starts with + * {@link ERROR_SENTINEL} followed by a JSON envelope. We strip the + * sentinel, parse the JSON, route the {@link KernelErrorShape} + * through {@link mapKernelErrorToJsError}, and attach the remaining + * envelope fields under a single non-enumerable `kernelMetadata` + * namespace. Namespacing avoids the collision with + * `OperationStateError.errorCode` (an enum already switched on at the + * JS layer — see `DBSQLOperation.ts:209`). + * - Binding-side error (e.g. `napi::Error::new(InvalidArg, "openSession: + * \`token\` is required for the requested auth mode")` produced by + * the binding's own validation): returned unchanged. These don't + * carry kernel `code` info, so we surface them as-is. + * + * Non-`Error` values (e.g. a `Promise.reject('string')`) pass through + * wrapped in `HiveDriverError` so callers always see an `Error` + * subclass. + */ +export function decodeNapiKernelError(err: unknown): Error { + if (!(err instanceof Error)) { + return new HiveDriverError(typeof err === 'string' ? err : 'SEA backend: unknown error'); + } + + const { message } = err; + if (typeof message !== 'string' || !message.startsWith(ERROR_SENTINEL)) { + return err; + } + + const jsonStr = message.slice(ERROR_SENTINEL.length); + let parsed: unknown; + try { + parsed = JSON.parse(jsonStr); + } catch { + // Corrupted envelope — surface the raw post-sentinel payload rather + // than silently dropping the original error. Strip the internal + // `__databricks_error__:` prefix; it's a binding/JS-side framing + // marker, not user-actionable, and leaking it makes the message + // confusing to operators triaging a malformed kernel response. + // + // Mutate in place when possible so the napi-binding's original + // stack survives — that stack is the only useful triage signal on + // a malformed-envelope path (where did a sentinel-prefixed + // non-JSON message come from?). Fall back to a fresh + // `HiveDriverError` only if a future napi-rs revision makes + // `Error.message` non-writable (no such guarantee today, but the + // descriptor contract is implementation-defined). + try { + err.message = jsonStr; + return err; + } catch { + return new HiveDriverError(jsonStr); + } + } + + if ( + typeof parsed !== 'object' || + parsed === null || + typeof (parsed as { code?: unknown }).code !== 'string' || + typeof (parsed as { message?: unknown }).message !== 'string' + ) { + return err; + } + + const envelope = parsed as Record; + const code = envelope.code as string; + const msg = envelope.message as string; + const sqlState = typeof envelope.sqlState === 'string' ? envelope.sqlState : undefined; + + const jsErr = mapKernelErrorToJsError({ code, message: msg, sqlstate: sqlState }); + + const meta = buildKernelMetadata(envelope); + // Skip the namespace attachment entirely when no fields validated + // through — keeps `err.kernelMetadata` absent rather than `{}` for + // simple envelopes (the common case). + if ( + meta.errorCode !== undefined || + meta.vendorCode !== undefined || + meta.httpStatus !== undefined || + meta.retryable !== undefined || + meta.queryId !== undefined + ) { + defineErrorMetadata(jsErr, 'kernelMetadata', meta); + } + return jsErr; } diff --git a/lib/sea/SeaNativeLoader.ts b/lib/sea/SeaNativeLoader.ts index a685b5e7..96c422b0 100644 --- a/lib/sea/SeaNativeLoader.ts +++ b/lib/sea/SeaNativeLoader.ts @@ -26,13 +26,12 @@ import type { Connection as NativeConnection, Statement as NativeStatement, - ConnectionOptions, - ExecuteOptions, ArrowBatch, ArrowSchema, } from '@sea-native'; +import type { SeaNativeConnectionOptions } from './SeaAuth'; -export type { ConnectionOptions, ExecuteOptions, ArrowBatch, ArrowSchema }; +export type { ArrowBatch, ArrowSchema, SeaNativeConnectionOptions }; export type Connection = NativeConnection; export type Statement = NativeStatement; @@ -40,13 +39,21 @@ export type Statement = NativeStatement; // refactor renamed these. New code should import the un-prefixed names. export type SeaNativeConnection = NativeConnection; export type SeaNativeStatement = NativeStatement; -export type SeaExecuteOptions = ExecuteOptions; export type SeaArrowBatch = ArrowBatch; export type SeaArrowSchema = ArrowSchema; export interface SeaNativeBinding { version(): string; - openSession(options: ConnectionOptions): Promise; + /** + * Open a session over PAT, OAuth M2M, or OAuth U2M auth. Returns an + * opaque Connection. The discriminated `SeaNativeConnectionOptions` + * union from `SeaAuth` is the source of truth for the per-mode + * required fields, so the loader-seam enforces the same compile-time + * check the adapter applies — a caller can't bypass + * `buildSeaConnectionOptions` and pass, say, `{ authMode: 'Pat' }` + * with no token. + */ + openSession(opts: SeaNativeConnectionOptions): Promise; Connection: typeof NativeConnection; Statement: typeof NativeStatement; } diff --git a/lib/sea/SeaSessionBackend.ts b/lib/sea/SeaSessionBackend.ts index ea8d54d3..a79759ea 100644 --- a/lib/sea/SeaSessionBackend.ts +++ b/lib/sea/SeaSessionBackend.ts @@ -31,52 +31,14 @@ import { import Status from '../dto/Status'; import InfoValue from '../dto/InfoValue'; import HiveDriverError from '../errors/HiveDriverError'; -import { SeaNativeConnection, SeaExecuteOptions } from './SeaNativeLoader'; -import { mapKernelErrorToJsError, KernelErrorShape } from './SeaErrorMapping'; +import { SeaNativeConnection } from './SeaNativeLoader'; +import { decodeNapiKernelError } from './SeaErrorMapping'; import SeaOperationBackend from './SeaOperationBackend'; -const KERNEL_ERROR_SENTINEL = '__databricks_error__:'; - -function rethrowKernelError(err: unknown): never { - if (err && typeof err === 'object' && 'message' in err) { - const reason = (err as { reason?: unknown }).reason; - if (typeof reason === 'string' && reason.startsWith(KERNEL_ERROR_SENTINEL)) { - try { - const payload = JSON.parse(reason.slice(KERNEL_ERROR_SENTINEL.length)) as KernelErrorShape; - throw mapKernelErrorToJsError(payload); - } catch (parseErr) { - if (parseErr !== err) { - throw parseErr; - } - } - } - } - throw err; -} - -/** - * Per-session defaults that apply to every `executeStatement` issued - * through this backend. Captured at `SeaBackend.openSession()` time from - * the `OpenSessionRequest` — `initialCatalog` / `initialSchema` / - * `sessionConfig`. - * - * The napi binding routes these to the kernel's `statement_conf` map, - * which the SEA wire treats as session-scoped parameters. They are - * forwarded with every `executeStatement` call so the JDBC-style - * "session config" semantics are preserved even though SEA's wire - * protocol is statement-scoped. - */ -export interface SeaSessionDefaults { - initialCatalog?: string; - initialSchema?: string; - sessionConfig?: Record; -} - export interface SeaSessionBackendOptions { /** The opaque napi `Connection` handle returned by `openSession`. */ connection: SeaNativeConnection; context: IClientContext; - defaults?: SeaSessionDefaults; /** Optional override for `id`. Defaults to a fresh UUIDv4. */ id?: string; } @@ -91,30 +53,26 @@ export interface SeaSessionBackendOptions { * backend continues to handle the metadata path by default (callers * opt into SEA via `ConnectionOptions.useSEA`). * - * **Session config flow:** the SEA wire protocol is statement-scoped, - * so "session config" semantics (Spark conf, `initialCatalog`, - * `initialSchema`) are emulated by forwarding the same defaults with - * every `executeStatement` call. Per-statement overrides on - * `ExecuteStatementOptions` are reserved for M1; M0 carries only the - * defaults captured at session-open time plus the `useCloudFetch` - * boolean projected onto `sessionConfig.use_cloud_fetch` for the - * kernel. + * **Session config flow:** catalog / schema / sessionConf are applied + * once at session creation (kernel `Session::builder().defaults()` + + * `.session_conf()` → SEA `CreateSession.catalog` / `.schema` / + * `.session_confs`) and remain in effect for every statement run on + * the resulting napi `Connection`. No per-statement forwarding is + * needed — that pattern was removed when the napi binding moved these + * onto `openSession` to match pyo3. */ export default class SeaSessionBackend implements ISessionBackend { private readonly connection: SeaNativeConnection; private readonly context: IClientContext; - private readonly defaults: SeaSessionDefaults; - private readonly _id: string; private closed = false; - constructor({ connection, context, defaults, id }: SeaSessionBackendOptions) { + constructor({ connection, context, id }: SeaSessionBackendOptions) { this.connection = connection; this.context = context; - this.defaults = defaults ?? {}; this._id = id ?? uuidv4(); } @@ -127,13 +85,21 @@ export default class SeaSessionBackend implements ISessionBackend { } /** - * Execute a SQL statement through the napi binding. Merges the - * session-level defaults (`initialCatalog` / `initialSchema` / - * `sessionConfig`) with the per-call `useCloudFetch` override. + * Execute a SQL statement through the napi binding. + * + * Catalog / schema / sessionConf were applied at session open, so + * there are no per-statement options to thread through. * * M0 intentionally rejects `queryTimeout`, `namedParameters`, and - * `ordinalParameters` with explicit deferred-to-M1 errors. The Thrift - * backend remains the path for consumers that need any of those today. + * `ordinalParameters` with explicit deferred-to-M1 errors. `useCloudFetch` + * is a no-op on the SEA path — the kernel hardcodes the SEA + * `disposition` to `INLINE_OR_EXTERNAL_LINKS`, and per-statement + * conf overrides have no reader on the kernel; cloud-fetch behaviour + * is governed entirely by the kernel's `ResultConfig` (M1 binding + * surface). + * + * The Thrift backend remains the path for consumers that need any + * of those today. */ public async executeStatement(statement: string, options: ExecuteStatementOptions): Promise { this.failIfClosed(); @@ -150,26 +116,11 @@ export default class SeaSessionBackend implements ISessionBackend { ); } - // Merge session-level sessionConfig with per-statement useCloudFetch. - // The kernel accepts only string-valued conf values; booleans are - // String()'d to "true"/"false" matching the existing Thrift conf - // convention. - const sessionConfig: Record = { ...(this.defaults.sessionConfig ?? {}) }; - if (options.useCloudFetch !== undefined) { - sessionConfig.use_cloud_fetch = String(options.useCloudFetch); - } - - const executeOptions: SeaExecuteOptions = { - initialCatalog: this.defaults.initialCatalog, - initialSchema: this.defaults.initialSchema, - sessionConfig: Object.keys(sessionConfig).length > 0 ? sessionConfig : undefined, - }; - let nativeStatement; try { - nativeStatement = await this.connection.executeStatement(statement, executeOptions); + nativeStatement = await this.connection.executeStatement(statement); } catch (err) { - rethrowKernelError(err); + throw decodeNapiKernelError(err); } return new SeaOperationBackend({ statement: nativeStatement!, @@ -220,7 +171,7 @@ export default class SeaSessionBackend implements ISessionBackend { try { await this.connection.close(); } catch (err) { - rethrowKernelError(err); + throw decodeNapiKernelError(err); } this.closed = true; return Status.success(); diff --git a/native/sea/index.d.ts b/native/sea/index.d.ts index 5fb5e902..658feeea 100644 --- a/native/sea/index.d.ts +++ b/native/sea/index.d.ts @@ -3,27 +3,17 @@ /* auto-generated by NAPI-RS */ -/** - * JS-visible per-execute options. M0 only carries - * initialCatalog / initialSchema / sessionConfig — parameters and - * per-statement overrides land in M1. - */ -export interface ExecuteOptions { - /** Default catalog applied to this statement via session conf. */ - initialCatalog?: string - /** Default schema applied to this statement via session conf. */ - initialSchema?: string - /** - * Per-statement session conf overrides (forwarded to SEA - * `parameters` / Thrift `confOverlay`). - */ - sessionConfig?: Record -} /** * JS-visible options for opening a Databricks SQL session over PAT. * * M0 supports PAT only — `token` is required. OAuth M2M / U2M variants * land in M1 along with a discriminated-union shape on the JS side. + * + * Catalog / schema / sessionConf are applied once at session creation + * and remain in effect for every statement run on the resulting + * `Connection`. The SEA wire protocol carries them on + * `CreateSession`, not on `ExecuteStatement` — so there is no + * per-statement override path in either this binding or pyo3. */ export interface ConnectionOptions { /** @@ -41,6 +31,24 @@ export interface ConnectionOptions { * empty PATs at session construction). */ token: string + /** + * Default catalog for statements executed on this session. + * Routed through the kernel's `DefaultOpts` and onto the SEA + * `CreateSession.catalog` wire field. + */ + catalog?: string + /** + * Default schema for statements executed on this session. + * Routed through the kernel's `DefaultOpts` and onto the SEA + * `CreateSession.schema` wire field. + */ + schema?: string + /** + * Server-bound session conf (Spark conf, `ANSI_MODE`, `TIMEZONE`, + * query-tag presets, …). Forwarded verbatim to SEA + * `session_confs`. Unknown keys are rejected server-side. + */ + sessionConf?: Record } /** * Open a Databricks SQL session over PAT auth and return an opaque @@ -86,8 +94,12 @@ export declare class Connection { /** * Execute a SQL statement and return a Statement handle that * streams batches via `fetchNextBatch()`. + * + * No per-statement options: catalog / schema / sessionConf are + * session-level (`openSession`). Positional / named parameters + * land in M1 via `Statement::spec().param(…)` on the kernel. */ - executeStatement(sql: string, options: ExecuteOptions): Promise + executeStatement(sql: string): Promise /** * Explicit close. Marks the connection wrapper as closed so * subsequent calls on this `Connection` return `InvalidArg`, then diff --git a/tests/integration/sea/auth-m2m-e2e.test.ts b/tests/integration/sea/auth-m2m-e2e.test.ts new file mode 100644 index 00000000..12f9c438 --- /dev/null +++ b/tests/integration/sea/auth-m2m-e2e.test.ts @@ -0,0 +1,119 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import { expect } from 'chai'; +import { DBSQLClient } from '../../../lib'; +import AuthenticationError from '../../../lib/errors/AuthenticationError'; +import { isBlankOrReserved } from '../../../lib/sea/SeaAuth'; + +/** + * sea-auth M1 OAuth M2M end-to-end: + * 1. Construct a DBSQLClient. + * 2. `connect({ useSEA: true, authType: 'databricks-oauth', oauthClientId, + * oauthClientSecret })` against pecotesting. + * 3. `openSession()` — kernel runs OIDC discovery + client_credentials + * exchange. Successful openSession is the proof that the kernel-side + * OAuth M2M plumbing works end-to-end: discovery + token exchange + + * Bearer header on the create-session request all succeeded. + * 4. Close the session, then the client. + * + * No query is executed here — execution is the responsibility of the + * sea-execution feature's own e2e (mirror of the M0 PAT e2e scope at + * `auth-pat-e2e.test.ts`). If kernel-side OAuth fails, `openSession()` + * raises before returning. + * + * Required env (exported by `/home/madhavendra.rathore/.zshrc` on the developer machine): + * - DATABRICKS_PECOTESTING_SERVER_HOSTNAME + * - DATABRICKS_PECOTESTING_HTTP_PATH2 (second pecotesting warehouse — AAD SP registered here) + * - DATABRICKS_PECOTESTING_AAD_CLIENT_ID (Azure AD SP registered on pecotesting) + * - DATABRICKS_PECOTESTING_AAD_CLIENT_SECRET (matching secret) + * + * Skipped (not failed) when any of the four env vars is missing, so CI + * machines without OAuth credentials don't fail-flap. + */ +describe('sea-auth e2e — OAuth M2M through DBSQLClient ↔ SeaBackend ↔ napi binding', function suite() { + const host = process.env.DATABRICKS_PECOTESTING_SERVER_HOSTNAME; + const path = process.env.DATABRICKS_PECOTESTING_HTTP_PATH2; + const oauthClientId = process.env.DATABRICKS_PECOTESTING_AAD_CLIENT_ID; + const oauthClientSecret = process.env.DATABRICKS_PECOTESTING_AAD_CLIENT_SECRET; + + this.timeout(120_000); + + before(function gate() { + // Reject not just absent env vars but also blank/whitespace/literal- + // `'undefined'`/`'null'` values from buggy shell exports — these + // would otherwise reach the workspace as bogus creds and yield an + // `invalid_client` indistinguishable from a real SP-not-registered + // issue. Reuse the production `isBlankOrReserved` predicate so the + // test gate stays in lockstep with the case-insensitive variant + // shipped in round-2 (B-3 fix). + const looksReal = (s: string | undefined): s is string => + typeof s === 'string' && !isBlankOrReserved(s); + if (!looksReal(host) || !looksReal(path) || !looksReal(oauthClientId) || !looksReal(oauthClientSecret)) { + // eslint-disable-next-line no-invalid-this + this.skip(); + } + }); + + it('connects, opens a session, closes the session, closes the client', async () => { + const client = new DBSQLClient(); + + const connected = await client.connect({ + host: host as string, + path: path as string, + authType: 'databricks-oauth', + oauthClientId: oauthClientId as string, + oauthClientSecret: oauthClientSecret as string, + useSEA: true, + }); + expect(connected).to.equal(client); + + const session = await client.openSession(); + expect(session.id).to.be.a('string'); + expect(session.id.length).to.be.greaterThan(0); + + const status = await session.close(); + expect(status.isSuccess).to.equal(true); + + await client.close(); + }); + + // Negative path — proves the kernel-side OAuth error path is intact + // and surfaces as the typed `AuthenticationError` (DA-F1 + DA-F6). + // Distinguishes "creds wrong" (this test passes with bogus secret) + // from "all code broken" (this test fails with a non-AuthenticationError). + it('rejects with AuthenticationError when oauthClientSecret is deliberately wrong', async () => { + const client = new DBSQLClient(); + + await client.connect({ + host: host as string, + path: path as string, + authType: 'databricks-oauth', + oauthClientId: oauthClientId as string, + oauthClientSecret: 'definitely-not-the-real-secret-deadbeef', + useSEA: true, + }); + + let caught: unknown; + try { + await client.openSession(); + } catch (e) { + caught = e; + } + expect(caught).to.be.instanceOf(AuthenticationError); + expect((caught as Error).message).to.match(/invalid_client/i); + + await client.close(); + }); +}); diff --git a/tests/integration/sea/auth-u2m-e2e.test.ts b/tests/integration/sea/auth-u2m-e2e.test.ts new file mode 100644 index 00000000..93d7c9c3 --- /dev/null +++ b/tests/integration/sea/auth-u2m-e2e.test.ts @@ -0,0 +1,72 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import { expect } from 'chai'; +import { DBSQLClient } from '../../../lib'; + +/** + * sea-auth M1 OAuth U2M end-to-end — **SKIPPED pending browser harness**. + * + * U2M is interactive: the kernel opens a system browser + * (`auth/oauth/u2m.rs:414`, via the `open` crate), binds a local + * listener on port 8030 (via the JS adapter's hardcoded override), and + * waits up to 120s for the user to authenticate. + * + * Driving this from CI requires Playwright/Puppeteer to navigate the + * browser through the workspace login + consent screens. That harness + * is tracked as `TBD-oauth_u2m_test_harness` in testing-agent's + * findings; until it exists, this test stays `it.skip` so the e2e + * suite carries a slot for whoever lands the harness work. + * + * The intended assertion sequence (mirrors `auth-m2m-e2e.test.ts`): + * 1. `client.connect({ useSEA: true, authType: 'databricks-oauth' })` + * — NO `oauthClientSecret` → kernel picks the U2M flow. + * 2. `openSession()` — kernel opens browser, waits for callback on + * localhost:8030, exchanges the auth code, returns Bearer token, + * issues the create-session request to SEA. + * 3. `session.close()` then `client.close()`. + * + * Required env (gated additionally via `it.skip` until the harness + * lands, so absent env is a no-op today): + * - DATABRICKS_PECOTESTING_SERVER_HOSTNAME + * - DATABRICKS_PECOTESTING_HTTP_PATH + * - (no client_id/secret — U2M uses kernel default `databricks-cli`) + */ +describe('sea-auth e2e — OAuth U2M through DBSQLClient ↔ SeaBackend ↔ napi binding', function suite() { + this.timeout(300_000); + + // eslint-disable-next-line mocha/no-skipped-tests + it.skip('[pending TBD-oauth_u2m_test_harness] interactive U2M round-trip', async () => { + const host = process.env.DATABRICKS_PECOTESTING_SERVER_HOSTNAME as string; + const path = process.env.DATABRICKS_PECOTESTING_HTTP_PATH as string; + + const client = new DBSQLClient(); + + const connected = await client.connect({ + host, + path, + authType: 'databricks-oauth', + useSEA: true, + }); + expect(connected).to.equal(client); + + const session = await client.openSession(); + expect(session.id).to.be.a('string'); + + const status = await session.close(); + expect(status.isSuccess).to.equal(true); + + await client.close(); + }); +}); diff --git a/tests/unit/sea/_helpers/fakeBinding.ts b/tests/unit/sea/_helpers/fakeBinding.ts new file mode 100644 index 00000000..a36082ed --- /dev/null +++ b/tests/unit/sea/_helpers/fakeBinding.ts @@ -0,0 +1,60 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import { SeaNativeBinding, SeaNativeConnection } from '../../../../lib/sea/SeaNativeLoader'; + +export interface RecordedCall { + method: string; + args: unknown[]; +} + +export interface FakeBinding { + binding: SeaNativeBinding; + calls: RecordedCall[]; +} + +/** + * Build a fake `SeaNativeBinding` that records every `openSession` call + * and returns a `Connection` whose `close()` is also recorded. Shared + * across the SEA auth unit-test files (PAT / M2M / U2M / future flows) + * so the closure shape lives in exactly one place. + * + * No real native code runs — the fake is structural-typing-only. + */ +export function makeFakeBinding(): FakeBinding { + const calls: RecordedCall[] = []; + + const fakeConnection = { + async executeStatement() { + throw new Error('not used in this test'); + }, + async close() { + calls.push({ method: 'connection.close', args: [] }); + }, + }; + + const binding: SeaNativeBinding = { + version() { + return 'fake-binding'; + }, + async openSession(opts: Parameters[0]) { + calls.push({ method: 'openSession', args: [opts] }); + return fakeConnection as unknown as SeaNativeConnection; + }, + Connection: function FakeConnection() {} as unknown as Function, + Statement: function FakeStatement() {} as unknown as Function, + }; + + return { binding, calls }; +} diff --git a/tests/unit/sea/auth-edge-cases.test.ts b/tests/unit/sea/auth-edge-cases.test.ts new file mode 100644 index 00000000..b2e752ef --- /dev/null +++ b/tests/unit/sea/auth-edge-cases.test.ts @@ -0,0 +1,637 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import { expect } from 'chai'; +import SeaBackend from '../../../lib/sea/SeaBackend'; +import { buildSeaConnectionOptions } from '../../../lib/sea/SeaAuth'; +import { ConnectionOptions } from '../../../lib/contracts/IDBSQLClient'; +import AuthenticationError from '../../../lib/errors/AuthenticationError'; +import HiveDriverError from '../../../lib/errors/HiveDriverError'; +import { makeFakeBinding } from './_helpers/fakeBinding'; + +describe('SeaAuth — edge cases (input validation + ambiguity)', () => { + describe('whitespace-only and reserved-literal credentials are rejected', () => { + it('rejects whitespace-only PAT', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + token: ' \t ', + }; + + expect(() => buildSeaConnectionOptions(opts)).to.throw( + AuthenticationError, + /non-empty PAT/, + ); + }); + + it('rejects literal "undefined" as PAT (buggy shell-export hazard)', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + token: 'undefined', + }; + + expect(() => buildSeaConnectionOptions(opts)).to.throw( + AuthenticationError, + /non-empty PAT/, + ); + }); + + it('rejects literal "null" as PAT', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + token: 'null', + }; + + expect(() => buildSeaConnectionOptions(opts)).to.throw( + AuthenticationError, + /non-empty PAT/, + ); + }); + + it('rejects mixed-case "UNDEFINED" / "Null" / "NULL" as PAT (case-insensitive)', () => { + for (const reserved of ['UNDEFINED', 'Undefined', 'Null', 'NULL', 'nUlL']) { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + token: reserved, + }; + + expect(() => buildSeaConnectionOptions(opts), `for token=${reserved}`).to.throw( + AuthenticationError, + /non-empty PAT/, + ); + } + }); + + // Round-4 NF3-2: presence of `oauthClientId` signals M2M intent. + // A blank/reserved-literal `oauthClientSecret` is then a missing-secret + // typo, not a request to fall back to U2M. Surface the M2M "secret + // required" AuthenticationError so the user fixes the real problem + // rather than swap class to a HiveDriverError pointing at a flow + // they didn't intend to use. + it('rejects mixed-case reserved-literal oauthClientSecret with AuthenticationError when id is set', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthClientId: 'client-uuid', + oauthClientSecret: 'NULL', + }; + + expect(() => buildSeaConnectionOptions(opts)).to.throw( + AuthenticationError, + /oauthClientSecret.*non-empty.*OAuth M2M/, + ); + }); + + it('rejects whitespace-only oauthClientId on M2M', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthClientId: ' ', + oauthClientSecret: 'dose-fake-secret', + }; + + expect(() => buildSeaConnectionOptions(opts)).to.throw( + AuthenticationError, + /oauthClientId.*required/, + ); + }); + + it('rejects whitespace-only oauthClientSecret with AuthenticationError when oauthClientId is set (M2M intent)', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthClientId: 'client-uuid', + oauthClientSecret: '\n\t', + }; + + expect(() => buildSeaConnectionOptions(opts)).to.throw( + AuthenticationError, + /oauthClientSecret.*non-empty.*OAuth M2M/, + ); + }); + + it('rejects literal "undefined" as oauthClientId on M2M', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthClientId: 'undefined', + oauthClientSecret: 'dose-fake-secret', + }; + + expect(() => buildSeaConnectionOptions(opts)).to.throw( + AuthenticationError, + /oauthClientId.*required/, + ); + }); + + it('rejects literal "undefined" as oauthClientSecret with AuthenticationError when id is set (M2M intent)', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthClientId: 'client-uuid', + oauthClientSecret: 'undefined', + }; + + expect(() => buildSeaConnectionOptions(opts)).to.throw( + AuthenticationError, + /oauthClientSecret.*non-empty.*OAuth M2M/, + ); + }); + + // Round-4 NF3-2: pin the exact class against the round-3 NF-N3 + // regression where M2M-with-empty-secret was routed through the U2M + // arm and raised a bare `HiveDriverError`. `instanceof + // AuthenticationError` correctly returns `false` for a bare + // `HiveDriverError` instance (instanceof is a one-way subclass + // check), so the subclass check IS sufficient to catch the + // regression. We don't add an `error.name` or `constructor.name` + // belt — the former requires `this.name` on the subclass (LE4-1 + // handles that separately for downstream-consumer benefit, not for + // this test), and the latter is bundler-fragile (terser/esbuild + // strip class names without `keep_classnames`). + it('M2M-with-empty-secret throws AuthenticationError, not bare HiveDriverError (class pin)', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthClientId: 'x', + oauthClientSecret: '', + }; + + expect(() => buildSeaConnectionOptions(opts)).to.throw( + AuthenticationError, + /oauthClientSecret.*non-empty.*OAuth M2M/, + ); + }); + + // Round-5 DA4-2: the round-3 → round-4 test flips left the U2M-arm + // defense-in-depth U2M+id rejection without coverage. It's still + // reachable: when `oauthClientId` is a blank-reserved literal + // (whitespace, `"null"`, `"undefined"`) AND `oauthClientSecret` is + // absent/blank, BOTH `idIsBlank` and `secretIsBlank` are true so + // U2M wins routing — but a non-undefined id signals ambiguity that + // U2M cannot honor (the kernel hardcodes `databricks-cli`). + it('routes a whitespace oauthClientId with no oauthClientSecret to the U2M defense-in-depth rejection', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthClientId: ' ', + } as unknown as ConnectionOptions; + + expect(() => buildSeaConnectionOptions(opts)).to.throw( + HiveDriverError, + /oauthClientId.*not supported on the OAuth U2M flow/, + ); + }); + }); + + describe('ambiguous credentials are rejected', () => { + it('rejects PAT path with stray oauthClientId', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'access-token', + token: 'dapi-fake-pat', + // eslint-disable-next-line @typescript-eslint/no-explicit-any + oauthClientId: 'client-uuid', + } as any; + + expect(() => buildSeaConnectionOptions(opts)).to.throw( + HiveDriverError, + /cannot supply both `token` and `oauthClientId/, + ); + }); + + it('rejects PAT path with stray oauthClientSecret', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'access-token', + token: 'dapi-fake-pat', + // eslint-disable-next-line @typescript-eslint/no-explicit-any + oauthClientSecret: 'dose-fake-secret', + } as any; + + expect(() => buildSeaConnectionOptions(opts)).to.throw( + HiveDriverError, + /cannot supply both `token` and `oauthClientId/, + ); + }); + + it('rejects M2M path with stray token', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthClientId: 'client-uuid', + oauthClientSecret: 'dose-fake-secret', + // eslint-disable-next-line @typescript-eslint/no-explicit-any + token: 'dapi-fake-pat', + } as any; + + expect(() => buildSeaConnectionOptions(opts)).to.throw( + HiveDriverError, + /cannot supply `token` alongside `authType: 'databricks-oauth'`/, + ); + }); + + it('rejects U2M path with stray token', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + // no client secret → would be U2M, but token is set → rejected first + // eslint-disable-next-line @typescript-eslint/no-explicit-any + token: 'dapi-fake-pat', + } as any; + + expect(() => buildSeaConnectionOptions(opts)).to.throw( + HiveDriverError, + /cannot supply `token` alongside `authType: 'databricks-oauth'`/, + ); + }); + + // NF-N3: a blank `oauthClientSecret` (the + // `process.env.MY_SECRET || ''` shape) should route to U2M, not + // to the M2M arm with an "empty secret" rejection. M2M's error + // message would never mention U2M, leaving the user stuck. + it('routes blank oauthClientSecret to U2M (not to an M2M-blank-secret rejection)', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthClientSecret: '', + }; + + const native = buildSeaConnectionOptions(opts); + expect(native.authMode).to.equal('OAuthU2m'); + }); + + it('routes whitespace-only oauthClientSecret to U2M too', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthClientSecret: ' \t ', + }; + + const native = buildSeaConnectionOptions(opts); + expect(native.authMode).to.equal('OAuthU2m'); + }); + + it('routes literal-"undefined" oauthClientSecret to U2M too', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthClientSecret: 'undefined', + }; + + const native = buildSeaConnectionOptions(opts); + expect(native.authMode).to.equal('OAuthU2m'); + }); + }); + + describe('explicit-undefined vs missing for Azure-direct discriminants', () => { + it('accepts explicit `azureTenantId: undefined` on M2M (treated as not-set)', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthClientId: 'client-uuid', + oauthClientSecret: 'dose-fake-secret', + azureTenantId: undefined, + }; + + const native = buildSeaConnectionOptions(opts); + expect(native.authMode).to.equal('OAuthM2m'); + }); + + it('accepts `useDatabricksOAuthInAzure: false` on M2M (only `=== true` rejects)', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthClientId: 'client-uuid', + oauthClientSecret: 'dose-fake-secret', + useDatabricksOAuthInAzure: false, + }; + + const native = buildSeaConnectionOptions(opts); + expect(native.authMode).to.equal('OAuthM2m'); + }); + + it('accepts explicit `azureTenantId: undefined` on U2M too', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + azureTenantId: undefined, + }; + + const native = buildSeaConnectionOptions(opts); + expect(native.authMode).to.equal('OAuthU2m'); + }); + }); +}); + +describe('SeaBackend — kernel error envelope decoding (DA-F1)', () => { + /** + * Build a fake binding whose `openSession` rejects with the verbatim + * `__databricks_error__:{...}` envelope shape the napi binding's + * `napi_err_from_kernel` produces. Used to exercise + * `decodeNapiKernelError` end-to-end without compiling the native + * module. + */ + function bindingRejectingWith(envelopeJson: string) { + const { binding } = makeFakeBinding(); + binding.openSession = (async () => { + throw new Error(`__databricks_error__:${envelopeJson}`); + }) as typeof binding.openSession; + return binding; + } + + const validConnectArgs: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + token: 'dapi-fake-pat', + }; + + it('maps Unauthenticated kernel envelope → AuthenticationError with kernel message preserved', async () => { + const binding = bindingRejectingWith( + '{"code":"Unauthenticated","message":"OAuth M2M token exchange failed: invalid_client"}', + ); + const backend = new SeaBackend({ nativeBinding: binding }); + await backend.connect(validConnectArgs); + + let caught: unknown; + try { + await backend.openSession({}); + } catch (e) { + caught = e; + } + expect(caught).to.be.instanceOf(AuthenticationError); + expect((caught as Error).message).to.match(/invalid_client/); + }); + + it('maps NetworkError kernel envelope → HiveDriverError with kernel message preserved', async () => { + const binding = bindingRejectingWith( + '{"code":"NetworkError","message":"OIDC discovery failed: connection refused"}', + ); + const backend = new SeaBackend({ nativeBinding: binding }); + await backend.connect(validConnectArgs); + + let caught: unknown; + try { + await backend.openSession({}); + } catch (e) { + caught = e; + } + expect(caught).to.be.instanceOf(HiveDriverError); + expect((caught as Error).message).to.match(/OIDC discovery failed/); + }); + + it('preserves SQLSTATE on the decoded error when present', async () => { + const binding = bindingRejectingWith( + '{"code":"Unauthenticated","message":"forbidden","sqlState":"28000"}', + ); + const backend = new SeaBackend({ nativeBinding: binding }); + await backend.connect(validConnectArgs); + + let caught: unknown; + try { + await backend.openSession({}); + } catch (e) { + caught = e; + } + expect(caught).to.be.instanceOf(AuthenticationError); + expect((caught as { sqlState?: string }).sqlState).to.equal('28000'); + }); + + it('passes through plain napi errors (no sentinel) unchanged', async () => { + const { binding } = makeFakeBinding(); + binding.openSession = (async () => { + throw new Error('openSession: `token` is required for the requested auth mode'); + }) as typeof binding.openSession; + const backend = new SeaBackend({ nativeBinding: binding }); + await backend.connect(validConnectArgs); + + let caught: unknown; + try { + await backend.openSession({}); + } catch (e) { + caught = e; + } + expect(caught).to.be.instanceOf(Error); + expect((caught as Error).message).to.match(/`token` is required/); + }); + + it('falls back to original Error for a corrupted envelope, stripping the internal sentinel', async () => { + const binding = bindingRejectingWith('not valid json'); + const backend = new SeaBackend({ nativeBinding: binding }); + await backend.connect(validConnectArgs); + + let caught: unknown; + try { + await backend.openSession({}); + } catch (e) { + caught = e; + } + // Corrupted envelopes should NOT silently disappear — we return + // the original Error so the operator sees the raw payload. + expect(caught).to.be.instanceOf(Error); + expect((caught as Error).message).to.contain('not valid json'); + // Round-4 NF3-3: the `__databricks_error__:` prefix is an internal + // JS<->binding framing marker; it must not leak to the user-facing + // message even on the corrupted-envelope fallback path. + expect((caught as Error).message).to.not.match(/^__databricks_error__:/); + expect((caught as Error).message).to.equal('not valid json'); + }); + + // NF-4 / NF-N1: preserve the 5 optional kernel envelope fields on the + // decoded JS error under a single `kernelMetadata` namespace. + // Namespaced to avoid the collision with `OperationStateError.errorCode` + // and `RetryError.errorCode` (both pre-existing enum fields switched + // on at `DBSQLOperation.ts:209`). + it('preserves errorCode + vendorCode + httpStatus + retryable + queryId under kernelMetadata namespace', async () => { + const binding = bindingRejectingWith( + '{"code":"Unavailable","message":"upstream timed out",' + + '"sqlState":"08006","errorCode":"UPSTREAM_TIMEOUT","vendorCode":1234,' + + '"httpStatus":503,"retryable":true,"queryId":"query-abc-123"}', + ); + const backend = new SeaBackend({ nativeBinding: binding }); + await backend.connect(validConnectArgs); + + let caught: unknown; + try { + await backend.openSession({}); + } catch (e) { + caught = e; + } + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const err = caught as any; + expect(err.sqlState).to.equal('08006'); + expect(err.kernelMetadata).to.deep.equal({ + errorCode: 'UPSTREAM_TIMEOUT', + vendorCode: 1234, + httpStatus: 503, + retryable: true, + queryId: 'query-abc-123', + }); + }); + + it('keeps sqlState and kernelMetadata non-enumerable (matches Node `.code` pattern)', async () => { + const binding = bindingRejectingWith( + '{"code":"NetworkError","message":"x","sqlState":"08000","httpStatus":502}', + ); + const backend = new SeaBackend({ nativeBinding: binding }); + await backend.connect(validConnectArgs); + + let caught: unknown; + try { + await backend.openSession({}); + } catch (e) { + caught = e; + } + expect(Object.keys(caught as object)).to.not.include('sqlState'); + expect(Object.keys(caught as object)).to.not.include('kernelMetadata'); + // But direct access still works. + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const err = caught as any; + expect(err.sqlState).to.equal('08000'); + expect(err.kernelMetadata?.httpStatus).to.equal(502); + }); + + // NF-N1: namespace must NOT clobber a pre-existing `errorCode` enum + // field on OperationStateError / RetryError. Cancelled envelopes map + // to OperationStateError(Canceled), and DBSQLOperation.ts:209 switches + // on `err.errorCode === OperationStateErrorCode.Canceled` — that must + // continue to read the enum 'CANCELED', not the kernel's textual + // errorCode. + it('does not clobber OperationStateError.errorCode enum when kernel envelope sends a textual errorCode', async () => { + const binding = bindingRejectingWith( + '{"code":"Cancelled","message":"user-cancel","errorCode":"USER_REQUESTED_CANCEL"}', + ); + const backend = new SeaBackend({ nativeBinding: binding }); + await backend.connect(validConnectArgs); + + let caught: unknown; + try { + await backend.openSession({}); + } catch (e) { + caught = e; + } + // The enum-typed top-level errorCode is untouched (still the + // CANCELED enum string from OperationStateError's constructor). + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const err = caught as any; + expect(err.errorCode).to.equal('CANCELED'); + // The kernel's textual errorCode survives under the namespace. + expect(err.kernelMetadata?.errorCode).to.equal('USER_REQUESTED_CANCEL'); + }); + + // NF-N4: per-field type guards. If the kernel sends a wrong-typed + // field (e.g. `retryable: "true"` string instead of `true` boolean), + // the decoder should drop that field rather than propagate the + // wrong type. + it('drops envelope fields with the wrong runtime type instead of passing them through', async () => { + // errorCode wrong-type (number instead of string), vendorCode + // wrong-type (string instead of number), httpStatus correct, + // retryable wrong-type (string instead of boolean), queryId null. + // Only httpStatus should survive the type-guard. + const binding = bindingRejectingWith( + '{"code":"NetworkError","message":"x","errorCode":42,"vendorCode":"not-a-number","httpStatus":502,"retryable":"true","queryId":null}', + ); + const backend = new SeaBackend({ nativeBinding: binding }); + await backend.connect(validConnectArgs); + + let caught: unknown; + try { + await backend.openSession({}); + } catch (e) { + caught = e; + } + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const err = caught as any; + // Only the well-typed httpStatus survives. + expect(err.kernelMetadata).to.deep.equal({ httpStatus: 502 }); + }); + + it('omits the kernelMetadata namespace entirely when no envelope fields survive validation', async () => { + // A minimal envelope (just code + message + sqlState) yields an + // empty metadata object — and we should NOT attach a `{}`-shaped + // namespace because that's pure noise. The sqlState top-level + // field is unaffected. + const binding = bindingRejectingWith( + '{"code":"Internal","message":"x","sqlState":"08001"}', + ); + const backend = new SeaBackend({ nativeBinding: binding }); + await backend.connect(validConnectArgs); + + let caught: unknown; + try { + await backend.openSession({}); + } catch (e) { + caught = e; + } + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const err = caught as any; + expect(err.sqlState).to.equal('08001'); + expect(err.kernelMetadata).to.equal(undefined); + }); + + // NF-1: SeaSessionBackend.close() must wrap the napi call too. + it('SeaSessionBackend.close() decodes kernel-error envelopes from native.close()', async () => { + const { binding } = makeFakeBinding(); + // Make openSession return a fake Connection whose close() throws + // a kernel-shaped envelope. + const failingClose = { + async executeStatement() { + throw new Error('unused'); + }, + async close() { + throw new Error( + '__databricks_error__:{"code":"Internal","message":"server-side close failed"}', + ); + }, + }; + binding.openSession = (async () => failingClose as unknown) as typeof binding.openSession; + + const backend = new SeaBackend({ nativeBinding: binding }); + await backend.connect(validConnectArgs); + const session = await backend.openSession({}); + + let caught: unknown; + try { + await session.close(); + } catch (e) { + caught = e; + } + // Before the NF-1 fix, this would surface as a raw Error whose + // message starts with `__databricks_error__:`. After the fix, the + // sentinel is stripped and the typed class is dispatched. + expect(caught).to.be.instanceOf(HiveDriverError); + expect((caught as Error).message).to.equal('server-side close failed'); + expect((caught as Error).message).to.not.contain('__databricks_error__'); + }); +}); diff --git a/tests/unit/sea/auth-m2m.test.ts b/tests/unit/sea/auth-m2m.test.ts new file mode 100644 index 00000000..0a38ebc5 --- /dev/null +++ b/tests/unit/sea/auth-m2m.test.ts @@ -0,0 +1,211 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import { expect } from 'chai'; +import SeaBackend from '../../../lib/sea/SeaBackend'; +import { buildSeaConnectionOptions } from '../../../lib/sea/SeaAuth'; +import { ConnectionOptions } from '../../../lib/contracts/IDBSQLClient'; +import AuthenticationError from '../../../lib/errors/AuthenticationError'; +import HiveDriverError from '../../../lib/errors/HiveDriverError'; +import { makeFakeBinding } from './_helpers/fakeBinding'; + +describe('SeaAuth + SeaBackend — OAuth M2M auth flow', () => { + describe('buildSeaConnectionOptions', () => { + it('accepts databricks-oauth + oauthClientId + oauthClientSecret', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthClientId: 'client-uuid', + oauthClientSecret: 'dose-fake-secret', + }; + + const native = buildSeaConnectionOptions(opts); + expect(native).to.deep.equal({ + hostName: 'example.cloud.databricks.com', + httpPath: '/sql/1.0/warehouses/abc', + authMode: 'OAuthM2m', + oauthClientId: 'client-uuid', + oauthClientSecret: 'dose-fake-secret', + }); + }); + + it('prepends `/` to the path on the M2M branch too', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: 'sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthClientId: 'client-uuid', + oauthClientSecret: 'dose-fake-secret', + }; + + const native = buildSeaConnectionOptions(opts); + expect(native.httpPath).to.equal('/sql/1.0/warehouses/abc'); + }); + + it('rejects missing oauthClientId with AuthenticationError', () => { + const opts = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthClientSecret: 'dose-fake-secret', + } as unknown as ConnectionOptions; + + expect(() => buildSeaConnectionOptions(opts)).to.throw( + AuthenticationError, + /oauthClientId.*required/, + ); + }); + + it('rejects empty oauthClientId with AuthenticationError', () => { + const opts = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthClientId: '', + oauthClientSecret: 'dose-fake-secret', + } as unknown as ConnectionOptions; + + expect(() => buildSeaConnectionOptions(opts)).to.throw( + AuthenticationError, + /oauthClientId.*required/, + ); + }); + + it('rejects empty oauthClientSecret with AuthenticationError when oauthClientId is set (M2M intent)', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthClientId: 'client-uuid', + oauthClientSecret: '', + }; + + // Presence of `oauthClientId` signals M2M intent; an empty secret + // is a typo/missing-env, not a request to fall back to U2M. + // Surface the M2M "secret required" error so the user knows the + // real problem instead of getting routed to a different flow. + expect(() => buildSeaConnectionOptions(opts)).to.throw( + AuthenticationError, + /oauthClientSecret.*non-empty.*OAuth M2M/, + ); + }); + + it('rejects azureTenantId with a clear Entra-direct-out-of-scope error', () => { + const opts: ConnectionOptions = { + host: 'adb-12345.0.azuredatabricks.net', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthClientId: 'client-uuid', + oauthClientSecret: 'dose-fake-secret', + azureTenantId: 'tenant-uuid', + }; + + expect(() => buildSeaConnectionOptions(opts)).to.throw( + HiveDriverError, + /Azure-direct OAuth.*is not supported/, + ); + }); + + it('rejects useDatabricksOAuthInAzure with the same Entra-direct error', () => { + const opts: ConnectionOptions = { + host: 'adb-12345.0.azuredatabricks.net', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthClientId: 'client-uuid', + oauthClientSecret: 'dose-fake-secret', + useDatabricksOAuthInAzure: true, + }; + + expect(() => buildSeaConnectionOptions(opts)).to.throw( + HiveDriverError, + /Azure-direct OAuth.*is not supported/, + ); + }); + + it('rejects a `persistence` hook on M2M (no cache needed)', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthClientId: 'client-uuid', + oauthClientSecret: 'dose-fake-secret', + persistence: { + read: async () => undefined, + persist: async () => undefined, + }, + }; + + expect(() => buildSeaConnectionOptions(opts)).to.throw( + HiveDriverError, + /`persistence` is not supported on OAuth M2M/, + ); + }); + }); + + describe('SeaBackend.connect + openSession (M2M)', () => { + it('round-trips M2M options through to the napi binding', async () => { + const { binding, calls } = makeFakeBinding(); + const backend = new SeaBackend({ nativeBinding: binding }); + + await backend.connect({ + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthClientId: 'client-uuid', + oauthClientSecret: 'dose-fake-secret', + }); + + const session = await backend.openSession({}); + // Post-integration: SeaSessionBackend generates UUIDv4 ids; the + // earlier auth-only counter-id scheme was superseded. + expect(session.id).to.match( + /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i, + ); + + expect(calls).to.have.lengthOf(1); + expect(calls[0].method).to.equal('openSession'); + expect(calls[0].args[0]).to.deep.equal({ + hostName: 'example.cloud.databricks.com', + httpPath: '/sql/1.0/warehouses/abc', + authMode: 'OAuthM2m', + oauthClientId: 'client-uuid', + oauthClientSecret: 'dose-fake-secret', + }); + + await session.close(); + await backend.close(); + }); + + it('rejects connect() for missing oauthClientId before touching the binding', async () => { + const { binding, calls } = makeFakeBinding(); + const backend = new SeaBackend({ nativeBinding: binding }); + + let caught: unknown; + try { + await backend.connect({ + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + // eslint-disable-next-line @typescript-eslint/no-explicit-any + oauthClientSecret: 'dose-fake-secret', + } as any); + } catch (e) { + caught = e; + } + expect(caught).to.be.instanceOf(AuthenticationError); + expect(calls).to.have.lengthOf(0); + }); + }); +}); diff --git a/tests/unit/sea/auth-pat.test.ts b/tests/unit/sea/auth-pat.test.ts index 21d5d629..bdd024f7 100644 --- a/tests/unit/sea/auth-pat.test.ts +++ b/tests/unit/sea/auth-pat.test.ts @@ -31,6 +31,7 @@ describe('SeaAuth — PAT auth options builder', () => { expect(native).to.deep.equal({ hostName: 'example.cloud.databricks.com', httpPath: '/sql/1.0/warehouses/abc', + authMode: 'Pat', token: 'dapi-fake-pat', }); }); @@ -44,7 +45,10 @@ describe('SeaAuth — PAT auth options builder', () => { }; const native = buildSeaConnectionOptions(opts); - expect(native.token).to.equal('dapi-fake-pat'); + expect(native.authMode).to.equal('Pat'); + if (native.authMode === 'Pat') { + expect(native.token).to.equal('dapi-fake-pat'); + } }); it('prepends `/` to a path missing the leading slash', () => { @@ -79,31 +83,30 @@ describe('SeaAuth — PAT auth options builder', () => { expect(() => buildSeaConnectionOptions(opts)).to.throw(AuthenticationError, /non-empty PAT/); }); - it('rejects OAuth with a clear M0-scope error', () => { + it('accepts databricks-oauth without oauthClientSecret as the U2M happy path', () => { const opts: ConnectionOptions = { host: 'example.cloud.databricks.com', path: '/sql/1.0/warehouses/abc', authType: 'databricks-oauth', }; - expect(() => buildSeaConnectionOptions(opts)).to.throw( - HiveDriverError, - /M0\) supports only PAT.*databricks-oauth.*M1/, - ); + const native = buildSeaConnectionOptions(opts); + expect(native.authMode).to.equal('OAuthU2m'); }); - it('rejects token-provider with a clear M0-scope error', () => { + it('rejects token-provider with a clear unsupported-mode error', () => { const opts: ConnectionOptions = { host: 'example.cloud.databricks.com', path: '/sql/1.0/warehouses/abc', authType: 'token-provider', - tokenProvider: { getToken: async () => 'tok' } as unknown as ConnectionOptions extends infer T - ? // eslint-disable-next-line @typescript-eslint/no-explicit-any - any - : never, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + tokenProvider: { getToken: async () => 'tok' } as any, }; - expect(() => buildSeaConnectionOptions(opts)).to.throw(HiveDriverError, /token-provider.*M1/); + expect(() => buildSeaConnectionOptions(opts)).to.throw( + HiveDriverError, + /unsupported auth mode 'token-provider'/, + ); }); it('rejects external-token, static-token, and custom auth modes', () => { @@ -115,7 +118,10 @@ describe('SeaAuth — PAT auth options builder', () => { path: '/p', authType, } as any; - expect(() => buildSeaConnectionOptions(opts)).to.throw(HiveDriverError, /M0\) supports only PAT/); + expect(() => buildSeaConnectionOptions(opts)).to.throw( + HiveDriverError, + /unsupported auth mode/, + ); } }); }); @@ -124,4 +130,7 @@ describe('SeaAuth — PAT auth options builder', () => { // moved to tests/unit/sea/execution.test.ts during the sea-integration // merge (the execution branch's SeaBackend constructor signature // {context, nativeBinding} supersedes the auth-only (binding) shape). + // OAuth-specific flow-dispatch tests live in auth-m2m.test.ts and + // auth-u2m.test.ts; M2M end-to-end against a live workspace lives in + // tests/integration/sea/auth-m2m-e2e.test.ts. }); diff --git a/tests/unit/sea/auth-u2m.test.ts b/tests/unit/sea/auth-u2m.test.ts new file mode 100644 index 00000000..e18109fa --- /dev/null +++ b/tests/unit/sea/auth-u2m.test.ts @@ -0,0 +1,154 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import { expect } from 'chai'; +import SeaBackend from '../../../lib/sea/SeaBackend'; +import { buildSeaConnectionOptions } from '../../../lib/sea/SeaAuth'; +import { ConnectionOptions } from '../../../lib/contracts/IDBSQLClient'; +import AuthenticationError from '../../../lib/errors/AuthenticationError'; +import HiveDriverError from '../../../lib/errors/HiveDriverError'; +import { makeFakeBinding } from './_helpers/fakeBinding'; + +describe('SeaAuth + SeaBackend — OAuth U2M auth flow', () => { + describe('buildSeaConnectionOptions', () => { + it('accepts databricks-oauth with no clientSecret as the U2M happy path (hardcoded port 8030)', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + }; + + const native = buildSeaConnectionOptions(opts); + expect(native).to.deep.equal({ + hostName: 'example.cloud.databricks.com', + httpPath: '/sql/1.0/warehouses/abc', + authMode: 'OAuthU2m', + oauthRedirectPort: 8030, + }); + }); + + it('rejects oauthClientId without oauthClientSecret as M2M-with-missing-secret', () => { + // Round-4 NF3-2: presence of `oauthClientId` signals M2M intent. + // Routing now keys off the id (the "do I have an id?" signal), + // not the secret. A caller who supplies id but no secret gets the + // M2M "secret is required" error — the actionable message for the + // real problem (typo'd env var, forgot to export it, etc.). + // + // The U2M arm still has a defense-in-depth rejection of a stray + // `oauthClientId` (the kernel hardcodes `databricks-cli` for U2M); + // see [NF-2 / round-1 history]. That defense fires only when + // BOTH id and secret are blank — the M2M arm's stricter checks + // catch this typical caller-error shape first. + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthClientId: 'custom-client', + }; + + expect(() => buildSeaConnectionOptions(opts)).to.throw( + AuthenticationError, + /oauthClientSecret.*non-empty.*OAuth M2M/, + ); + }); + + it('prepends `/` to the path on the U2M branch too', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: 'sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + }; + + const native = buildSeaConnectionOptions(opts); + expect(native.httpPath).to.equal('/sql/1.0/warehouses/abc'); + }); + + it('rejects azureTenantId on the U2M path with the Entra-direct error', () => { + const opts: ConnectionOptions = { + host: 'adb-12345.0.azuredatabricks.net', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + azureTenantId: 'tenant-uuid', + }; + + expect(() => buildSeaConnectionOptions(opts)).to.throw( + HiveDriverError, + /Azure-direct OAuth.*is not supported/, + ); + }); + + it('rejects useDatabricksOAuthInAzure on the U2M path', () => { + const opts: ConnectionOptions = { + host: 'adb-12345.0.azuredatabricks.net', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + useDatabricksOAuthInAzure: true, + }; + + expect(() => buildSeaConnectionOptions(opts)).to.throw( + HiveDriverError, + /Azure-direct OAuth.*is not supported/, + ); + }); + + it('rejects a `persistence` hook on U2M citing the AuthConfig::External kernel-plumbing gap', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + persistence: { + read: async () => undefined, + persist: async () => undefined, + }, + }; + + expect(() => buildSeaConnectionOptions(opts)).to.throw( + HiveDriverError, + /AuthConfig::External.*plumbing/, + ); + }); + }); + + describe('SeaBackend.connect + openSession (U2M)', () => { + it('round-trips U2M options through to the napi binding', async () => { + const { binding, calls } = makeFakeBinding(); + const backend = new SeaBackend({ nativeBinding: binding }); + + await backend.connect({ + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + }); + + const session = await backend.openSession({}); + // Post-integration: SeaSessionBackend generates UUIDv4 ids; the + // earlier auth-only counter-id scheme was superseded. + expect(session.id).to.match( + /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i, + ); + + expect(calls).to.have.lengthOf(1); + expect(calls[0].method).to.equal('openSession'); + expect(calls[0].args[0]).to.deep.equal({ + hostName: 'example.cloud.databricks.com', + httpPath: '/sql/1.0/warehouses/abc', + authMode: 'OAuthU2m', + oauthRedirectPort: 8030, + }); + + await session.close(); + await backend.close(); + }); + }); +}); diff --git a/tests/unit/sea/execution.test.ts b/tests/unit/sea/execution.test.ts index 88d7da23..2981fd3f 100644 --- a/tests/unit/sea/execution.test.ts +++ b/tests/unit/sea/execution.test.ts @@ -21,7 +21,6 @@ import { SeaNativeBinding, SeaNativeConnection, SeaNativeStatement, - SeaExecuteOptions, } from '../../../lib/sea/SeaNativeLoader'; import IClientContext, { ClientConfig } from '../../../lib/contracts/IClientContext'; import IDBSQLLogger, { LogLevel } from '../../../lib/contracts/IDBSQLLogger'; @@ -61,18 +60,15 @@ class FakeNativeConnection implements SeaNativeConnection { public lastSql?: string; - public lastOptions?: SeaExecuteOptions; - public throwOnExecute: Error | null = null; public statementToReturn: FakeNativeStatement = new FakeNativeStatement(); - public async executeStatement(sql: string, options: SeaExecuteOptions): Promise { + public async executeStatement(sql: string): Promise { if (this.throwOnExecute) { throw this.throwOnExecute; } this.lastSql = sql; - this.lastOptions = options; return this.statementToReturn; } @@ -136,7 +132,13 @@ describe('SeaBackend', () => { expect(binding.openSessionStub.called).to.equal(false); }); - it('connect() rejects non-PAT auth (M0 PAT-only)', async () => { + // sea-auth-u2m: `databricks-oauth` with no id/secret is now the U2M happy + // path (M0 was PAT-only, but the OAuth M2M+U2M feature on sea-auth-u2m + // accepts the full set of `databricks-oauth` variants). M2M/U2M flow- + // dispatch coverage lives in auth-m2m.test.ts / auth-u2m.test.ts; + // out-of-scope auth modes are now whatever neither PAT nor + // `databricks-oauth` covers (e.g. `token-provider`, `external-token`). + it('connect() rejects unsupported auth modes (non-PAT, non-OAuth)', async () => { const connection = new FakeNativeConnection(); const binding = makeBinding(connection); const backend = new SeaBackend({ context: makeContext(), nativeBinding: binding }); @@ -146,13 +148,14 @@ describe('SeaBackend', () => { await backend.connect({ host: 'example.databricks.com', path: '/sql/1.0/warehouses/abc', - authType: 'databricks-oauth', - } as ConnectionOptions); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + authType: 'token-provider', + } as any); } catch (err) { thrown = err; } expect(thrown).to.be.instanceOf(HiveDriverError); - expect((thrown as Error).message).to.match(/access-token/); + expect((thrown as Error).message).to.match(/unsupported auth mode/); }); it('connect() rejects missing token', async () => { @@ -207,9 +210,12 @@ describe('SeaBackend', () => { expect(binding.openSessionStub.calledOnce).to.equal(true); const args = binding.openSessionStub.firstCall.args[0]; + // sea-auth-u2m introduced the discriminated SeaNativeConnectionOptions + // shape with a leading `authMode` tag — `'Pat'` for the PAT branch. expect(args).to.deep.equal({ hostName: 'workspace.example', httpPath: '/sql/1.0/warehouses/xyz', + authMode: 'Pat', token: 'dapi-token', }); }); @@ -230,7 +236,7 @@ describe('SeaBackend', () => { expect(sessionBackend.id).to.be.a('string').and.have.length.greaterThan(0); }); - it('openSession() propagates initialCatalog / initialSchema / sessionConfig through to executeStatement', async () => { + it('openSession() forwards initialCatalog / initialSchema / configuration to the napi openSession call (not per-statement)', async () => { const connection = new FakeNativeConnection(); const binding = makeBinding(connection); const backend = new SeaBackend({ context: makeContext(), nativeBinding: binding }); @@ -247,14 +253,22 @@ describe('SeaBackend', () => { configuration: { 'spark.sql.execution.arrow.enabled': 'true' }, }); - await session.executeStatement('SELECT 1', {}); + // The defaults reach the kernel via `Session::builder().defaults()` + + // `.session_conf()`, applied on `CreateSession`. Assert they were + // folded into the napi `openSession` arg. + expect(binding.openSessionStub.calledOnce).to.equal(true); + expect(binding.openSessionStub.firstCall.args[0]).to.deep.include({ + authMode: 'Pat', + token: 't', + catalog: 'main', + schema: 'default', + sessionConf: { 'spark.sql.execution.arrow.enabled': 'true' }, + }); + // And the SQL still threads through executeStatement (now with no + // per-statement options). + await session.executeStatement('SELECT 1', {}); expect(connection.lastSql).to.equal('SELECT 1'); - expect(connection.lastOptions).to.deep.equal({ - initialCatalog: 'main', - initialSchema: 'default', - sessionConfig: { 'spark.sql.execution.arrow.enabled': 'true' }, - }); }); it('close() clears connection state without throwing', async () => { @@ -275,8 +289,8 @@ describe('SeaBackend', () => { }); describe('SeaSessionBackend', () => { - function makeSession(connection: SeaNativeConnection, defaults = {}) { - return new SeaSessionBackend({ connection, context: makeContext(), defaults }); + function makeSession(connection: SeaNativeConnection) { + return new SeaSessionBackend({ connection, context: makeContext() }); } it('executeStatement passes sql through verbatim', async () => { @@ -294,21 +308,6 @@ describe('SeaSessionBackend', () => { expect(op.id).to.be.a('string').and.have.length.greaterThan(0); }); - it('executeStatement merges session defaults into ExecuteOptions', async () => { - const connection = new FakeNativeConnection(); - const session = makeSession(connection, { - initialCatalog: 'main', - initialSchema: 'default', - sessionConfig: { foo: 'bar' }, - }); - await session.executeStatement('SELECT 1', {}); - expect(connection.lastOptions).to.deep.equal({ - initialCatalog: 'main', - initialSchema: 'default', - sessionConfig: { foo: 'bar' }, - }); - }); - it('executeStatement rejects namedParameters (M1)', async () => { const connection = new FakeNativeConnection(); const session = makeSession(connection);