Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 82 additions & 14 deletions lib/DBSQLClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -298,9 +298,9 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I
/**
* Extract the numeric workspace ID for telemetry.
*
* The only reliable carrier in the connection params today is the `?o=N`
* query parameter on `httpPath` — Databricks SQL warehouses are typically
* connected to via paths like `/sql/1.0/warehouses/<id>?o=12345678901234`.
* Two URL shapes carry the workspace ID today:
* - Warehouse, query form: `/sql/1.0/warehouses/<id>?o=<wsId>`
* - All-purpose cluster, path form: `sql/protocolv1/o/<wsId>/<cluster-id>`
*
* Host-based extraction was tried previously but produced confidently-wrong
* values:
Expand All @@ -313,21 +313,84 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I
* Returns `undefined` when no workspace ID can be derived. Server-side
* attribution is better off seeing a missing field than a wrong value.
*/
private extractWorkspaceId(): string | undefined {
const { httpPath } = this;
private static extractWorkspaceId(httpPath: string | undefined): string | undefined {
if (!httpPath) {
return undefined;
}
const queryIdx = httpPath.indexOf('?');
if (queryIdx < 0) {
return undefined;
// Warehouse form: `?o=<digits>` in the query string.
if (queryIdx >= 0) {
const query = httpPath.slice(queryIdx + 1);
// Match `o=<digits>` as the first param, an inner `&o=<digits>`, etc.
// Workspace IDs are decimal integers; reject anything else so a stray
// `o=tenant_42` doesn't ship as a workspace ID.
const queryMatch = query.match(/(?:^|&)o=(\d+)(?:&|$)/);
if (queryMatch) {
return queryMatch[1];
}
}
// All-purpose cluster form: `/o/<digits>/<cluster-id>` as a path segment.
const pathOnly = queryIdx >= 0 ? httpPath.slice(0, queryIdx) : httpPath;
const pathMatch = pathOnly.match(/(?:^|\/)o\/(\d+)(?:\/|$)/);
return pathMatch ? pathMatch[1] : undefined;
}

// Detects an `o=<value>` or `/o/<value>` where `<value>` is present but
// non-numeric, so the caller can warn instead of silently dropping a
// malformed workspace param.
private static hasMalformedOrgParam(httpPath: string | undefined): boolean {
if (!httpPath) {
return false;
}
const queryIdx = httpPath.indexOf('?');
if (queryIdx >= 0) {
const query = httpPath.slice(queryIdx + 1);
const hasOrg = /(?:^|&)o=/.test(query);
const hasNumericOrg = /(?:^|&)o=\d+(?:&|$)/.test(query);
if (hasOrg && !hasNumericOrg) {
return true;
}
}
const query = httpPath.slice(queryIdx + 1);
// Match `o=<digits>` as the first param, an inner `&o=<digits>`, etc.
// Workspace IDs are decimal integers; reject anything else so a stray
// `o=tenant_42` doesn't ship as a workspace ID.
const match = query.match(/(?:^|&)o=(\d+)(?:&|$)/);
return match ? match[1] : undefined;
const pathOnly = queryIdx >= 0 ? httpPath.slice(0, queryIdx) : httpPath;
const hasPathOrg = /(?:^|\/)o\/[^/]+/.test(pathOnly);
const hasNumericPathOrg = /(?:^|\/)o\/\d+(?:\/|$)/.test(pathOnly);
return hasPathOrg && !hasNumericPathOrg;
}

/**
* Build the customHeaders map applied to telemetry POSTs and feature-flag
* GETs (SPOG / Single Panel of Glass support). When `httpPath` carries a
* workspace ID — either as a `?o=<wsId>` query (warehouse) or a
* `/o/<wsId>/<cluster-id>` path segment (all-purpose cluster) — endpoints
* that don't include the workspace in their URL path need it conveyed via
* the `x-databricks-org-id` header instead. A user-supplied value in
* `userHeaders` (case-insensitively keyed) wins over the parsed value.
*
* `httpPath` is passed explicitly (rather than read off `this.httpPath`) so
* the SPOG-routing dependency is visible in the signature — a future
* refactor that reorders connect() can't silently break injection.
*/
private buildCustomHeaders(
httpPath: string | undefined,
userHeaders: Record<string, string> | undefined,
): Record<string, string> | undefined {
const merged: Record<string, string> = { ...(userHeaders ?? {}) };
const hasOrgIdAlready = Object.keys(merged).some((k) => k.toLowerCase() === 'x-databricks-org-id');
if (hasOrgIdAlready) {
this.logger.log(LogLevel.debug, 'SPOG: x-databricks-org-id supplied by caller; not extracting from httpPath');
} else {
const orgId = DBSQLClient.extractWorkspaceId(httpPath);
if (orgId) {
merged['x-databricks-org-id'] = orgId;
this.logger.log(LogLevel.debug, `SPOG: injecting x-databricks-org-id=${orgId} (extracted from httpPath)`);
} else if (DBSQLClient.hasMalformedOrgParam(httpPath)) {
this.logger.log(
LogLevel.warn,
'SPOG: httpPath contains non-numeric workspace ID; x-databricks-org-id not injected',
);
}
}
Comment thread
samikshya-db marked this conversation as resolved.
return Object.keys(merged).length > 0 ? merged : undefined;
}

/**
Expand Down Expand Up @@ -561,6 +624,11 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I
this.config.userAgentEntry = options.userAgentEntry;
}

// SPOG: parse `?o=<workspaceId>` out of httpPath and stash it as
// `x-databricks-org-id` for the telemetry + feature-flag clients, which
// hit endpoints that don't carry the workspace in their URL path.
this.config.customHeaders = this.buildCustomHeaders(options.path, options.customHeaders);

this.authProvider = this.createAuthProvider(options, authProvider);

this.connectionProvider = this.createConnectionProvider(options);
Expand Down Expand Up @@ -677,7 +745,7 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I
safeEmit(this, (emitter) => {
if (!this.host) return;
const latencyMs = Date.now() - startTime;
const workspaceId = this.extractWorkspaceId();
const workspaceId = DBSQLClient.extractWorkspaceId(this.httpPath);
const driverConfig = this.driverConfigShipped ? undefined : this.buildDriverConfiguration();
if (driverConfig) {
this.driverConfigShipped = true;
Expand Down
11 changes: 11 additions & 0 deletions lib/contracts/IClientContext.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,17 @@ export interface ClientConfig {
*/
telemetryFlushOnExit?: boolean;
userAgentEntry?: string;

/**
* Extra HTTP headers attached to driver-owned out-of-band requests
* (telemetry, feature flags). Populated by `DBSQLClient.connect()` from
* `ConnectionOptions.customHeaders` plus an `x-databricks-org-id` header
* derived from the `?o=` query parameter on `httpPath` when present, to
* support SPOG (Single Panel of Glass) account-level routing on endpoints
* that don't carry `?o=` in their URL path. NOT applied to Thrift or
* OAuth/OIDC requests.
*/
customHeaders?: Record<string, string>;
}

export default interface IClientContext {
Expand Down
11 changes: 11 additions & 0 deletions lib/contracts/IDBSQLClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,17 @@ export type ConnectionOptions = {
proxy?: ProxyOptions;
enableMetricViewMetadata?: boolean;

/**
* Extra HTTP headers attached to driver-owned out-of-band requests
* (telemetry POSTs and feature-flag GETs). Not applied to the primary
* Thrift transport or to OAuth/OIDC token requests.
*
* When `path` contains `?o=<workspaceId>` (SPOG account-level routing),
* the driver automatically injects an `x-databricks-org-id` header unless
* one is already present in this map.
*/
customHeaders?: Record<string, string>;

/**
* Whether the driver emits telemetry events (connection / statement /
* cloud-fetch / error). Defaults to `true`.
Expand Down
1 change: 1 addition & 0 deletions lib/telemetry/DatabricksTelemetryExporter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ export default class DatabricksTelemetryExporter {
let headers: Record<string, string> = {
'Content-Type': 'application/json',
'User-Agent': userAgent,
...(config.customHeaders ?? {}),
};

if (authenticatedExport) {
Expand Down
1 change: 1 addition & 0 deletions lib/telemetry/FeatureFlagCache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ export default class FeatureFlagCache {
const headers: Record<string, string> = {
'Content-Type': 'application/json',
'User-Agent': this.userAgent,
...(this.context.getConfig().customHeaders ?? {}),
...(await this.getAuthHeaders()),
};

Expand Down
6 changes: 5 additions & 1 deletion lib/telemetry/TelemetryClientProvider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,11 @@ class TelemetryClientProvider {
return;
}

holder.client.unregisterContext(context);
// Skip unregister on the last release so close()'s final flush can still
// resolve auth/connection providers from the FIFO snapshot.
if (holder.refCount > 1) {
holder.client.unregisterContext(context);
}
holder.refCount -= 1;
logger.log(LogLevel.debug, `TelemetryClient reference count for ${host}: ${holder.refCount}`);

Expand Down
145 changes: 132 additions & 13 deletions tests/unit/DBSQLClient.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,18 @@ describe('DBSQLClient.connect', () => {

logSpy.restore();
});

it('populates config.customHeaders with org-id parsed from ?o= (SPOG)', async () => {
const client = new DBSQLClient();
await client.connect({ ...connectOptions, path: '/sql/1.0/warehouses/abc?o=12345678901234' });
expect(client.getConfig().customHeaders).to.deep.equal({ 'x-databricks-org-id': '12345678901234' });
});

it('leaves config.customHeaders undefined when path has no ?o= and none supplied', async () => {
const client = new DBSQLClient();
await client.connect({ ...connectOptions, path: '/sql/1.0/warehouses/abc' });
expect(client.getConfig().customHeaders).to.be.undefined;
});
});

describe('DBSQLClient.openSession', () => {
Expand Down Expand Up @@ -755,33 +767,140 @@ describe('DBSQLClient telemetry paths', () => {

describe('extractWorkspaceId', () => {
it('returns the numeric o= param from httpPath', () => {
const client = new DBSQLClient();
(client as any).httpPath = '/sql/1.0/warehouses/abc?o=12345678901234';
const id = (client as any).extractWorkspaceId();
const id = (DBSQLClient as any).extractWorkspaceId('/sql/1.0/warehouses/abc?o=12345678901234');
expect(id).to.equal('12345678901234');
});

it('returns undefined when no query string', () => {
const client = new DBSQLClient();
(client as any).httpPath = '/sql/1.0/warehouses/abc';
expect((client as any).extractWorkspaceId()).to.be.undefined;
expect((DBSQLClient as any).extractWorkspaceId('/sql/1.0/warehouses/abc')).to.be.undefined;
});

it('returns undefined when o= is not numeric', () => {
const client = new DBSQLClient();
(client as any).httpPath = '/sql/1.0/warehouses/abc?o=tenant_xyz';
expect((client as any).extractWorkspaceId()).to.be.undefined;
expect((DBSQLClient as any).extractWorkspaceId('/sql/1.0/warehouses/abc?o=tenant_xyz')).to.be.undefined;
});

it('handles o= as a non-first param', () => {
const client = new DBSQLClient();
(client as any).httpPath = '/sql/1.0/warehouses/abc?foo=bar&o=42&baz=qux';
expect((client as any).extractWorkspaceId()).to.equal('42');
expect((DBSQLClient as any).extractWorkspaceId('/sql/1.0/warehouses/abc?foo=bar&o=42&baz=qux')).to.equal('42');
});

it('returns undefined when httpPath is unset', () => {
expect((DBSQLClient as any).extractWorkspaceId(undefined)).to.be.undefined;
});

it('returns the numeric workspace id from all-purpose cluster path form', () => {
expect((DBSQLClient as any).extractWorkspaceId('sql/protocolv1/o/99999999999999/0101-000000-aaaaaaaa')).to.equal(
'99999999999999',
);
});

it('returns the numeric workspace id from all-purpose cluster path with leading slash', () => {
expect((DBSQLClient as any).extractWorkspaceId('/sql/protocolv1/o/12345/0101-000000-aaaaaaaa')).to.equal('12345');
});

it('returns undefined when all-purpose cluster path has non-numeric workspace segment', () => {
expect((DBSQLClient as any).extractWorkspaceId('sql/protocolv1/o/tenant_xyz/0101-000000-aaaaaaaa')).to.be
.undefined;
});

it('prefers ?o= query form over /o/ path form when both are present', () => {
expect((DBSQLClient as any).extractWorkspaceId('sql/protocolv1/o/111/cluster?o=222')).to.equal('222');
});
});

describe('buildCustomHeaders (SPOG)', () => {
it('injects x-databricks-org-id from ?o= in httpPath', () => {
const client = new DBSQLClient();
expect((client as any).extractWorkspaceId()).to.be.undefined;
const headers = (client as any).buildCustomHeaders('/sql/1.0/warehouses/abc?o=12345678901234', undefined);
expect(headers).to.deep.equal({ 'x-databricks-org-id': '12345678901234' });
});

it('returns undefined when no ?o= and no user-supplied customHeaders', () => {
const client = new DBSQLClient();
const headers = (client as any).buildCustomHeaders('/sql/1.0/warehouses/abc', undefined);
expect(headers).to.be.undefined;
});

it('preserves user-supplied customHeaders alongside parsed org-id', () => {
const client = new DBSQLClient();
const headers = (client as any).buildCustomHeaders('/sql/1.0/warehouses/abc?o=42', { 'x-trace-id': 'tid-001' });
expect(headers).to.deep.equal({ 'x-trace-id': 'tid-001', 'x-databricks-org-id': '42' });
});

it('user-supplied x-databricks-org-id wins over ?o= parsed value (case-insensitive)', () => {
const client = new DBSQLClient();
const headers = (client as any).buildCustomHeaders('/sql/1.0/warehouses/abc?o=42', {
'X-Databricks-Org-Id': '999',
});
expect(headers).to.deep.equal({ 'X-Databricks-Org-Id': '999' });
});

it('does not inject org-id when ?o= value is non-numeric', () => {
const client = new DBSQLClient();
const headers = (client as any).buildCustomHeaders('/sql/1.0/warehouses/abc?o=tenant_xyz', undefined);
expect(headers).to.be.undefined;
});

it('injects x-databricks-org-id from all-purpose cluster path form', () => {
const client = new DBSQLClient();
const headers = (client as any).buildCustomHeaders(
'sql/protocolv1/o/99999999999999/0101-000000-aaaaaaaa',
undefined,
);
expect(headers).to.deep.equal({ 'x-databricks-org-id': '99999999999999' });
});

it('logs a warning when workspace ID segment is non-numeric (path form)', () => {
const client = new DBSQLClient();
const logSpy = sinon.spy((client as any).logger, 'log');
try {
(client as any).buildCustomHeaders('sql/protocolv1/o/tenant_xyz/cluster', undefined);
const warnCalls = logSpy.getCalls().filter((c) => c.args[0] === LogLevel.warn);
expect(warnCalls).to.have.lengthOf(1);
expect(warnCalls[0].args[1]).to.match(/non-numeric workspace ID/);
} finally {
logSpy.restore();
}
});

it('logs a warning when ?o= is present but non-numeric', () => {
const client = new DBSQLClient();
const logSpy = sinon.spy((client as any).logger, 'log');
try {
(client as any).buildCustomHeaders('/sql/1.0/warehouses/abc?o=tenant_xyz', undefined);
const warnCalls = logSpy.getCalls().filter((c) => c.args[0] === LogLevel.warn);
expect(warnCalls).to.have.lengthOf(1);
expect(warnCalls[0].args[1]).to.match(/non-numeric workspace ID/);
} finally {
logSpy.restore();
}
});

it('logs a debug line when injecting org-id from httpPath', () => {
const client = new DBSQLClient();
const logSpy = sinon.spy((client as any).logger, 'log');
try {
(client as any).buildCustomHeaders('/sql/1.0/warehouses/abc?o=42', undefined);
const injectLog = logSpy
.getCalls()
.find((c) => c.args[0] === LogLevel.debug && /injecting x-databricks-org-id=42/.test(String(c.args[1])));
expect(injectLog, 'expected SPOG inject debug log').to.exist;
} finally {
logSpy.restore();
}
});

it('logs a debug line when caller supplies x-databricks-org-id', () => {
const client = new DBSQLClient();
const logSpy = sinon.spy((client as any).logger, 'log');
try {
(client as any).buildCustomHeaders('/sql/1.0/warehouses/abc?o=42', { 'x-databricks-org-id': '999' });
const callerLog = logSpy
.getCalls()
.find((c) => c.args[0] === LogLevel.debug && /supplied by caller/.test(String(c.args[1])));
expect(callerLog, 'expected SPOG caller-supplied debug log').to.exist;
} finally {
logSpy.restore();
}
});
});

Expand Down
Loading
Loading