diff --git a/cmd/root/root.go b/cmd/root/root.go index 6b6de2a9baa..fc7b583c3c2 100644 --- a/cmd/root/root.go +++ b/cmd/root/root.go @@ -79,6 +79,7 @@ func New(ctx context.Context) *cobra.Command { ctx = withCommandExecIdInUserAgent(ctx) ctx = withUpstreamInUserAgent(ctx) ctx = withInteractiveModeInUserAgent(ctx) + ctx = withHostInUserAgent(ctx) ctx = InjectTestPidToUserAgent(ctx) cmd.SetContext(ctx) return nil @@ -185,6 +186,7 @@ Stack Trace: Command: commandStr, OperatingSystem: runtime.GOOS, DbrVersion: dbr.RuntimeVersion(ctx).String(), + Host: string(cmdio.DetectHost(ctx)), ExecutionTimeMs: time.Since(startTime).Milliseconds(), ExitCode: int64(exitCode), }) diff --git a/cmd/root/user_agent_host.go b/cmd/root/user_agent_host.go new file mode 100644 index 00000000000..2024dffc7c8 --- /dev/null +++ b/cmd/root/user_agent_host.go @@ -0,0 +1,24 @@ +// This file integrates terminal/IDE host detection with the user agent string. +// +// The detection logic is in libs/cmdio. This file calls cmdio.DetectHost and +// adds the detected host to the user agent under the "host" key. +// +// Example user agent strings: +// - "cli/X.Y.Z ... host/vscode ..." +// - "cli/X.Y.Z ... host/jetbrains ..." +// - "cli/X.Y.Z ... host/unknown ..." +package root + +import ( + "context" + + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/databricks-sdk-go/useragent" +) + +// hostKey is the user agent key under which the detected host is reported. +const hostKey = "host" + +func withHostInUserAgent(ctx context.Context) context.Context { + return useragent.InContext(ctx, hostKey, string(cmdio.DetectHost(ctx))) +} diff --git a/cmd/root/user_agent_host_test.go b/cmd/root/user_agent_host_test.go new file mode 100644 index 00000000000..d639baac83b --- /dev/null +++ b/cmd/root/user_agent_host_test.go @@ -0,0 +1,34 @@ +package root + +import ( + "testing" + + "github.com/databricks/databricks-sdk-go/useragent" + "github.com/stretchr/testify/assert" +) + +// hostEnvKeys mirrors the env vars read by cmdio.DetectHost. 
Tests clear them +// so the developer's shell environment cannot bleed into assertions. +var hostEnvKeys = []string{ + "TERM_PROGRAM", + "TERMINAL_EMULATOR", +} + +func clearHostEnv(t *testing.T) { + for _, k := range hostEnvKeys { + t.Setenv(k, "") + } +} + +func TestHostInUserAgent_Unknown(t *testing.T) { + clearHostEnv(t) + ctx := withHostInUserAgent(t.Context()) + assert.Contains(t, useragent.FromContext(ctx), "host/unknown") +} + +func TestHostInUserAgent_VSCode(t *testing.T) { + clearHostEnv(t) + t.Setenv("TERM_PROGRAM", "vscode") + ctx := withHostInUserAgent(t.Context()) + assert.Contains(t, useragent.FromContext(ctx), "host/vscode") +} diff --git a/libs/cmdio/host.go b/libs/cmdio/host.go new file mode 100644 index 00000000000..b91e90d79f0 --- /dev/null +++ b/libs/cmdio/host.go @@ -0,0 +1,68 @@ +package cmdio + +import ( + "context" + + "github.com/databricks/cli/libs/env" +) + +// Host describes the terminal or IDE the CLI is being invoked from. +// Values are an enum, never raw env values, so they are safe to log. +type Host string + +const ( + // HostVSCode covers TERM_PROGRAM=vscode, which is set by vanilla VSCode + // and every fork that inherits its terminal integration (Cursor, Windsurf, + // code-server, etc.). The forks don't expose a stable, trustworthy + // discriminator in env, so we deliberately don't try to split them apart. + HostVSCode Host = "vscode" + + HostJetBrains Host = "jetbrains" + HostAppleTerminal Host = "apple-terminal" + HostITerm Host = "iterm" + HostWarp Host = "warp" + HostWezTerm Host = "wezterm" + HostGhostty Host = "ghostty" + HostUnknown Host = "unknown" +) + +const ( + envTermProgram = "TERM_PROGRAM" + envTerminalEmulator = "TERMINAL_EMULATOR" +) + +// DetectHost returns the terminal or IDE host the CLI is being run from, +// derived from environment variables only. +// +// Only detections backed by direct observation or upstream documentation +// are included. Anything we can't verify (Windsurf vs. 
Cursor split, Zed, +// Hyper, Tabby, etc.) falls into HostUnknown until we see real evidence. +// +// Whether a user has a particular extension or AI agent active (Copilot, +// Claude Code, Cursor Agent, etc.) is intentionally not modelled here. +// That's an independent dimension, so a downstream query can ask "vscode +// users without Copilot" by joining the two signals. +func DetectHost(ctx context.Context) Host { + switch env.Get(ctx, envTermProgram) { + case "vscode": + return HostVSCode + case "Apple_Terminal": + return HostAppleTerminal + case "iTerm.app": + return HostITerm + case "WarpTerminal": + return HostWarp + case "WezTerm": + return HostWezTerm + case "ghostty": + return HostGhostty + } + + // JediTerm is JetBrains' terminal library; sets TERMINAL_EMULATOR + // per https://github.com/JetBrains/jediterm/issues/253. + if env.Get(ctx, envTerminalEmulator) == "JetBrains-JediTerm" { + return HostJetBrains + } + + return HostUnknown +} diff --git a/libs/cmdio/host_test.go b/libs/cmdio/host_test.go new file mode 100644 index 00000000000..2c3a05111a7 --- /dev/null +++ b/libs/cmdio/host_test.go @@ -0,0 +1,85 @@ +package cmdio + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +// envKeysToIsolate lists every environment variable read by DetectHost. +// Tests clear all of them at the start so process env from the developer's +// shell (e.g. TERM_PROGRAM=iTerm.app on a macOS dev machine) cannot leak in. 
+var envKeysToIsolate = []string{ + envTermProgram, + envTerminalEmulator, +} + +func isolateHostEnv(t *testing.T, overrides map[string]string) { + for _, k := range envKeysToIsolate { + t.Setenv(k, "") + } + for k, v := range overrides { + t.Setenv(k, v) + } +} + +func TestDetectHost(t *testing.T) { + tests := []struct { + name string + envs map[string]string + want Host + }{ + { + name: "no env vars", + envs: nil, + want: HostUnknown, + }, + { + name: "vscode and forks all classify as vscode", + envs: map[string]string{"TERM_PROGRAM": "vscode"}, + want: HostVSCode, + }, + { + name: "jetbrains", + envs: map[string]string{"TERMINAL_EMULATOR": "JetBrains-JediTerm"}, + want: HostJetBrains, + }, + { + name: "apple terminal", + envs: map[string]string{"TERM_PROGRAM": "Apple_Terminal"}, + want: HostAppleTerminal, + }, + { + name: "iterm", + envs: map[string]string{"TERM_PROGRAM": "iTerm.app"}, + want: HostITerm, + }, + { + name: "warp", + envs: map[string]string{"TERM_PROGRAM": "WarpTerminal"}, + want: HostWarp, + }, + { + name: "wezterm", + envs: map[string]string{"TERM_PROGRAM": "WezTerm"}, + want: HostWezTerm, + }, + { + name: "ghostty", + envs: map[string]string{"TERM_PROGRAM": "ghostty"}, + want: HostGhostty, + }, + { + name: "unknown TERM_PROGRAM falls through to unknown", + envs: map[string]string{"TERM_PROGRAM": "somethingnew"}, + want: HostUnknown, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + isolateHostEnv(t, tt.envs) + assert.Equal(t, tt.want, DetectHost(t.Context())) + }) + } +} diff --git a/libs/telemetry/protos/databricks_cli_log.go b/libs/telemetry/protos/databricks_cli_log.go index 64baa6b384a..0792ee03032 100644 --- a/libs/telemetry/protos/databricks_cli_log.go +++ b/libs/telemetry/protos/databricks_cli_log.go @@ -23,6 +23,11 @@ type ExecutionContext struct { // If true, the CLI is being run from a Databricks notebook / cluster web terminal. 
FromWebTerminal bool `json:"from_web_terminal,omitempty"` + // Terminal or IDE the CLI is being run from, detected from environment + // variables (TERM_PROGRAM, TERMINAL_EMULATOR, etc.). Enum value, never a + // raw env value. See libs/cmdio/host.go for the full enum. + Host string `json:"host,omitempty"` + // Time taken for the CLI command to execute. // We want to serialize the zero value as well so the omitempty tag is not set. ExecutionTimeMs int64 `json:"execution_time_ms"`