From 932c672f40bab5f88e38855b01041e6ec3f3487f Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 21 May 2026 15:43:06 +0200 Subject: [PATCH] Add env section to bundle scripts for DABs interpolation scripts.<name> now accepts an env: map whose values may reference ${bundle.*}, ${workspace.*}, and ${var.*}. The script content is still passed to the shell as-is (no DABs interpolation), removing the ambiguity between bundle variables and shell variables that previously forced all ${...} usage to be rejected. When an env value references an unsupported prefix (e.g. ${resources.*}), validation reports a clear error pointing at the field. The content-side error now suggests the new env section instead of just stating ${...} is unsupported. Fixes #4179 Co-authored-by: Isaac --- NEXT_CHANGELOG.md | 1 + .../run/scripts/env-bad-prefix/databricks.yml | 14 +++ .../run/scripts/env-bad-prefix/out.test.toml | 3 + .../run/scripts/env-bad-prefix/output.txt | 16 ++++ .../bundle/run/scripts/env-bad-prefix/script | 1 + .../run/scripts/env-bad-prefix/test.toml | 5 ++ .../run/scripts/env-section/databricks.yml | 21 +++++ .../run/scripts/env-section/out.test.toml | 3 + .../bundle/run/scripts/env-section/output.txt | 6 ++ .../bundle/run/scripts/env-section/script | 1 + .../bundle/run/scripts/env-section/test.toml | 5 ++ .../run/scripts/no-interpolation/output.txt | 56 ++++++++---- bundle/config/root.go | 7 ++ bundle/config/validate/scripts.go | 87 +++++++++++++++---- bundle/internal/schema/annotations.yml | 3 + bundle/schema/jsonschema.json | 3 + cmd/bundle/run.go | 13 ++- 17 files changed, 207 insertions(+), 38 deletions(-) create mode 100644 acceptance/bundle/run/scripts/env-bad-prefix/databricks.yml create mode 100644 acceptance/bundle/run/scripts/env-bad-prefix/out.test.toml create mode 100644 acceptance/bundle/run/scripts/env-bad-prefix/output.txt create mode 100644 acceptance/bundle/run/scripts/env-bad-prefix/script create mode 100644 
acceptance/bundle/run/scripts/env-bad-prefix/test.toml create mode 100644 acceptance/bundle/run/scripts/env-section/databricks.yml create mode 100644 acceptance/bundle/run/scripts/env-section/out.test.toml create mode 100644 acceptance/bundle/run/scripts/env-section/output.txt create mode 100644 acceptance/bundle/run/scripts/env-section/script create mode 100644 acceptance/bundle/run/scripts/env-section/test.toml diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index f2e569c609c..45363106df2 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -8,3 +8,4 @@ ### Bundles * The error reported when a direct-only resource (catalogs, external locations, vector search endpoints) is used with the terraform engine now also suggests setting `bundle.engine: direct` in `databricks.yml`, in addition to the `DATABRICKS_BUNDLE_ENGINE` environment variable ([#5295](https://github.com/databricks/cli/pull/5295)). +* Added an `env:` section to `scripts.<name>` for declaring environment variables whose values may reference `${bundle.*}`, `${workspace.*}`, and `${var.*}`. Script `content:` continues to be passed to the shell as-is (no DABs interpolation), avoiding ambiguity with shell variables. See issue [#4179](https://github.com/databricks/cli/issues/4179). 
diff --git a/acceptance/bundle/run/scripts/env-bad-prefix/databricks.yml b/acceptance/bundle/run/scripts/env-bad-prefix/databricks.yml new file mode 100644 index 00000000000..c771fb55473 --- /dev/null +++ b/acceptance/bundle/run/scripts/env-bad-prefix/databricks.yml @@ -0,0 +1,14 @@ +bundle: + name: script-env-bad-prefix + +resources: + jobs: + my_job: + name: my-job + +scripts: + bad: + env: + JOB_ID: ${resources.jobs.my_job.id} + UNKNOWN: ${something.else} + content: echo "$JOB_ID $UNKNOWN" diff --git a/acceptance/bundle/run/scripts/env-bad-prefix/out.test.toml b/acceptance/bundle/run/scripts/env-bad-prefix/out.test.toml new file mode 100644 index 00000000000..f784a183258 --- /dev/null +++ b/acceptance/bundle/run/scripts/env-bad-prefix/out.test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/run/scripts/env-bad-prefix/output.txt b/acceptance/bundle/run/scripts/env-bad-prefix/output.txt new file mode 100644 index 00000000000..239578fe563 --- /dev/null +++ b/acceptance/bundle/run/scripts/env-bad-prefix/output.txt @@ -0,0 +1,16 @@ + +>>> [CLI] bundle validate +Error: ${resources.jobs.my_job.id} cannot be used in scripts.bad.env.JOB_ID; only ${bundle.*}, ${workspace.*}, and ${var.*} are resolved before scripts execute + at scripts.bad.env.JOB_ID + in databricks.yml:12:15 + +Error: ${something.else} cannot be used in scripts.bad.env.UNKNOWN; only ${bundle.*}, ${workspace.*}, and ${var.*} are resolved before scripts execute + at scripts.bad.env.UNKNOWN + in databricks.yml:13:16 + +Name: script-env-bad-prefix +Target: default + +Found 2 errors + +Exit code: 1 diff --git a/acceptance/bundle/run/scripts/env-bad-prefix/script b/acceptance/bundle/run/scripts/env-bad-prefix/script new file mode 100644 index 00000000000..f52b452ee67 --- /dev/null +++ b/acceptance/bundle/run/scripts/env-bad-prefix/script @@ -0,0 +1 @@ +errcode trace $CLI bundle validate diff --git 
a/acceptance/bundle/run/scripts/env-bad-prefix/test.toml b/acceptance/bundle/run/scripts/env-bad-prefix/test.toml new file mode 100644 index 00000000000..c2bbdaa30cf --- /dev/null +++ b/acceptance/bundle/run/scripts/env-bad-prefix/test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] +DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/run/scripts/env-section/databricks.yml b/acceptance/bundle/run/scripts/env-section/databricks.yml new file mode 100644 index 00000000000..648cbd9cf36 --- /dev/null +++ b/acceptance/bundle/run/scripts/env-section/databricks.yml @@ -0,0 +1,21 @@ +bundle: + name: script-env-section + +variables: + region: + default: us-west-2 + database: + default: mydb + +scripts: + show_env: + env: + BUNDLE_NAME: ${bundle.name} + REGION: ${var.region} + DATABASE: ${var.database} + MIXED: "region=${var.region};db=${var.database}" + content: | + echo "bundle=$BUNDLE_NAME" + echo "region=$REGION" + echo "database=$DATABASE" + echo "mixed=$MIXED" diff --git a/acceptance/bundle/run/scripts/env-section/out.test.toml b/acceptance/bundle/run/scripts/env-section/out.test.toml new file mode 100644 index 00000000000..f784a183258 --- /dev/null +++ b/acceptance/bundle/run/scripts/env-section/out.test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/run/scripts/env-section/output.txt b/acceptance/bundle/run/scripts/env-section/output.txt new file mode 100644 index 00000000000..335d9028fbe --- /dev/null +++ b/acceptance/bundle/run/scripts/env-section/output.txt @@ -0,0 +1,6 @@ + +>>> [CLI] bundle run show_env +bundle=script-env-section +region=us-west-2 +database=mydb +mixed=region=us-west-2;db=mydb diff --git a/acceptance/bundle/run/scripts/env-section/script b/acceptance/bundle/run/scripts/env-section/script new file mode 100644 index 00000000000..e25d0638fb7 --- /dev/null +++ b/acceptance/bundle/run/scripts/env-section/script 
@@ -0,0 +1 @@ +trace $CLI bundle run show_env diff --git a/acceptance/bundle/run/scripts/env-section/test.toml b/acceptance/bundle/run/scripts/env-section/test.toml new file mode 100644 index 00000000000..c2bbdaa30cf --- /dev/null +++ b/acceptance/bundle/run/scripts/env-section/test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] +DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/run/scripts/no-interpolation/output.txt b/acceptance/bundle/run/scripts/no-interpolation/output.txt index 45bc397bdb7..d5fe772792c 100644 --- a/acceptance/bundle/run/scripts/no-interpolation/output.txt +++ b/acceptance/bundle/run/scripts/no-interpolation/output.txt @@ -1,40 +1,64 @@ >>> [CLI] bundle deploy -Error: Found ${FOO} in script one. Interpolation syntax ${...} is not allowed in scripts +Error: Found ${FOO} in script one.content. Interpolation syntax ${...} is not supported in script content at scripts.one.content in databricks.yml:6:14 -We do not support the ${...} interpolation syntax in scripts because -it's ambiguous whether it's a variable reference or reference to an -environment variable. +Script content is passed to the shell as-is, so ${...} is left for the shell to expand. +To interpolate a bundle value into the script, declare an environment variable +in the script's "env:" section and reference it from "content" with $NAME: -Error: Found ${var.BAR} in script two. Interpolation syntax ${...} is not allowed in scripts + scripts: + one: + env: + MY_VAR: ${var.foo} + content: echo "$MY_VAR" + +Error: Found ${var.BAR} in script two.content. Interpolation syntax ${...} is not supported in script content at scripts.two.content in databricks.yml:8:14 -We do not support the ${...} interpolation syntax in scripts because -it's ambiguous whether it's a variable reference or reference to an -environment variable. +Script content is passed to the shell as-is, so ${...} is left for the shell to expand. 
+To interpolate a bundle value into the script, declare an environment variable +in the script's "env:" section and reference it from "content" with $NAME: + + scripts: + two: + env: + MY_VAR: ${var.foo} + content: echo "$MY_VAR" Exit code: 1 >>> [CLI] bundle run foo -Error: Found ${FOO} in script one. Interpolation syntax ${...} is not allowed in scripts +Error: Found ${FOO} in script one.content. Interpolation syntax ${...} is not supported in script content at scripts.one.content in databricks.yml:6:14 -We do not support the ${...} interpolation syntax in scripts because -it's ambiguous whether it's a variable reference or reference to an -environment variable. +Script content is passed to the shell as-is, so ${...} is left for the shell to expand. +To interpolate a bundle value into the script, declare an environment variable +in the script's "env:" section and reference it from "content" with $NAME: -Error: Found ${var.BAR} in script two. Interpolation syntax ${...} is not allowed in scripts + scripts: + one: + env: + MY_VAR: ${var.foo} + content: echo "$MY_VAR" + +Error: Found ${var.BAR} in script two.content. Interpolation syntax ${...} is not supported in script content at scripts.two.content in databricks.yml:8:14 -We do not support the ${...} interpolation syntax in scripts because -it's ambiguous whether it's a variable reference or reference to an -environment variable. +Script content is passed to the shell as-is, so ${...} is left for the shell to expand. +To interpolate a bundle value into the script, declare an environment variable +in the script's "env:" section and reference it from "content" with $NAME: + + scripts: + two: + env: + MY_VAR: ${var.foo} + content: echo "$MY_VAR" Exit code: 1 diff --git a/bundle/config/root.go b/bundle/config/root.go index 6d4697cc1ba..ae8439e010a 100644 --- a/bundle/config/root.go +++ b/bundle/config/root.go @@ -24,6 +24,13 @@ import ( type Script struct { // Content of the script to be executed. 
Content string `json:"content"` + + // Env is a map of environment variables exported when running the script. + // Values may reference ${bundle.*}, ${workspace.*}, ${var.*} (or + // ${variables.*}); other prefixes are rejected at validation time. + // Use this to pass bundle configuration into a script's shell environment + // without polluting the script content with DABs interpolation syntax. + Env map[string]string `json:"env,omitempty"` } type Root struct { //nolint:recvcheck // value receivers for read-only accessors, pointer for mutators diff --git a/bundle/config/validate/scripts.go b/bundle/config/validate/scripts.go index 04c6045bb42..5d11f4939c4 100644 --- a/bundle/config/validate/scripts.go +++ b/bundle/config/validate/scripts.go @@ -4,12 +4,13 @@ import ( "context" "fmt" "maps" - "regexp" "slices" + "strings" "github.com/databricks/cli/bundle" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/dynvar" ) type validateScripts struct{} @@ -22,10 +23,14 @@ func (f *validateScripts) Name() string { return "validate:scripts" } -func (f *validateScripts) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - diags := diag.Diagnostics{} +// allowedEnvRefPrefixes are the variable prefixes that may appear in a +// script's "env:" section. These match the prefixes resolved before scripts +// execute (defaultPrefixes in resolve_variable_references.go); "var" is the +// shorthand for "variables". +var allowedEnvRefPrefixes = []string{"bundle", "workspace", "var", "variables"} - re := regexp.MustCompile(`\$\{.*\}`) +func (f *validateScripts) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + var diags diag.Diagnostics // Sort the scripts to have a deterministic order for the // generated diagnostics. 
@@ -33,36 +38,80 @@ func (f *validateScripts) Apply(ctx context.Context, b *bundle.Bundle) diag.Diag for _, k := range scriptKeys { script := b.Config.Scripts[k] - p := dyn.NewPath(dyn.Key("scripts"), dyn.Key(k), dyn.Key("content")) + contentPath := dyn.NewPath(dyn.Key("scripts"), dyn.Key(k), dyn.Key("content")) if script.Content == "" { diags = append(diags, diag.Diagnostic{ Severity: diag.Error, Summary: fmt.Sprintf("Script %s has no content", k), - Paths: []dyn.Path{p}, + Paths: []dyn.Path{contentPath}, }) continue } - v, err := dyn.GetByPath(b.Config.Value(), p) - if err != nil { - return diags.Extend(diag.FromErr(err)) + diags = diags.Extend(validateScriptContent(b, k, script.Content, contentPath)) + diags = diags.Extend(validateScriptEnv(b, k, script.Env)) + } + + return diags +} + +func validateScriptContent(b *bundle.Bundle, key, content string, p dyn.Path) diag.Diagnostics { + ref, ok := dynvar.NewRef(dyn.V(content)) + if !ok { + return nil + } + + first := ref.Matches[0][0] + return diag.Diagnostics{{ + Severity: diag.Error, + Summary: fmt.Sprintf("Found %s in script %s.content. Interpolation syntax ${...} is not supported in script content", first, key), + Detail: `Script content is passed to the shell as-is, so ${...} is left for the shell to expand. 
+To interpolate a bundle value into the script, declare an environment variable +in the script's "env:" section and reference it from "content" with $NAME: + + scripts: + ` + key + `: + env: + MY_VAR: ${var.foo} + content: echo "$MY_VAR"`, + Locations: locationsForPath(b, p), + Paths: []dyn.Path{p}, + }} +} + +func validateScriptEnv(b *bundle.Bundle, key string, env map[string]string) diag.Diagnostics { + var diags diag.Diagnostics + + for _, name := range slices.Sorted(maps.Keys(env)) { + ref, ok := dynvar.NewRef(dyn.V(env[name])) + if !ok { + continue } - // Check for interpolation syntax - match := re.FindString(script.Content) - if match != "" { + envValuePath := dyn.NewPath(dyn.Key("scripts"), dyn.Key(key), dyn.Key("env"), dyn.Key(name)) + + for _, refPath := range ref.References() { + prefix, _, _ := strings.Cut(refPath, ".") + if slices.Contains(allowedEnvRefPrefixes, prefix) { + continue + } diags = append(diags, diag.Diagnostic{ - Severity: diag.Error, - Summary: fmt.Sprintf("Found %s in script %s. 
Interpolation syntax ${...} is not allowed in scripts", match, k), - Detail: `We do not support the ${...} interpolation syntax in scripts because -it's ambiguous whether it's a variable reference or reference to an -environment variable.`, - Locations: v.Locations(), - Paths: []dyn.Path{p}, + Severity: diag.Error, + Summary: fmt.Sprintf("${%s} cannot be used in scripts.%s.env.%s; only ${bundle.*}, ${workspace.*}, and ${var.*} are resolved before scripts execute", refPath, key, name), + Locations: locationsForPath(b, envValuePath), + Paths: []dyn.Path{envValuePath}, }) } } return diags } + +func locationsForPath(b *bundle.Bundle, p dyn.Path) []dyn.Location { + v, err := dyn.GetByPath(b.Config.Value(), p) + if err != nil { + return nil + } + return v.Locations() +} diff --git a/bundle/internal/schema/annotations.yml b/bundle/internal/schema/annotations.yml index 041ba102ddb..42ee167d4a4 100644 --- a/bundle/internal/schema/annotations.yml +++ b/bundle/internal/schema/annotations.yml @@ -364,6 +364,9 @@ github.com/databricks/cli/bundle/config.Script: "content": "description": |- PLACEHOLDER + "env": + "description": |- + PLACEHOLDER github.com/databricks/cli/bundle/config.Sync: "exclude": "description": |- diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index 414e70fedd2..fc2d42960c8 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -2677,6 +2677,9 @@ "properties": { "content": { "$ref": "#/$defs/string" + }, + "env": { + "$ref": "#/$defs/map/string" } }, "additionalProperties": false, diff --git a/cmd/bundle/run.go b/cmd/bundle/run.go index e98fe59ac4e..1849444a921 100644 --- a/cmd/bundle/run.go +++ b/cmd/bundle/run.go @@ -10,6 +10,7 @@ import ( "slices" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/env" "github.com/databricks/cli/bundle/resources" "github.com/databricks/cli/bundle/run" @@ -165,7 +166,7 @@ Example usage: if len(runArgs) > 0 { 
return fmt.Errorf("additional arguments are not supported for scripts. Got: %v. We recommend using environment variables to pass runtime arguments to a script. For example: FOO=bar databricks bundle run my_script", runArgs) } - return executeScript(b.Config.Scripts[key].Content, cmd, b) + return executeScript(b.Config.Scripts[key], cmd, b) } return nil @@ -276,8 +277,14 @@ func scriptEnv(cmd *cobra.Command, b *bundle.Bundle) []string { return out } -func executeScript(content string, cmd *cobra.Command, b *bundle.Bundle) error { - return execv.Shell(content, b.BundleRootPath, scriptEnv(cmd, b)) +func executeScript(script config.Script, cmd *cobra.Command, b *bundle.Bundle) error { + env := scriptEnv(cmd, b) + // Append after auth/profile/target so script-declared values take precedence + // if they happen to collide. + for _, name := range slices.Sorted(maps.Keys(script.Env)) { + env = append(env, name+"="+script.Env[name]) + } + return execv.Shell(script.Content, b.BundleRootPath, env) } func executeInline(cmd *cobra.Command, args []string, b *bundle.Bundle) error {