From c43bfe17277fe82d7c72c92f407d3371c03bf795 Mon Sep 17 00:00:00 2001 From: Jules Macret Date: Tue, 5 May 2026 17:37:29 +0200 Subject: [PATCH 1/6] feat(interp): host-binary exec via host:= [DEMO ONLY] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a parallel "host:" namespace to AllowedCommands so a single allowlisted host binary (e.g. host:logrotate=/usr/sbin/logrotate) can be exec'd directly. Plumbs stdin/stdout/stderr, propagates exit code, kills via SIGKILL on context cancel, forwards only PATH/HOME/LANG to the child. Linux-only; non-linux platforms return 127. This fundamentally changes rshell's threat model — the entire reason rshell exists is to *not* execute host binaries. This entry-point exists for the host-remediation demo (step 6: logrotate -f) and is not intended to merge to main without a real design pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- README.md | 6 ++ SHELL_FEATURES.md | 1 + cmd/rshell/main.go | 2 +- cmd/rshell/main_test.go | 2 +- interp/allowed_commands_test.go | 2 +- interp/api.go | 72 ++++++++++--- interp/host_exec_linux.go | 84 +++++++++++++++ interp/host_exec_other.go | 21 ++++ interp/host_exec_test.go | 184 ++++++++++++++++++++++++++++++++ interp/runner_exec.go | 19 +++- 10 files changed, 374 insertions(+), 19 deletions(-) create mode 100644 interp/host_exec_linux.go create mode 100644 interp/host_exec_other.go create mode 100644 interp/host_exec_test.go diff --git a/README.md b/README.md index e339ee9c2..7d0c0a3bb 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,12 @@ Every access path is default-deny: **AllowedCommands** restricts which commands (builtins or external) the interpreter may execute. Commands must be specified with the `rshell:` namespace prefix (e.g. `rshell:cat`, `rshell:echo`). If not set, no commands are allowed. +> ### ⚠️ Demo only — `host:` namespace (not for production) +> +> A `host:=` entry (e.g. `host:logrotate=/usr/sbin/logrotate`) allowlists a single host binary at the given absolute path. When dispatch sees a non-builtin command name matching ``, the binary at `` is exec'd directly (no PATH lookup, no shell, no argv filtering). Stdin/stdout/stderr are plumbed through, the binary's exit code is propagated, context cancellation kills the process via `SIGKILL`, and the env passed to the binary is filtered to `PATH`, `HOME`, `LANG`. Linux-only; on darwin/windows the dispatch returns 127 with "host execution not supported on this platform". +> +> **This entry-point fundamentally changes rshell's threat model — the entire reason rshell exists is to *not* execute host binaries.** It exists for the host-remediation demo (step 6: `logrotate -f /etc/logrotate.d/app`) and would need a real design pass before becoming a product feature. The current implementation has no audit logging, no argv filtering, no multi-binary scaffolding, and no operator-friendly configuration story. + **AllowedPaths** restricts all file operations to specified directories using Go's `os.Root` API (`openat` syscalls), making it immune to symlink traversal, TOCTOU races, and `..` escape attacks. Both reads and writes are sandboxed by the same mechanism — files outside the allowlist cannot be opened, created, truncated, or appended to. The cross-root symlink fallback is read-only: a symlink that points outside its `os.Root` is followed for reads but never for writes (avoids a TOCTOU window where a malicious link target could be swapped between resolution and open). File-target output redirections (`>`, `>>`, `2>`, `&>`, `&>>`) open through the same sandbox: writes inside `AllowedPaths` succeed, anything else fails with `permission denied` and exit 1. The literal target `/dev/null` is short-circuited to a discarded sink without going through the sandbox. Configured directories that cannot be opened (missing, not a directory, no permission) are skipped with a diagnostic message; by default these messages are flushed once to the runner's stderr at construction time. Callers that need to keep stderr clean of sandbox diagnostics can route them to a dedicated sink with `WarningsWriter(io.Writer)` or retrieve them programmatically via `Runner.Warnings()`. > **Note:** The `ss`, `ip route`, and `df` builtins bypass `AllowedPaths` for their kernel-state reads. `ss` and `ip route` open `/proc/net/*` paths directly; `df` reads `/proc/self/mountinfo` (Linux) or calls `getfsstat(2)` (macOS), then issues `unix.Statfs(2)` against every kernel-reported mount point. These paths are hardcoded — never derived from user input — and `Statfs` returns metadata only (block / inode counts, filesystem type, block size). There is no sandbox-escape risk, but operators cannot use `AllowedPaths` to block `ss` from enumerating local sockets, `ip route` from reading the routing table, or `df` from reporting mount-table capacity — these reads succeed regardless of the configured path policy. diff --git a/SHELL_FEATURES.md b/SHELL_FEATURES.md index 466e6c0bb..d517705f6 100644 --- a/SHELL_FEATURES.md +++ b/SHELL_FEATURES.md @@ -104,6 +104,7 @@ The in-shell `help` command mirrors these feature categories: run `help` for a c ## Execution - ✅ AllowedCommands — restricts which commands (builtins or external) may be executed; commands require the `rshell:` namespace prefix (e.g. `rshell:cat`); if not set, no commands are allowed +- ⚠️ **Demo only:** `host:=` AllowedCommands entries (e.g. `host:logrotate=/usr/sbin/logrotate`) allowlist a single host binary that rshell will exec directly when invoked. Linux-only. Plumbs stdin/stdout/stderr, propagates exit code, kills via `SIGKILL` on context cancel, forwards only `PATH`/`HOME`/`LANG`. This fundamentally changes rshell's threat model and is intended for the host-remediation demo, not production — see README "demo only" section. - ✅ AllowedPaths filesystem sandboxing — restricts all file access (read and write) to specified directories; cross-root symlink fallback is read-only to avoid TOCTOU on writes - ✅ Whole-run execution timeout — callers can bound a `Run()` call via `context.Context`, `interp.MaxExecutionTime`, or the CLI `--timeout` flag; the deadline applies to the entire script, not each individual command - ✅ ProcPath — overrides the proc filesystem path used by `ps` (default `/proc`; Linux-only; useful for testing/container environments) diff --git a/cmd/rshell/main.go b/cmd/rshell/main.go index beeacee47..bebb9c70a 100644 --- a/cmd/rshell/main.go +++ b/cmd/rshell/main.go @@ -133,7 +133,7 @@ func run(ctx context.Context, args []string, stdin io.Reader, stdout, stderr io. cmd.Flags().StringVarP(&command, "command", "c", "", "shell command string to execute") cmd.Flags().MarkHidden("command") //nolint:errcheck // flag is guaranteed to exist cmd.Flags().StringVarP(&allowedPaths, "allowed-paths", "p", "", "comma-separated list of directories the shell is allowed to access") - cmd.Flags().StringVar(&allowedCommands, "allowed-commands", "", "comma-separated list of namespaced commands (e.g. rshell:cat,rshell:find)") + cmd.Flags().StringVar(&allowedCommands, "allowed-commands", "", "comma-separated list of namespaced commands (e.g. rshell:cat,rshell:find,host:logrotate=/usr/sbin/logrotate)") cmd.Flags().BoolVar(&allowAllCmds, "allow-all-commands", false, "allow execution of all commands (builtins and external)") cmd.Flags().DurationVar(&timeout, "timeout", 0, "maximum execution time for the entire shell run (e.g. 100ms, 5s, 1m)") cmd.Flags().StringVar(&procPath, "proc-path", "", "path to the proc filesystem used by ps (default \"/proc\")") diff --git a/cmd/rshell/main_test.go b/cmd/rshell/main_test.go index 0ae52aca5..402f5ed42 100644 --- a/cmd/rshell/main_test.go +++ b/cmd/rshell/main_test.go @@ -276,7 +276,7 @@ func TestAllowedCommandsMissingNamespace(t *testing.T) { } func TestAllowedCommandsUnknownNamespace(t *testing.T) { - code, _, stderr := runCLI(t, "--allowed-commands", "host:echo", "-c", `echo hello`) + code, _, stderr := runCLI(t, "--allowed-commands", "bogus:echo", "-c", `echo hello`) assert.Equal(t, 1, code) assert.Contains(t, stderr, "unknown namespace") } diff --git a/interp/allowed_commands_test.go b/interp/allowed_commands_test.go index 38a115597..0c36af152 100644 --- a/interp/allowed_commands_test.go +++ b/interp/allowed_commands_test.go @@ -21,7 +21,7 @@ func TestAllowedCommandsNamespaceRequired(t *testing.T) { } func TestAllowedCommandsUnknownNamespace(t *testing.T) { - _, err := interp.New(interp.AllowedCommands([]string{"host:echo"})) + _, err := interp.New(interp.AllowedCommands([]string{"bogus:echo"})) require.Error(t, err) assert.Contains(t, err.Error(), "unknown namespace") } diff --git a/interp/api.go b/interp/api.go index b3dda78f2..bb0b111ea 100644 --- a/interp/api.go +++ b/interp/api.go @@ -82,6 +82,15 @@ type runnerConfig struct { // command. Intended for testing convenience. allowAllCommands bool + // hostCommands maps allowlisted host-binary names (e.g. "logrotate") to + // the absolute path of the binary on disk (e.g. "/usr/sbin/logrotate"). + // Populated by AllowedCommands entries of the form "host:=". + // + // DEMO ONLY: running host binaries fundamentally changes rshell's threat + // model. This entry-point exists for the host-remediation demo and is not + // intended to ship as a product feature. + hostCommands map[string]string + // maxExecutionTime bounds the duration of each Run call. Zero disables // the limit. When non-zero, Run derives a child context with this timeout. maxExecutionTime time.Duration @@ -734,10 +743,19 @@ func HostPrefix(prefix string) RunnerOption { } // AllowedCommands restricts command execution to the specified command names. -// Names must use the "rshell:" namespace prefix (e.g. "rshell:cat", -// "rshell:find"). Names without a colon separator or with an unknown namespace -// are rejected. The bare command name (after the prefix) is stored internally -// and matched exactly against the command name (args[0]) at execution time. +// Names must use a namespace prefix. +// +// Two namespaces are accepted: +// - "rshell:" — allowlist a builtin (e.g. "rshell:cat", "rshell:find"). +// The bare command name (after the prefix) is matched exactly against +// args[0] at execution time. +// - "host:=" — DEMO ONLY: allowlist a host binary at +// the given absolute path (e.g. "host:logrotate=/usr/sbin/logrotate"). +// When dispatch sees a non-builtin command name matching , the +// binary at is exec'd. See [hostCommands] for the threat- +// model caveat. Linux-only. +// +// Names without a colon separator or with an unknown namespace are rejected. // // Only commands whose name appears in the list may be executed; all others are // rejected with ": command not allowed". @@ -750,28 +768,52 @@ func HostPrefix(prefix string) RunnerOption { func AllowedCommands(names []string) RunnerOption { return func(r *Runner) error { m := make(map[string]bool, len(names)) + var hostMap map[string]string for _, n := range names { if n == "" { return fmt.Errorf("AllowedCommands: empty command name") } idx := strings.Index(n, ":") if idx < 0 { - return fmt.Errorf("AllowedCommands: %q missing namespace prefix (expected \"rshell:\")", n) + return fmt.Errorf("AllowedCommands: %q missing namespace prefix (expected \"rshell:\" or \"host:=\")", n) } ns := n[:idx] - cmd := n[idx+1:] - if strings.Index(cmd, ":") >= 0 { - return fmt.Errorf("AllowedCommands: %q contains multiple colons; expected format \"rshell:\"", n) - } - if ns != "rshell" { - return fmt.Errorf("AllowedCommands: %q has unknown namespace %q (only \"rshell\" is supported)", n, ns) - } - if cmd == "" { - return fmt.Errorf("AllowedCommands: %q has empty command name", n) + rest := n[idx+1:] + switch ns { + case "rshell": + if strings.Index(rest, ":") >= 0 { //nolint:gosimple // strings.Contains is not on the interp allowlist + return fmt.Errorf("AllowedCommands: %q contains multiple colons; expected format \"rshell:\"", n) + } + if rest == "" { + return fmt.Errorf("AllowedCommands: %q has empty command name", n) + } + m[rest] = true + case "host": + eq := strings.Index(rest, "=") + if eq < 0 { + return fmt.Errorf("AllowedCommands: %q missing \"=\" (expected format \"host:=\")", n) + } + name := rest[:eq] + path := rest[eq+1:] + if name == "" { + return fmt.Errorf("AllowedCommands: %q has empty host command name", n) + } + if path == "" { + return fmt.Errorf("AllowedCommands: %q has empty host binary path", n) + } + if !filepath.IsAbs(path) { + return fmt.Errorf("AllowedCommands: %q host binary path must be absolute, got %q", n, path) + } + if hostMap == nil { + hostMap = make(map[string]string) + } + hostMap[name] = path + default: + return fmt.Errorf("AllowedCommands: %q has unknown namespace %q (only \"rshell\" and \"host\" are supported)", n, ns) } - m[cmd] = true } r.allowedCommands = m + r.hostCommands = hostMap return nil } } diff --git a/interp/host_exec_linux.go b/interp/host_exec_linux.go new file mode 100644 index 000000000..04df4502f --- /dev/null +++ b/interp/host_exec_linux.go @@ -0,0 +1,84 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +//go:build linux + +// DEMO ONLY: this file enables rshell to execute a small allowlist of host +// binaries. Running host binaries fundamentally changes rshell's threat +// model — the entire reason rshell exists is to *not* execute host binaries. +// This entry-point exists for the host-remediation demo (see +// docs/RULES.md / SHELL_FEATURES.md "demo only" section) and would need a +// real design pass before becoming a product feature. + +package interp + +import ( + "context" + "errors" + "os" + "os/exec" +) + +// hostEnvAllowlist is the set of environment variables forwarded to host +// binaries. Anything else from the parent process env is stripped so that +// host invocations do not leak ambient configuration that the rest of the +// shell deliberately keeps out. +var hostEnvAllowlist = []string{"PATH", "HOME", "LANG"} + +// runHostCommand executes the host binary at path with args[1:] as its argv, +// plumbing the runner's stdin/stdout/stderr through. It returns the binary's +// exit code as a uint8 (so $? works) and propagates context cancellation by +// killing the process with SIGKILL — exec.CommandContext's default Cancel +// uses os.Kill (SIGKILL on Unix), which matches the timeout behaviour the +// rest of the runner applies to builtins. +// +// args[0] is the user-visible command name; args[1:] is the binary's argv. +// The binary path itself comes from the hostCommands allowlist entry, not +// from args, so PATH lookup is intentionally not performed. +func (r *Runner) runHostCommand(ctx context.Context, path string, args []string) uint8 { + cmd := exec.CommandContext(ctx, path, args[1:]...) + cmd.Dir = r.Dir + cmd.Env = filterHostEnv() + if r.stdin != nil { + cmd.Stdin = r.stdin + } + cmd.Stdout = r.stdout + cmd.Stderr = r.stderr + + err := cmd.Run() + if err == nil { + return 0 + } + var exitErr *exec.ExitError + if errors.As(err, &exitErr) { + // ExitCode returns -1 if the process was terminated by a signal + // (e.g. SIGKILL on context cancellation). Map that to 130 — the + // shell-conventional "terminated" code — so the caller can still + // observe a non-zero exit. + code := exitErr.ExitCode() + if code < 0 { + return 130 + } + return uint8(code) + } + // Failure to start or some other I/O error: surface to stderr and + // return 127 (the shell convention for "command not found / not + // executable"). + r.errf("rshell: %s: %v\n", args[0], err) + return 127 +} + +// filterHostEnv builds a minimal env slice for host binaries from the +// process env, forwarding only the names in hostEnvAllowlist. Variables +// that are unset are simply omitted from the result. +func filterHostEnv() []string { + out := make([]string, 0, len(hostEnvAllowlist)) + for _, name := range hostEnvAllowlist { + if v, ok := os.LookupEnv(name); ok { + out = append(out, name+"="+v) + } + } + return out +} diff --git a/interp/host_exec_other.go b/interp/host_exec_other.go new file mode 100644 index 000000000..5516601f6 --- /dev/null +++ b/interp/host_exec_other.go @@ -0,0 +1,21 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +//go:build !linux + +// DEMO ONLY: see host_exec_linux.go. The host-binary entry-point is a +// Linux-only demo; on darwin/windows we simply refuse to dispatch. + +package interp + +import "context" + +// runHostCommand is a stub for non-Linux platforms. It writes an explanatory +// error to the runner's stderr and returns 127 (shell-conventional "command +// not found / not executable") so callers can rely on a non-zero exit. +func (r *Runner) runHostCommand(_ context.Context, _ string, args []string) uint8 { + r.errf("rshell: %s: host execution not supported on this platform\n", args[0]) + return 127 +} diff --git a/interp/host_exec_test.go b/interp/host_exec_test.go new file mode 100644 index 000000000..23debf1ce --- /dev/null +++ b/interp/host_exec_test.go @@ -0,0 +1,184 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +//go:build linux + +// DEMO ONLY: tests for the host-binary entry-point. See host_exec_linux.go. + +package interp_test + +import ( + "bytes" + "context" + "errors" + "io" + "os" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "mvdan.cc/sh/v3/syntax" + + "github.com/DataDog/rshell/interp" +) + +// runExitCode runs node and returns the resulting exit code. A nil error +// from Run means the script exited 0; a non-nil error wraps an +// interp.ExitStatus that we surface as the code. Any other error is +// reported via t.Fatalf. +func runExitCode(t *testing.T, r *interp.Runner, node syntax.Node) uint8 { + t.Helper() + err := r.Run(context.Background(), node) + if err == nil { + return 0 + } + var es interp.ExitStatus + if errors.As(err, &es) { + return uint8(es) + } + t.Fatalf("Run returned unexpected error: %v", err) + return 0 +} + +func parseHostExecScript(t *testing.T, src string) *syntax.File { + t.Helper() + prog, err := syntax.NewParser().Parse(strings.NewReader(src), "") + require.NoError(t, err) + return prog +} + +// newHostExecRunner builds a Runner that allows just the host: entries the +// caller passes in. stdin/stdout/stderr are wired up to the supplied buffers +// so tests can assert on captured output. stdin is provided as a *os.File +// (via os.Pipe) when nonempty, since StdIO expects a Reader and the runner +// converts it into a pipe internally — providing a real file when we need +// real stdin keeps the host-exec path identical to production. +func newHostExecRunner(t *testing.T, hostEntries []string, stdin io.Reader) (*interp.Runner, *bytes.Buffer, *bytes.Buffer) { + t.Helper() + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + if stdin == nil { + stdin = strings.NewReader("") + } + r, err := interp.New( + interp.AllowedCommands(hostEntries), + interp.StdIO(stdin, stdout, stderr), + ) + require.NoError(t, err) + t.Cleanup(func() { _ = r.Close() }) + return r, stdout, stderr +} + +func TestHostExecStdoutPlumbed(t *testing.T) { + r, stdout, _ := newHostExecRunner(t, []string{"host:echo=/bin/echo"}, nil) + err := r.Run(context.Background(), parseHostExecScript(t, `echo hello world`)) + require.NoError(t, err) + assert.Equal(t, "hello world\n", stdout.String()) +} + +func TestHostExecExitCodePropagated(t *testing.T) { + r, _, _ := newHostExecRunner(t, []string{"host:false=/usr/bin/false"}, nil) + code := runExitCode(t, r, parseHostExecScript(t, `false`)) + assert.Equal(t, uint8(1), code) +} + +func TestHostExecStdinPlumbed(t *testing.T) { + stdin := strings.NewReader("piped-input\n") + r, stdout, _ := newHostExecRunner(t, []string{"host:cat=/bin/cat"}, stdin) + err := r.Run(context.Background(), parseHostExecScript(t, `cat`)) + require.NoError(t, err) + assert.Equal(t, "piped-input\n", stdout.String()) +} + +func TestHostExecRejectsNonAllowlistedName(t *testing.T) { + r, _, stderr := newHostExecRunner(t, []string{"host:echo=/bin/echo"}, nil) + code := runExitCode(t, r, parseHostExecScript(t, `notallowlisted`)) + assert.Equal(t, uint8(127), code) + assert.Contains(t, stderr.String(), "command not allowed") +} + +func TestHostExecContextCancellationKillsProcess(t *testing.T) { + r, _, _ := newHostExecRunner(t, []string{"host:sleep=/bin/sleep"}, nil) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + done := make(chan error, 1) + go func() { + // 30s sleep — deliberately long so we know the cancel was what + // stopped it, not natural completion. + done <- r.Run(ctx, parseHostExecScript(t, `sleep 30`)) + }() + + // Give the process a chance to actually start before cancelling. + time.Sleep(100 * time.Millisecond) + cancel() + + select { + case <-done: + // success: the run returned within the deadline below. + case <-time.After(2 * time.Second): + t.Fatal("host process was not killed within 2s of context cancel") + } +} + +func TestHostExecEnvFilteredToAllowlist(t *testing.T) { + // Set a sentinel env var that should NOT propagate, and one that + // should. /usr/bin/env prints the resulting environment; we assert + // that PATH (allowlisted) shows up and the sentinel does not. + t.Setenv("PATH", "/usr/bin:/bin") + t.Setenv("RSHELL_HOST_EXEC_LEAK_CHECK", "should-not-appear") + + r, stdout, _ := newHostExecRunner(t, []string{"host:env=/usr/bin/env"}, nil) + err := r.Run(context.Background(), parseHostExecScript(t, `env`)) + require.NoError(t, err) + out := stdout.String() + assert.Contains(t, out, "PATH=/usr/bin:/bin", "PATH must be forwarded") + assert.NotContains(t, out, "RSHELL_HOST_EXEC_LEAK_CHECK", "non-allowlisted env vars must be stripped") +} + +func TestHostExecForwardsHomeAndLang(t *testing.T) { + t.Setenv("HOME", "/tmp/host-exec-home") + t.Setenv("LANG", "C.UTF-8") + + r, stdout, _ := newHostExecRunner(t, []string{"host:env=/usr/bin/env"}, nil) + err := r.Run(context.Background(), parseHostExecScript(t, `env`)) + require.NoError(t, err) + out := stdout.String() + assert.Contains(t, out, "HOME=/tmp/host-exec-home") + assert.Contains(t, out, "LANG=C.UTF-8") +} + +// TestHostExecOnlyApprovedNamesDispatch ensures that an allowlisted *builtin* +// name still dispatches to the builtin and does not reach the host-exec path +// — the host: namespace is consulted only when the name is not a known +// builtin. +func TestHostExecBuiltinTakesPrecedence(t *testing.T) { + // "echo" is a known builtin in rshell. If the host:echo entry were + // erroneously preferred, /bin/echo would emit "FROM_HOST" via its + // argv, but because we pass the literal string and rely on builtin + // echo, the output ends with a single newline produced by the builtin. + r, stdout, _ := newHostExecRunner(t, + []string{"rshell:echo", "host:echo=/bin/echo"}, nil) + err := r.Run(context.Background(), parseHostExecScript(t, `echo from-builtin`)) + require.NoError(t, err) + assert.Equal(t, "from-builtin\n", stdout.String()) +} + +// Sanity: make sure os.Environ has an unrelated value we can rely on for the +// negative side of TestHostExecEnvFilteredToAllowlist. +func TestHostExecOSEnvironHasUnrelatedVar(t *testing.T) { + t.Setenv("RSHELL_HOST_EXEC_LEAK_CHECK", "yes") + found := false + for _, kv := range os.Environ() { + if strings.HasPrefix(kv, "RSHELL_HOST_EXEC_LEAK_CHECK=") { + found = true + break + } + } + require.True(t, found, "test setup: t.Setenv should make var visible via os.Environ") +} diff --git a/interp/runner_exec.go b/interp/runner_exec.go index 97a21e11a..d93883693 100644 --- a/interp/runner_exec.go +++ b/interp/runner_exec.go @@ -502,7 +502,8 @@ func (r *Runner) call(ctx context.Context, pos syntax.Pos, args []string) { // Evaluate both policy checks upfront so the span tags reflect the // independent facts about the command name regardless of which gate // short-circuits dispatch. - isAllowed := r.allowAllCommands || r.allowedCommands[name] + hostBinaryPath, isHostAllowed := r.hostCommands[name] + isAllowed := r.allowAllCommands || r.allowedCommands[name] || isHostAllowed fn, isKnown := builtins.Lookup(name) span, ctx := telemetry.StartSpanFromContext(ctx, "command") @@ -511,6 +512,12 @@ func (r *Runner) call(ctx context.Context, pos syntax.Pos, args []string) { span.SetTag("rshell.command.argc", len(args)-1) span.SetTag("rshell.command.is_allowed", isAllowed) span.SetTag("rshell.command.is_known", isKnown) + if isHostAllowed { + // DEMO ONLY: tag invocations that reach for a host binary so the + // demo narrative ("look, we can see exactly when the agent + // reached for the host") is visible in trace data. + span.SetTag("rshell.command.host_exec", true) + } // has_stdin_pipe / has_output_redirect reflect whether the command's // stdin/stdout were reassigned from the Runner's originals — true for // both pipeline stages and file redirects. @@ -785,6 +792,16 @@ func (r *Runner) call(ctx context.Context, pos syntax.Pos, args []string) { } return } + // DEMO ONLY: a host-binary entry takes precedence over the default + // execHandler so that an allowlisted host:= entry actually + // runs the binary instead of being rejected as "command not allowed". + // host: entries are intentionally only consulted when the name is not + // also a known builtin, so builtins always win. + if isHostAllowed { + r.dispatchedCount++ + r.exit.code = r.runHostCommand(ctx, hostBinaryPath, args) + return + } // Allowed but not known: the default execHandler (noExecHandler) will // reject with exit 127. unknownCount was already incremented above. r.exec(ctx, pos, args) From fa5e47a3abfbf0ff4653553dbc02390c56f4e77d Mon Sep 17 00:00:00 2001 From: Jules Macret Date: Tue, 5 May 2026 18:06:22 +0200 Subject: [PATCH 2/6] fix(interp): gate host: entries on !isKnown so they cannot authorize builtins Address Codex review (PR #228): an entry like "host:cat=/bin/false" flipped isAllowed=true and the isKnown branch then ran the cat builtin with stdin/AllowedPaths access, never executing the host binary. Compute isKnown before isAllowed and gate the host entry on !isKnown so a collision with a builtin name no longer launders builtin access. host_exec span tag is now keyed on hostDispatch (the actually-exec'd case) rather than mere entry presence. Includes a cross-platform regression test that exercises the gate without invoking the host-exec stub. Co-Authored-By: Claude Opus 4.7 (1M context) --- interp/allowed_commands_test.go | 59 +++++++++++++++++++++++++++++++++ interp/runner_exec.go | 26 +++++++++++---- 2 files changed, 78 insertions(+), 7 deletions(-) diff --git a/interp/allowed_commands_test.go b/interp/allowed_commands_test.go index 0c36af152..9b91af3f7 100644 --- a/interp/allowed_commands_test.go +++ b/interp/allowed_commands_test.go @@ -6,10 +6,15 @@ package interp_test import ( + "bytes" + "context" + "errors" + "strings" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "mvdan.cc/sh/v3/syntax" "github.com/DataDog/rshell/interp" ) @@ -62,3 +67,57 @@ func TestAllowedCommandsEmpty(t *testing.T) { require.Error(t, err) assert.Contains(t, err.Error(), "empty command name") } + +// TestHostEntryDoesNotAuthorizeBuiltin is a regression test: a host: entry +// whose name collides with a builtin must NOT silently authorize the +// builtin. Without the !isKnown gate in call(), an entry like +// "host:cat=/bin/false" would flip isAllowed=true and the builtin cat +// would run with stdin/AllowedPaths access, never executing /bin/false. +// Cross-platform: this exercises the dispatch gate, not the actual host +// exec, so it runs on darwin/windows too. +func TestHostEntryDoesNotAuthorizeBuiltin(t *testing.T) { + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + r, err := interp.New( + interp.AllowedCommands([]string{"host:cat=/bin/false"}), + interp.StdIO(strings.NewReader(""), stdout, stderr), + ) + require.NoError(t, err) + t.Cleanup(func() { _ = r.Close() }) + + prog, err := syntax.NewParser().Parse(strings.NewReader("cat"), "") + require.NoError(t, err) + + runErr := r.Run(context.Background(), prog) + var status interp.ExitStatus + require.True(t, errors.As(runErr, &status), "expected ExitStatus error, got %v", runErr) + assert.Equal(t, interp.ExitStatus(127), status) + assert.Contains(t, stderr.String(), "command not allowed") + assert.Empty(t, stdout.String(), "cat builtin must not have run") +} + +// TestHostEntryAuthorizesNonBuiltin verifies the positive case for the +// dispatch gate — a host: entry whose name does NOT collide with a +// builtin still passes the allowlist check (it would fail with +// "command not allowed" if the gate were too strict). The actual exec +// path is platform-specific and tested in host_exec_test.go (linux); +// here we only assert that dispatch reaches it (on darwin/windows the +// host-exec stub returns 127 with a different message). +func TestHostEntryAuthorizesNonBuiltin(t *testing.T) { + stderr := &bytes.Buffer{} + r, err := interp.New( + interp.AllowedCommands([]string{"host:somenonsensename=/usr/bin/true"}), + interp.StdIO(strings.NewReader(""), &bytes.Buffer{}, stderr), + ) + require.NoError(t, err) + t.Cleanup(func() { _ = r.Close() }) + + prog, err := syntax.NewParser().Parse(strings.NewReader("somenonsensename"), "") + require.NoError(t, err) + + _ = r.Run(context.Background(), prog) + // Whatever exit code we got, the rejection path ("command not + // allowed") must NOT have been taken — that's the only thing the + // dispatch gate is responsible for here. + assert.NotContains(t, stderr.String(), "command not allowed") +} diff --git a/interp/runner_exec.go b/interp/runner_exec.go index d93883693..1aaa179c4 100644 --- a/interp/runner_exec.go +++ b/interp/runner_exec.go @@ -502,9 +502,19 @@ func (r *Runner) call(ctx context.Context, pos syntax.Pos, args []string) { // Evaluate both policy checks upfront so the span tags reflect the // independent facts about the command name regardless of which gate // short-circuits dispatch. - hostBinaryPath, isHostAllowed := r.hostCommands[name] - isAllowed := r.allowAllCommands || r.allowedCommands[name] || isHostAllowed fn, isKnown := builtins.Lookup(name) + hostBinaryPath, isHostAllowed := r.hostCommands[name] + // A host: entry is honoured for dispatch only when the name does not + // resolve to a builtin. Otherwise an entry like + // "host:cat=/bin/false" would silently authorize the cat builtin + // (because isAllowed would flip true) and the isKnown branch below + // would run cat with stdin/AllowedPaths access — never executing the + // host binary the entry actually pointed at. Builtins still take + // precedence when the name is also explicitly allowlisted via + // rshell:; this gate just prevents a host: entry from + // laundering builtin access. + hostDispatch := isHostAllowed && !isKnown + isAllowed := r.allowAllCommands || r.allowedCommands[name] || hostDispatch span, ctx := telemetry.StartSpanFromContext(ctx, "command") span.SetResourceName(name) @@ -512,10 +522,13 @@ func (r *Runner) call(ctx context.Context, pos syntax.Pos, args []string) { span.SetTag("rshell.command.argc", len(args)-1) span.SetTag("rshell.command.is_allowed", isAllowed) span.SetTag("rshell.command.is_known", isKnown) - if isHostAllowed { + if hostDispatch { // DEMO ONLY: tag invocations that reach for a host binary so the // demo narrative ("look, we can see exactly when the agent - // reached for the host") is visible in trace data. + // reached for the host") is visible in trace data. Tagged on + // hostDispatch (not isHostAllowed) so the tag reflects whether + // the host path was actually exec'd, not just whether an entry + // was configured. span.SetTag("rshell.command.host_exec", true) } // has_stdin_pipe / has_output_redirect reflect whether the command's @@ -795,9 +808,8 @@ func (r *Runner) call(ctx context.Context, pos syntax.Pos, args []string) { // DEMO ONLY: a host-binary entry takes precedence over the default // execHandler so that an allowlisted host:= entry actually // runs the binary instead of being rejected as "command not allowed". - // host: entries are intentionally only consulted when the name is not - // also a known builtin, so builtins always win. - if isHostAllowed { + // hostDispatch is already gated on !isKnown above, so builtins always win. + if hostDispatch { r.dispatchedCount++ r.exit.code = r.runHostCommand(ctx, hostBinaryPath, args) return From 8aedb5c9cbde9247732e3ed68a2e73bdfab0a951 Mon Sep 17 00:00:00 2001 From: Jules Macret Date: Wed, 6 May 2026 14:23:42 +0200 Subject: [PATCH 3/6] fix(interp): make host-exec tests actually exercise host path; use absolute paths on Windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address Codex review (PR #228, two comments). P1 — host_exec_test.go: the tests bound host:echo, host:cat, host:false to /bin/echo, /bin/cat, /usr/bin/false respectively, but echo/cat/false are all rshell builtins. After the !isKnown gate fix in c4205dd7, those entries no longer authorize the name (builtins win), so the tests rejected with exit 127 / "command not allowed" before ever invoking the host binary. Switched to non-builtin command names (hostecho, hostcat, hostfalse) bound to the same binaries, so dispatch actually reaches runHostCommand. Verified by running the test binary on debian:bookworm-slim — all nine TestHostExec* now pass. Also rewrote TestHostExecBuiltinTakesPrecedence to point host:echo at /usr/bin/false: if the host path were erroneously preferred, the script would exit 1; because the builtin wins it exits 0. The previous form pointed both at /bin/echo, which produced identical output and could not distinguish the two paths. P2 — allowed_commands_test.go: the cross-platform host-entry tests used /bin/false and /usr/bin/true as host paths. AllowedCommands validates with filepath.IsAbs, which is false on Windows for paths without a drive letter or UNC prefix, so those tests fail at New() during interp.New on Windows. Switched to filepath.Join(t.TempDir(), "fake-binary"), which is absolute on every OS and is never actually exec'd (the tests assert the dispatch gate, not the exec). Co-Authored-By: Claude Opus 4.7 (1M context) --- interp/allowed_commands_test.go | 20 ++++++++++++----- interp/host_exec_test.go | 39 +++++++++++++++++++-------------- 2 files changed, 36 insertions(+), 23 deletions(-) diff --git a/interp/allowed_commands_test.go b/interp/allowed_commands_test.go index 9b91af3f7..798f7d2fe 100644 --- a/interp/allowed_commands_test.go +++ b/interp/allowed_commands_test.go @@ -9,6 +9,7 @@ import ( "bytes" "context" "errors" + "path/filepath" "strings" "testing" @@ -71,15 +72,19 @@ func TestAllowedCommandsEmpty(t *testing.T) { // TestHostEntryDoesNotAuthorizeBuiltin is a regression test: a host: entry // whose name collides with a builtin must NOT silently authorize the // builtin. Without the !isKnown gate in call(), an entry like -// "host:cat=/bin/false" would flip isAllowed=true and the builtin cat -// would run with stdin/AllowedPaths access, never executing /bin/false. +// "host:cat=" would flip isAllowed=true and the builtin cat would +// run with stdin/AllowedPaths access, never executing the host path. // Cross-platform: this exercises the dispatch gate, not the actual host -// exec, so it runs on darwin/windows too. +// exec, so it runs on darwin/windows too. The host path uses +// t.TempDir() rather than a hardcoded /bin/... so AllowedCommands' +// filepath.IsAbs check passes on every OS (Windows requires drive +// letter or UNC). func TestHostEntryDoesNotAuthorizeBuiltin(t *testing.T) { + hostPath := filepath.Join(t.TempDir(), "fake-binary") stdout := &bytes.Buffer{} stderr := &bytes.Buffer{} r, err := interp.New( - interp.AllowedCommands([]string{"host:cat=/bin/false"}), + interp.AllowedCommands([]string{"host:cat=" + hostPath}), interp.StdIO(strings.NewReader(""), stdout, stderr), ) require.NoError(t, err) @@ -102,11 +107,14 @@ func TestHostEntryDoesNotAuthorizeBuiltin(t *testing.T) { // "command not allowed" if the gate were too strict). The actual exec // path is platform-specific and tested in host_exec_test.go (linux); // here we only assert that dispatch reaches it (on darwin/windows the -// host-exec stub returns 127 with a different message). +// host-exec stub returns 127 with a different message). t.TempDir() +// produces an absolute path on every OS so AllowedCommands accepts it +// on Windows too. func TestHostEntryAuthorizesNonBuiltin(t *testing.T) { + hostPath := filepath.Join(t.TempDir(), "fake-binary") stderr := &bytes.Buffer{} r, err := interp.New( - interp.AllowedCommands([]string{"host:somenonsensename=/usr/bin/true"}), + interp.AllowedCommands([]string{"host:somenonsensename=" + hostPath}), interp.StdIO(strings.NewReader(""), &bytes.Buffer{}, stderr), ) require.NoError(t, err) diff --git a/interp/host_exec_test.go b/interp/host_exec_test.go index 23debf1ce..1203e9ff4 100644 --- a/interp/host_exec_test.go +++ b/interp/host_exec_test.go @@ -73,29 +73,36 @@ func newHostExecRunner(t *testing.T, hostEntries []string, stdin io.Reader) (*in return r, stdout, stderr } +// Tests below intentionally use command names that do NOT collide with +// rshell builtins (echo/cat/false/true are all registered builtins). The +// !isKnown gate in call() means a host: entry is honored only when the +// name does not resolve to a builtin, so binding host:echo=/bin/echo and +// then running `echo` would dispatch the rshell echo builtin and never +// touch the host path under test. + func TestHostExecStdoutPlumbed(t *testing.T) { - r, stdout, _ := newHostExecRunner(t, []string{"host:echo=/bin/echo"}, nil) - err := r.Run(context.Background(), parseHostExecScript(t, `echo hello world`)) + r, stdout, _ := newHostExecRunner(t, []string{"host:hostecho=/bin/echo"}, nil) + err := r.Run(context.Background(), parseHostExecScript(t, `hostecho hello world`)) require.NoError(t, err) assert.Equal(t, "hello world\n", stdout.String()) } func TestHostExecExitCodePropagated(t *testing.T) { - r, _, _ := newHostExecRunner(t, []string{"host:false=/usr/bin/false"}, nil) - code := runExitCode(t, r, parseHostExecScript(t, `false`)) + r, _, _ := newHostExecRunner(t, []string{"host:hostfalse=/usr/bin/false"}, nil) + code := runExitCode(t, r, parseHostExecScript(t, `hostfalse`)) assert.Equal(t, uint8(1), code) } func TestHostExecStdinPlumbed(t *testing.T) { stdin := strings.NewReader("piped-input\n") - r, stdout, _ := newHostExecRunner(t, []string{"host:cat=/bin/cat"}, stdin) - err := r.Run(context.Background(), parseHostExecScript(t, `cat`)) + r, stdout, _ := newHostExecRunner(t, []string{"host:hostcat=/bin/cat"}, stdin) + err := r.Run(context.Background(), parseHostExecScript(t, `hostcat`)) require.NoError(t, err) assert.Equal(t, "piped-input\n", stdout.String()) } func TestHostExecRejectsNonAllowlistedName(t *testing.T) { - r, _, stderr := newHostExecRunner(t, []string{"host:echo=/bin/echo"}, nil) + r, _, stderr := newHostExecRunner(t, []string{"host:hostecho=/bin/echo"}, nil) code := runExitCode(t, r, parseHostExecScript(t, `notallowlisted`)) assert.Equal(t, uint8(127), code) assert.Contains(t, stderr.String(), "command not allowed") @@ -153,19 +160,17 @@ func TestHostExecForwardsHomeAndLang(t *testing.T) { assert.Contains(t, out, "LANG=C.UTF-8") } -// TestHostExecOnlyApprovedNamesDispatch ensures that an allowlisted *builtin* -// name still dispatches to the builtin and does not reach the host-exec path -// — the host: namespace is consulted only when the name is not a known -// builtin. +// TestHostExecBuiltinTakesPrecedence verifies that when a name is both an +// allowlisted builtin (rshell:echo) AND a configured host: entry, the +// builtin runs and the host binary is never exec'd. The host: entry +// points at /usr/bin/false on purpose: if the host path were +// erroneously preferred, the script would exit 1; because the builtin +// wins it exits 0 with the expected stdout. func TestHostExecBuiltinTakesPrecedence(t *testing.T) { - // "echo" is a known builtin in rshell. If the host:echo entry were - // erroneously preferred, /bin/echo would emit "FROM_HOST" via its - // argv, but because we pass the literal string and rely on builtin - // echo, the output ends with a single newline produced by the builtin. r, stdout, _ := newHostExecRunner(t, - []string{"rshell:echo", "host:echo=/bin/echo"}, nil) + []string{"rshell:echo", "host:echo=/usr/bin/false"}, nil) err := r.Run(context.Background(), parseHostExecScript(t, `echo from-builtin`)) - require.NoError(t, err) + require.NoError(t, err, "builtin echo must win — non-zero exit means /usr/bin/false ran") assert.Equal(t, "from-builtin\n", stdout.String()) } From f28fe13c4ba412a3d69dcd624228a1927281b014 Mon Sep 17 00:00:00 2001 From: Jules Macret Date: Wed, 6 May 2026 15:04:00 +0200 Subject: [PATCH 4/6] fix(interp): surface ctx.Err() from host-exec so timeout/cancel propagate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address Codex review (PR #228, host_exec_linux.go:64). When MaxExecutionTime or the parent context expired while a host binary was running, exec.CommandContext killed the child and cmd.Run returned *exec.ExitError with ExitCode() == -1. The previous code mapped that to exit 130 and returned, leaving r.exit.err nil — so Run() returned ExitStatus(130) instead of context.DeadlineExceeded / context.Canceled. Net effect: cmd/rshell's timeout path never printed "execution timed out" or returned 124, and run-span telemetry classed the run as success. Now check ctx.Err() before mapping the signal to a numeric code: when the context expired, propagate the context error via r.exit.fatal so Run() returns it directly. The numeric return becomes symbolic (only observed when r.exit.err is nil). Test: new TestHostExecDeadlineSurfacesAsDeadlineExceeded asserts that a 100ms ctx.WithTimeout against `sleep 30` returns context.DeadlineExceeded within 2s; updated TestHostExecContextCancellationKillsProcess to assert context.Canceled is the propagated error. Both pass on debian:bookworm-slim. Co-Authored-By: Claude Opus 4.7 (1M context) --- interp/host_exec_linux.go | 17 ++++++++++++++++- interp/host_exec_test.go | 30 ++++++++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/interp/host_exec_linux.go b/interp/host_exec_linux.go index 04df4502f..24dc69579 100644 --- a/interp/host_exec_linux.go +++ b/interp/host_exec_linux.go @@ -48,13 +48,28 @@ func (r *Runner) runHostCommand(ctx context.Context, path string, args []string) cmd.Stderr = r.stderr err := cmd.Run() + // If the runner's context expired (MaxExecutionTime, parent cancel, + // or builtin-style cooperative cancel), exec.CommandContext kills + // the child with SIGKILL and cmd.Run returns *exec.ExitError with + // ExitCode() == -1. We must surface ctx.Err() back through Run() + // rather than mapping the signal to a numeric exit code, otherwise + // Run() returns ExitStatus(130) and the CLI's timeout path + // (context.DeadlineExceeded → "execution timed out", exit 124) + // never fires and run-span telemetry is misclassified as success. + // Use exit.fatal so the err is recorded; the returned uint8 is only + // observed when r.exit.err is nil, so the value is symbolic here. + if ctxErr := ctx.Err(); ctxErr != nil { + r.exit.fatal(ctxErr) + return 130 + } if err == nil { return 0 } var exitErr *exec.ExitError if errors.As(err, &exitErr) { // ExitCode returns -1 if the process was terminated by a signal - // (e.g. SIGKILL on context cancellation). Map that to 130 — the + // without a context cancel having fired (e.g. an external + // SIGKILL from outside this process). Map that to 130 — the // shell-conventional "terminated" code — so the caller can still // observe a non-zero exit. code := exitErr.ExitCode() diff --git a/interp/host_exec_test.go b/interp/host_exec_test.go index 1203e9ff4..facdb5033 100644 --- a/interp/host_exec_test.go +++ b/interp/host_exec_test.go @@ -126,13 +126,39 @@ func TestHostExecContextCancellationKillsProcess(t *testing.T) { cancel() select { - case <-done: - // success: the run returned within the deadline below. + case err := <-done: + // The cancellation must propagate as context.Canceled, not as a + // numeric exit status. Otherwise the CLI's timeout/cancel path + // would be skipped and run telemetry would be misclassified. + require.ErrorIs(t, err, context.Canceled) case <-time.After(2 * time.Second): t.Fatal("host process was not killed within 2s of context cancel") } } +// TestHostExecDeadlineSurfacesAsDeadlineExceeded verifies that a parent +// context deadline kills the host binary AND surfaces as +// context.DeadlineExceeded back through Run(), so that the CLI's +// --timeout path ("execution timed out after Xms" / exit 124) and +// run-span outcome classification both work for host-binary invocations +// the same way they do for builtins. +func TestHostExecDeadlineSurfacesAsDeadlineExceeded(t *testing.T) { + r, _, _ := newHostExecRunner(t, []string{"host:sleep=/bin/sleep"}, nil) + + ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) + defer cancel() + + start := time.Now() + err := r.Run(ctx, parseHostExecScript(t, `sleep 30`)) + elapsed := time.Since(start) + + require.ErrorIs(t, err, context.DeadlineExceeded, + "host-exec context deadline must surface as context.DeadlineExceeded, "+ + "not ExitStatus(130). Got %v", err) + assert.Less(t, elapsed, 2*time.Second, + "deadline should fire within ~100ms, but Run() took %s", elapsed) +} + func TestHostExecEnvFilteredToAllowlist(t *testing.T) { // Set a sentinel env var that should NOT propagate, and one that // should. /usr/bin/env prints the resulting environment; we assert From fd93d4be97f13c6a1a0267abef72d66516a52a61 Mon Sep 17 00:00:00 2001 From: Jules Macret Date: Wed, 6 May 2026 16:01:10 +0200 Subject: [PATCH 5/6] fix(interp): read host-exec env from runner overlay, not ambient process env MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address Codex review (PR #228, host_exec_linux.go:95). Previously filterHostEnv() called os.LookupEnv(name) for each allowlisted name — the ambient Go process env. That broke three guarantees: 1. The runner's documented "no host environment inherited by default" property: a runner constructed with the empty default Env still leaked PATH/HOME/LANG to host binaries from the surrounding Go process. 2. interp.Env(...) caller-provided values: the runner env was ignored entirely, so the host binary saw the wrong PATH/HOME/LANG. 3. Inline command assignments (PATH=/safe hostcmd): again ignored. filterHostEnv is now a method on Runner that reads from r.writeEnv — the same overlay that builtins read from. call() applies inline assigns to r.writeEnv before dispatching, so the host binary sees the post-assignment view. New tests on debian:bookworm-slim: - TestHostExecDefaultEnvIsEmpty: with no interp.Env set, ambient PATH/HOME/LANG must NOT appear in the host binary's env. - TestHostExecInlineAssignmentTakesEffect: PATH=/inline-override env must show PATH=/inline-override and not the runner's initial PATH. Existing TestHostExecEnvFilteredToAllowlist / TestHostExecForwardsHomeAndLang updated to plant env via interp.Env rather than t.Setenv. Co-Authored-By: Claude Opus 4.7 (1M context) --- interp/host_exec_linux.go | 30 +++++++++----- interp/host_exec_test.go | 86 ++++++++++++++++++++++++++++++++------- 2 files changed, 90 insertions(+), 26 deletions(-) diff --git a/interp/host_exec_linux.go b/interp/host_exec_linux.go index 24dc69579..ff7dd61b5 100644 --- a/interp/host_exec_linux.go +++ b/interp/host_exec_linux.go @@ -17,14 +17,13 @@ package interp import ( "context" "errors" - "os" "os/exec" ) -// hostEnvAllowlist is the set of environment variables forwarded to host -// binaries. Anything else from the parent process env is stripped so that -// host invocations do not leak ambient configuration that the rest of the -// shell deliberately keeps out. +// hostEnvAllowlist is the set of environment variable names forwarded to +// host binaries. Anything else in the runner environment is stripped so +// that host invocations do not leak ambient configuration that the rest +// of the shell deliberately keeps out. var hostEnvAllowlist = []string{"PATH", "HOME", "LANG"} // runHostCommand executes the host binary at path with args[1:] as its argv, @@ -40,7 +39,7 @@ var hostEnvAllowlist = []string{"PATH", "HOME", "LANG"} func (r *Runner) runHostCommand(ctx context.Context, path string, args []string) uint8 { cmd := exec.CommandContext(ctx, path, args[1:]...) cmd.Dir = r.Dir - cmd.Env = filterHostEnv() + cmd.Env = r.filterHostEnv() if r.stdin != nil { cmd.Stdin = r.stdin } @@ -86,14 +85,23 @@ func (r *Runner) runHostCommand(ctx context.Context, path string, args []string) } // filterHostEnv builds a minimal env slice for host binaries from the -// process env, forwarding only the names in hostEnvAllowlist. Variables -// that are unset are simply omitted from the result. -func filterHostEnv() []string { +// runner's environment overlay (r.writeEnv) — NOT the ambient Go +// process env — forwarding only the names in hostEnvAllowlist. +// Reading from r.writeEnv is what makes the runner's documented +// "empty by default, no host env inherited" guarantee hold for host +// binaries: an unset PATH/HOME/LANG in the runner is simply omitted, +// regardless of what the surrounding Go process exports. It also lets +// caller-provided values (interp.Env) and inline assignments +// (PATH=/safe hostcmd) take effect — those flow through r.writeEnv, +// where call() applies inline assigns before dispatch. +func (r *Runner) filterHostEnv() []string { out := make([]string, 0, len(hostEnvAllowlist)) for _, name := range hostEnvAllowlist { - if v, ok := os.LookupEnv(name); ok { - out = append(out, name+"="+v) + vr := r.writeEnv.Get(name) + if !vr.Declared() { + continue } + out = append(out, name+"="+vr.String()) } return out } diff --git a/interp/host_exec_test.go b/interp/host_exec_test.go index facdb5033..50661df0b 100644 --- a/interp/host_exec_test.go +++ b/interp/host_exec_test.go @@ -58,16 +58,29 @@ func parseHostExecScript(t *testing.T, src string) *syntax.File { // converts it into a pipe internally — providing a real file when we need // real stdin keeps the host-exec path identical to production. func newHostExecRunner(t *testing.T, hostEntries []string, stdin io.Reader) (*interp.Runner, *bytes.Buffer, *bytes.Buffer) { + t.Helper() + return newHostExecRunnerWithEnv(t, hostEntries, stdin, nil) +} + +// newHostExecRunnerWithEnv is like newHostExecRunner but also seeds the +// runner environment via interp.Env. The host-exec env-filtering tests +// must provide env this way (NOT via t.Setenv) because the +// implementation reads from r.writeEnv, not the ambient Go process env. +func newHostExecRunnerWithEnv(t *testing.T, hostEntries []string, stdin io.Reader, envPairs []string) (*interp.Runner, *bytes.Buffer, *bytes.Buffer) { t.Helper() stdout := &bytes.Buffer{} stderr := &bytes.Buffer{} if stdin == nil { stdin = strings.NewReader("") } - r, err := interp.New( + opts := []interp.RunnerOption{ interp.AllowedCommands(hostEntries), interp.StdIO(stdin, stdout, stderr), - ) + } + if envPairs != nil { + opts = append(opts, interp.Env(envPairs...)) + } + r, err := interp.New(opts...) require.NoError(t, err) t.Cleanup(func() { _ = r.Close() }) return r, stdout, stderr @@ -160,25 +173,29 @@ func TestHostExecDeadlineSurfacesAsDeadlineExceeded(t *testing.T) { } func TestHostExecEnvFilteredToAllowlist(t *testing.T) { - // Set a sentinel env var that should NOT propagate, and one that - // should. /usr/bin/env prints the resulting environment; we assert - // that PATH (allowlisted) shows up and the sentinel does not. - t.Setenv("PATH", "/usr/bin:/bin") + // Env values come from the runner's environment overlay (interp.Env), + // not the ambient Go process env. Use t.Setenv to plant a sentinel in + // the process env that must NOT leak through, and provide a + // runner-scoped PATH that the host binary should see. t.Setenv("RSHELL_HOST_EXEC_LEAK_CHECK", "should-not-appear") - r, stdout, _ := newHostExecRunner(t, []string{"host:env=/usr/bin/env"}, nil) + r, stdout, _ := newHostExecRunnerWithEnv(t, + []string{"host:env=/usr/bin/env"}, nil, + []string{"PATH=/usr/bin:/bin"}, + ) err := r.Run(context.Background(), parseHostExecScript(t, `env`)) require.NoError(t, err) out := stdout.String() - assert.Contains(t, out, "PATH=/usr/bin:/bin", "PATH must be forwarded") - assert.NotContains(t, out, "RSHELL_HOST_EXEC_LEAK_CHECK", "non-allowlisted env vars must be stripped") + assert.Contains(t, out, "PATH=/usr/bin:/bin", "PATH must be forwarded from runner env") + assert.NotContains(t, out, "RSHELL_HOST_EXEC_LEAK_CHECK", + "ambient Go process env must NOT leak — host binaries see only the runner env") } func TestHostExecForwardsHomeAndLang(t *testing.T) { - t.Setenv("HOME", "/tmp/host-exec-home") - t.Setenv("LANG", "C.UTF-8") - - r, stdout, _ := newHostExecRunner(t, []string{"host:env=/usr/bin/env"}, nil) + r, stdout, _ := newHostExecRunnerWithEnv(t, + []string{"host:env=/usr/bin/env"}, nil, + []string{"HOME=/tmp/host-exec-home", "LANG=C.UTF-8"}, + ) err := r.Run(context.Background(), parseHostExecScript(t, `env`)) require.NoError(t, err) out := stdout.String() @@ -186,6 +203,43 @@ func TestHostExecForwardsHomeAndLang(t *testing.T) { assert.Contains(t, out, "LANG=C.UTF-8") } +// TestHostExecDefaultEnvIsEmpty verifies the runner's "no host env +// inherited by default" guarantee: with no interp.Env option, even +// allowlisted names (PATH/HOME/LANG) must not be forwarded to the host +// binary, regardless of the ambient Go process env. Regression for the +// previous os.LookupEnv-based filter that ignored runner env. +func TestHostExecDefaultEnvIsEmpty(t *testing.T) { + // Plant ambient env that must be ignored. + t.Setenv("PATH", "/should/not/leak") + t.Setenv("HOME", "/should/not/leak/home") + t.Setenv("LANG", "should-not-leak") + + r, stdout, _ := newHostExecRunner(t, []string{"host:env=/usr/bin/env"}, nil) + err := r.Run(context.Background(), parseHostExecScript(t, `env`)) + require.NoError(t, err) + out := stdout.String() + assert.NotContains(t, out, "/should/not/leak", + "runner default env is empty; host binary must not see ambient PATH/HOME/LANG") +} + +// TestHostExecInlineAssignmentTakesEffect verifies that inline command +// assignments (PATH=/safe hostcmd) are visible to host binaries. They +// flow through r.writeEnv before call() dispatches, which is exactly +// where filterHostEnv now reads from. +func TestHostExecInlineAssignmentTakesEffect(t *testing.T) { + r, stdout, _ := newHostExecRunnerWithEnv(t, + []string{"host:env=/usr/bin/env"}, nil, + []string{"PATH=/initial"}, + ) + err := r.Run(context.Background(), parseHostExecScript(t, `PATH=/inline-override env`)) + require.NoError(t, err) + out := stdout.String() + assert.Contains(t, out, "PATH=/inline-override", + "inline assignment must override the runner env for that command") + assert.NotContains(t, out, "PATH=/initial", + "the original PATH must not also appear") +} + // TestHostExecBuiltinTakesPrecedence verifies that when a name is both an // allowlisted builtin (rshell:echo) AND a configured host: entry, the // builtin runs and the host binary is never exec'd. The host: entry @@ -200,8 +254,10 @@ func TestHostExecBuiltinTakesPrecedence(t *testing.T) { assert.Equal(t, "from-builtin\n", stdout.String()) } -// Sanity: make sure os.Environ has an unrelated value we can rely on for the -// negative side of TestHostExecEnvFilteredToAllowlist. +// Sanity: make sure t.Setenv actually plants the var in the ambient Go +// process env. Used as a baseline for tests that assert host binaries +// do NOT see ambient process env (so we can be sure the negative +// assertion isn't trivially satisfied by t.Setenv being a no-op). func TestHostExecOSEnvironHasUnrelatedVar(t *testing.T) { t.Setenv("RSHELL_HOST_EXEC_LEAK_CHECK", "yes") found := false From d4bd28b3f052a60baef8120495921bb862f9a60e Mon Sep 17 00:00:00 2001 From: Jules Macret Date: Wed, 6 May 2026 17:03:12 +0200 Subject: [PATCH 6/6] fix(interp): require vr.Exported in host-exec env filter to match bash Address Codex review (PR #228, host_exec_linux.go:101). Previously filterHostEnv accepted any Declared() variable in hostEnvAllowlist. That diverged from bash: a script-level assignment without a prior export, e.g. LANG=fresh hostcmd is shell-local in bash and is NOT propagated to child processes. Verified with `bash -c 'CUSTOM_VAR=/tmp; env | grep CUSTOM_VAR'` which prints nothing. filterHostEnv now requires both vr.Declared() and vr.Exported. The two valid sources of host-forwarded env are unaffected: - interp.Env(...) values are stored as exported (that is their purpose as the initial environment). - Inline command assignments (`PATH=/safe hostcmd`) propagate because call() at runner_exec.go:110 forces vr.Exported = true before dispatch. New test TestHostExecUnexportedAssignmentNotForwarded verifies the divergent case ('LANG=should-not-leak; env' must not show LANG in the host binary's environment). Passes on debian:bookworm-slim. Co-Authored-By: Claude Opus 4.7 (1M context) --- interp/host_exec_linux.go | 16 ++++++++++------ interp/host_exec_test.go | 20 ++++++++++++++++++++ 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/interp/host_exec_linux.go b/interp/host_exec_linux.go index ff7dd61b5..ed2864655 100644 --- a/interp/host_exec_linux.go +++ b/interp/host_exec_linux.go @@ -86,19 +86,23 @@ func (r *Runner) runHostCommand(ctx context.Context, path string, args []string) // filterHostEnv builds a minimal env slice for host binaries from the // runner's environment overlay (r.writeEnv) — NOT the ambient Go -// process env — forwarding only the names in hostEnvAllowlist. +// process env — forwarding only the names in hostEnvAllowlist that +// are also marked Exported. Matches bash semantics: a script-level +// assignment like `PATH=/tmp; hostcmd` does not propagate to the +// child unless PATH was previously exported (via interp.Env or an +// `export` statement). Inline command assignments +// (`PATH=/safe hostcmd`) propagate because call() forces +// vr.Exported = true before dispatch. +// // Reading from r.writeEnv is what makes the runner's documented // "empty by default, no host env inherited" guarantee hold for host // binaries: an unset PATH/HOME/LANG in the runner is simply omitted, -// regardless of what the surrounding Go process exports. It also lets -// caller-provided values (interp.Env) and inline assignments -// (PATH=/safe hostcmd) take effect — those flow through r.writeEnv, -// where call() applies inline assigns before dispatch. +// regardless of what the surrounding Go process exports. func (r *Runner) filterHostEnv() []string { out := make([]string, 0, len(hostEnvAllowlist)) for _, name := range hostEnvAllowlist { vr := r.writeEnv.Get(name) - if !vr.Declared() { + if !vr.Declared() || !vr.Exported { continue } out = append(out, name+"="+vr.String()) diff --git a/interp/host_exec_test.go b/interp/host_exec_test.go index 50661df0b..b473ca5a7 100644 --- a/interp/host_exec_test.go +++ b/interp/host_exec_test.go @@ -240,6 +240,26 @@ func TestHostExecInlineAssignmentTakesEffect(t *testing.T) { "the original PATH must not also appear") } +// TestHostExecUnexportedAssignmentNotForwarded mirrors bash semantics: +// a standalone assignment without a prior export (e.g. +// `LANG=fresh; hostcmd`) is shell-local and must NOT propagate to +// child processes. Verified against +// `bash -c 'CUSTOM_VAR=/tmp; env | grep CUSTOM_VAR'` which prints +// nothing. filterHostEnv now requires vr.Exported in addition to +// vr.Declared(). +func TestHostExecUnexportedAssignmentNotForwarded(t *testing.T) { + // LANG is not in the runner's initial env, so a script-level + // assignment to it creates a fresh, unexported variable. + r, stdout, _ := newHostExecRunner(t, []string{"host:env=/usr/bin/env"}, nil) + err := r.Run(context.Background(), + parseHostExecScript(t, `LANG=should-not-leak +env`)) + require.NoError(t, err) + out := stdout.String() + assert.NotContains(t, out, "should-not-leak", + "unexported script-level assignment must not propagate to host binaries (matches bash)") +} + // TestHostExecBuiltinTakesPrecedence verifies that when a name is both an // allowlisted builtin (rshell:echo) AND a configured host: entry, the // builtin runs and the host binary is never exec'd. The host: entry