databrickslabs · dgokeeffe · May 13, 2026 · May 13, 2026 · May 13, 2026 · May 13, 2026
diff --git a/.gitignore b/.gitignore
@@ -42,3 +42,11 @@ uv.lock
 
 # Agent-plane reference clone
 agent-plane-ref/
+
+# Playwright e2e SSO session state (workspace cookies — DO NOT commit)
+tests/e2e/auth.json
+tests/e2e/.cache/
+
+# Playwright browser snapshots / traces from failed runs
+tests/e2e/test-results/
+tests/e2e/playwright-report/
diff --git a/Makefile b/Makefile
@@ -22,12 +22,29 @@ APP_NAME      ?= coding-agents
 USER_EMAIL    = $(shell databricks current-user me --profile $(PROFILE) --output json 2>/dev/null | python3 -c "import sys,json; print(json.load(sys.stdin).get('userName',''))")
 WORKSPACE_PATH = /Workspace/Users/$(USER_EMAIL)/apps/$(APP_NAME)
 
-.PHONY: help test deploy redeploy create-app create-pat sync deploy-app status open clean
+.PHONY: help test integration-test e2e-test e2e-auth deploy redeploy create-app create-pat sync deploy-app status open clean enterprise-doctor
 
 # ── Help ─────────────────────────────────────────────
 
-test: ## Run unit tests
-	uv run pytest tests/ -v
+test: ## Run unit tests (fast — excludes Docker integration + Playwright e2e)
+	uv run pytest tests/ -v --ignore=tests/integration --ignore=tests/e2e
+
+integration-test: ## Run Docker-based pipeline integration test (~3-5 min wall time)
+	uv run pytest tests/integration/ -v -s -rs
+
+e2e-test: ## Run Playwright e2e against live deployed app (needs `make e2e-auth` first)
+	uv run pytest tests/e2e/ -v -s
+
+e2e-auth: ## Record SSO session for e2e tests (one-time per cookie expiry)
+	@# Resolve the URL of the deployed app (APP_NAME) via the configured profile,
+	@# then launch a headed Chromium that saves storage state to tests/e2e/auth.json.
+	@url=$$(databricks apps get $(APP_NAME) --profile $(PROFILE) --output json 2>/dev/null \
+		| python3 -c "import sys,json; print(json.load(sys.stdin)['url'])") && \
+	echo "Recording SSO session against $$url ..." && \
+	uv run playwright codegen --save-storage tests/e2e/auth.json "$$url"
+	@echo ""
+	@echo "Auth state saved to tests/e2e/auth.json (gitignored)."
+	@echo 'Run `make e2e-test PROFILE=$(PROFILE)` to execute the suite.'
 
 help: ## Show this help
 	@grep -E '^[a-zA-Z0-9_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "  \033[36m%-18s\033[0m %s\n", $$1, $$2}'
@@ -92,6 +109,17 @@ open: ## Open the app in browser
 		| python3 -c "import sys,json; print(json.load(sys.stdin).get('url',''))" \
 		| xargs open
 
+# ── Enterprise mode ─────────────────────────────────
+
+enterprise-doctor: ## Probe configured enterprise mirrors (PyPI, npm, GitHub) for reachability
+	@# Prefer the existing venv so the doctor doesn't itself trigger a uv resolve (which would
+	@# fail if PyPI is firewalled — the scenario this target diagnoses); else fall back to uv run.
+	@if [ -x .venv/bin/python ]; then \
+		.venv/bin/python scripts/enterprise_doctor.py; \
+	else \
+		uv run python scripts/enterprise_doctor.py; \
+	fi
+
 # ── Cleanup (destructive) ───────────────────────────
 
 clean: ## Remove the app (destructive)

diff --git a/app.py b/app.py
@@ -21,6 +21,7 @@
 import requests
 
 import app_state
+import enterprise_config
 from utils import ensure_https, get_gateway_host
 from pat_rotator import PATRotator
 from telemetry import log_telemetry, set_product_info
@@ -161,6 +162,52 @@ def _run_step(step_id, command):
         _update_step(step_id, status="error", completed_at=time.time(), error=str(e))
 
 
+def _build_terminal_shell_env(base_env: dict) -> dict:
+    """Build the env dict for a user terminal PTY.
+
+    Copies ``base_env`` (typically ``os.environ``; any mapping works and it
+    is never mutated), sets ``TERM=xterm-256color``, and strips the
+    credentials and CLI-state vars that should never reach a user shell:
+
+    - ``CLAUDECODE`` / ``CLAUDE_CODE_SESSION`` — would mark the terminal as
+      a nested-Claude session.
+    - ``DATABRICKS_TOKEN`` / ``DATABRICKS_HOST`` — removed so CLIs read
+      ``~/.databrickscfg`` per-request and pick up rotated PATs instead of
+      a stale env snapshot.
+    - ``GEMINI_API_KEY`` — same pattern, read from config file instead.
+    - ``NPM_TOKEN`` / ``UV_DEFAULT_INDEX`` / ``UV_INDEX_*_PASSWORD`` /
+      ``UV_INDEX_*_USERNAME`` / ``npm_config_//host/:_authToken`` —
+      deployer-level credentials from app.yaml that must not be readable
+      via ``env`` inside the user terminal; ``~/.npmrc`` (written by
+      ``enterprise_config.bootstrap``) still holds the registry config.
+      The ``npm_config_`` prefix is matched case-insensitively because npm
+      also honours ``NPM_CONFIG_...`` spellings. (F-01)
+
+    Returns a new plain ``dict``.
+    """
+    shell_env = dict(base_env)
+    shell_env["TERM"] = "xterm-256color"
+
+    # Always-strip fixed names
+    for key in (
+        "CLAUDECODE", "CLAUDE_CODE_SESSION",
+        "DATABRICKS_TOKEN", "DATABRICKS_HOST",
+        "GEMINI_API_KEY",
+        "NPM_TOKEN", "UV_DEFAULT_INDEX",
+    ):
+        shell_env.pop(key, None)
+
+    # Pattern-strip operator-named registry credentials
+    for key in list(shell_env):
+        is_npm_auth = key.lower().startswith("npm_config_//")
+        is_uv_auth = key.startswith("UV_INDEX_") and key.endswith(
+            ("_PASSWORD", "_USERNAME")
+        )
+        if is_npm_auth or is_uv_auth:
+            del shell_env[key]
+    return shell_env
+
+
 def _setup_git_config():
     """Configure git identity and hooks by writing files directly (no subprocess)."""
     home = os.environ.get("HOME", "/app/python/source_code")
@@ -343,6 +390,12 @@ def run_setup():
         setup_state["status"] = "running"
         setup_state["started_at"] = time.time()
 
+    # Apply enterprise (proxy/registry) config before any subprocess runs:
+    # writes ~/.npmrc, pushes derived env vars (npm_config_registry, CURL_CA_BUNDLE,
+    # etc.) into os.environ so every child process inherits them, and logs a
+    # banner of the effective config. No-op when no enterprise env vars are set.
+    enterprise_config.bootstrap()
+
     # Probe AI Gateway once; result is cached in _GATEWAY_RESOLVED for subprocesses
     from utils import resolve_and_cache_gateway
     resolve_and_cache_gateway()
@@ -1009,21 +1062,11 @@ def create_session():
     label = data.get("label", "")
     try:
         master_fd, slave_fd = pty.openpty()
-        # Set up environment for the shell
-        shell_env = os.environ.copy()
-        shell_env["TERM"] = "xterm-256color"
-        # Remove Claude Code env vars so the browser terminal isn't seen as nested
-        shell_env.pop("CLAUDECODE", None)
-        shell_env.pop("CLAUDE_CODE_SESSION", None)
-        # Remove DATABRICKS_TOKEN and DATABRICKS_HOST so CLI/SDK reads from
-        # ~/.databrickscfg (always current after rotation) instead of inheriting
-        # a stale env var snapshot. The SDK skips config file loading when
-        # DATABRICKS_HOST is set in env (even without credentials).
-        shell_env.pop("DATABRICKS_TOKEN", None)
-        shell_env.pop("DATABRICKS_HOST", None)
-        # Also strip CLI-specific API keys so they read from config files
-        # (always current after rotation) instead of stale env snapshots.
-        shell_env.pop("GEMINI_API_KEY", None)
+        # Set up environment for the shell — strips the PAT, registry
+        # tokens, and CLI-state vars that must not be readable from the
+        # user's terminal. See _build_terminal_shell_env docstring for the
+        # full list.
+        shell_env = _build_terminal_shell_env(os.environ)
         # Ensure HOME is set correctly
         if not shell_env.get("HOME") or shell_env["HOME"] == "/":
             shell_env["HOME"] = "/app/python/source_code"

diff --git a/app.yaml b/app.yaml
@@ -21,3 +21,54 @@ env:
     value: 0
   - name: MAX_CONCURRENT_SESSIONS
     value: "5"
+
+  # ─── Enterprise mode (proxy / registry redirects) ────────────────────────
+  # Uncomment and set the env vars below to run CoDA in locked-down enterprise
+  # networks. All are optional — set only what your environment requires.
+  # See docs/enterprise.md for the full contract, JFrog mirror conventions,
+  # and troubleshooting.
+  #
+  # Master switch — when true, logs a startup banner and warns on missing
+  # recommended mirrors. Behavioural overrides are still driven by the
+  # individual vars below.
+  # - name: ENTERPRISE_MODE
+  #   value: "true"
+  #
+  # Corporate egress proxy + TLS root CA.
+  # - name: HTTPS_PROXY
+  #   value: http://proxy.corp.example.com:3128
+  # - name: NO_PROXY
+  #   value: localhost,127.0.0.1,.corp.example.com
+  # - name: REQUESTS_CA_BUNDLE
+  #   value: /etc/ssl/certs/corp-root.pem
+  # - name: NODE_EXTRA_CA_CERTS
+  #   value: /etc/ssl/certs/corp-root.pem
+  #
+  # Internal PyPI proxy (e.g. JFrog pypi-virtual).
+  # - name: UV_DEFAULT_INDEX
+  #   value: https://jfrog.example.com/api/pypi/pypi-virtual/simple/
+  #
+  # Internal npm registry. NPM_TOKEN should be a Databricks secret reference.
+  # - name: NPM_REGISTRY
+  #   value: https://jfrog.example.com/api/npm/npm-virtual/
+  # - name: NPM_TOKEN
+  #   valueFrom: <secret>
+  #
+  # GitHub release mirror — must serve the same path tail as github.com.
+  # - name: GITHUB_RELEASE_MIRROR
+  #   value: https://jfrog.example.com/artifactory/github-mirror
+  # - name: GITHUB_API_BASE
+  #   value: https://ghe.example.com/api/v3
+  #
+  # Claude installer + Hermes package spec — override when the upstream URLs
+  # are firewalled.
+  # - name: CLAUDE_INSTALLER_URL
+  #   value: https://mirror.example.com/claude-install.sh
+  # - name: HERMES_PIP_URL
+  #   value: hermes-agent==1.2.3
+  #
+  # Drop public MCP servers (DeepWiki, Exa) entirely by setting these to "".
+  # - name: DEEPWIKI_MCP_URL
+  #   value: ""
+  # - name: EXA_MCP_URL
+  #   value: ""