Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,11 @@ uv.lock

# Agent-plane reference clone
agent-plane-ref/

# Playwright e2e SSO session state (workspace cookies — DO NOT commit)
tests/e2e/auth.json
tests/e2e/.cache/

# Playwright browser snapshots / traces from failed runs
tests/e2e/test-results/
tests/e2e/playwright-report/
34 changes: 31 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,29 @@ APP_NAME ?= coding-agents
USER_EMAIL = $(shell databricks current-user me --profile $(PROFILE) --output json 2>/dev/null | python3 -c "import sys,json; print(json.load(sys.stdin).get('userName',''))")
WORKSPACE_PATH = /Workspace/Users/$(USER_EMAIL)/apps/$(APP_NAME)

.PHONY: help test deploy redeploy create-app create-pat sync deploy-app status open clean
.PHONY: help test integration-test e2e-test e2e-auth deploy redeploy create-app create-pat sync deploy-app status open clean enterprise-doctor

# ── Help ─────────────────────────────────────────────

test: ## Run unit tests
uv run pytest tests/ -v
test: ## Run unit tests (fast — excludes Docker integration + Playwright e2e)
uv run pytest tests/ -v --ignore=tests/integration --ignore=tests/e2e

# Runs only the suite under tests/integration/. pytest flags: -v lists each
# test by name, -s turns off output capture so output streams live, and -rs
# adds the reason for every skipped test to the end-of-run summary.
integration-test: ## Run Docker-based pipeline integration test (~3-5 min wall time)
uv run pytest tests/integration/ -v -s -rs

# Runs only the suite under tests/e2e/ with per-test names (-v) and output
# capture disabled (-s). Per the help text, run `make e2e-auth` first — that
# target saves the session state to tests/e2e/auth.json.
e2e-test: ## Run Playwright e2e against live deployed app (needs `make e2e-auth` first)
uv run pytest tests/e2e/ -v -s

e2e-auth: ## Record SSO session for e2e tests (one-time per cookie expiry)
	@# Resolve the URL of $(APP_NAME) via the configured profile, then launch a
	@# headed Chromium that saves storage state to tests/e2e/auth.json.
	@# The lookup uses APP_NAME (not a literal app name) so that overriding
	@# APP_NAME on the command line records auth against the same app that the
	@# other targets operate on.
	@url=$$(databricks apps get $(APP_NAME) --profile $(PROFILE) --output json 2>/dev/null \
		| python3 -c "import sys,json; print(json.load(sys.stdin)['url'])") && \
	echo "Recording SSO session against $$url ..." && \
	uv run playwright codegen --save-storage tests/e2e/auth.json "$$url"
	@echo ""
	@echo "Auth state saved to tests/e2e/auth.json (gitignored)."
	@# Single quotes are required here: inside double quotes the shell treats
	@# the backticks as command substitution and would actually run the suite.
	@echo 'Run `make e2e-test PROFILE=$(PROFILE)` to execute the suite.'

# Self-documenting help: grep picks every `name: ... ## description` line out
# of $(MAKEFILE_LIST); awk splits each on the `## ` separator and prints the
# target name (ANSI colour 36, left-padded to 18 columns) followed by its
# description. `$$` is make's escape for a literal `$` passed to the shell.
help: ## Show this help
@grep -E '^[a-zA-Z0-9_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf " \033[36m%-18s\033[0m %s\n", $$1, $$2}'
Expand Down Expand Up @@ -92,6 +109,17 @@ open: ## Open the app in browser
| python3 -c "import sys,json; print(json.load(sys.stdin).get('url',''))" \
| xargs open

# ── Enterprise mode ─────────────────────────────────

enterprise-doctor: ## Probe configured enterprise mirrors (PyPI, npm, GitHub) for reachability
	@# Prefer the interpreter that already lives in .venv. Going through uv
	@# would start a dependency resolve, and that resolve fails when PyPI is
	@# firewalled, which is the very situation this target exists to diagnose.
	@# uv is only the fallback when no executable .venv/bin/python is present.
	@if [ ! -x .venv/bin/python ]; then \
		uv run python scripts/enterprise_doctor.py; \
	else \
		.venv/bin/python scripts/enterprise_doctor.py; \
	fi

# ── Cleanup (destructive) ───────────────────────────

clean: ## Remove the app (destructive)
Expand Down
73 changes: 58 additions & 15 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import requests

import app_state
import enterprise_config
from utils import ensure_https, get_gateway_host
from pat_rotator import PATRotator
from telemetry import log_telemetry, set_product_info
Expand Down Expand Up @@ -161,6 +162,52 @@ def _run_step(step_id, command):
_update_step(step_id, status="error", completed_at=time.time(), error=str(e))


def _build_terminal_shell_env(base_env: dict) -> dict:
"""Build the env dict for a user terminal PTY.

Starts from ``base_env`` (typically ``os.environ``) and strips the
credentials and CLI-state vars that should never reach a user shell:

- ``CLAUDECODE`` / ``CLAUDE_CODE_SESSION`` — would mark the terminal as
a nested-Claude session.
- ``DATABRICKS_TOKEN`` / ``DATABRICKS_HOST`` — forces CLIs to read
``~/.databrickscfg`` per-request so they pick up rotated PATs without
an env-snapshot rewrite.
- ``GEMINI_API_KEY`` — same pattern, read from config file instead.
- ``NPM_TOKEN`` / ``UV_DEFAULT_INDEX`` / ``UV_INDEX_*_PASSWORD`` /
``UV_INDEX_*_USERNAME`` / ``npm_config_//host/:_authToken`` —
deployer-level credentials from app.yaml that must not be readable
via ``env`` inside the user terminal. The user's npm/uv operations
still work because ``~/.npmrc`` (written by
``enterprise_config.bootstrap``) holds the registry config — they
just can't see the bearer token in plaintext. (F-01)
"""
shell_env = base_env.copy()
shell_env["TERM"] = "xterm-256color"

# Always-strip fixed names
for key in (
"CLAUDECODE", "CLAUDE_CODE_SESSION",
"DATABRICKS_TOKEN", "DATABRICKS_HOST",
"GEMINI_API_KEY",
"NPM_TOKEN", "UV_DEFAULT_INDEX",
):
shell_env.pop(key, None)

# Pattern-strip operator-named registry credentials
for key in list(shell_env.keys()):
if (
key.startswith("npm_config_//") # derived registry-auth tokens
or (
key.startswith("UV_INDEX_")
and (key.endswith("_PASSWORD") or key.endswith("_USERNAME"))
)
):
shell_env.pop(key, None)

return shell_env


def _setup_git_config():
"""Configure git identity and hooks by writing files directly (no subprocess)."""
home = os.environ.get("HOME", "/app/python/source_code")
Expand Down Expand Up @@ -343,6 +390,12 @@ def run_setup():
setup_state["status"] = "running"
setup_state["started_at"] = time.time()

# Apply enterprise (proxy/registry) config before any subprocess runs:
# writes ~/.npmrc, pushes derived env vars (npm_config_registry, CURL_CA_BUNDLE,
# etc.) into os.environ so every child process inherits them, and logs a
# banner of the effective config. No-op when no enterprise env vars are set.
enterprise_config.bootstrap()

# Probe AI Gateway once; result is cached in _GATEWAY_RESOLVED for subprocesses
from utils import resolve_and_cache_gateway
resolve_and_cache_gateway()
Expand Down Expand Up @@ -1009,21 +1062,11 @@ def create_session():
label = data.get("label", "")
try:
master_fd, slave_fd = pty.openpty()
# Set up environment for the shell
shell_env = os.environ.copy()
shell_env["TERM"] = "xterm-256color"
# Remove Claude Code env vars so the browser terminal isn't seen as nested
shell_env.pop("CLAUDECODE", None)
shell_env.pop("CLAUDE_CODE_SESSION", None)
# Remove DATABRICKS_TOKEN and DATABRICKS_HOST so CLI/SDK reads from
# ~/.databrickscfg (always current after rotation) instead of inheriting
# a stale env var snapshot. The SDK skips config file loading when
# DATABRICKS_HOST is set in env (even without credentials).
shell_env.pop("DATABRICKS_TOKEN", None)
shell_env.pop("DATABRICKS_HOST", None)
# Also strip CLI-specific API keys so they read from config files
# (always current after rotation) instead of stale env snapshots.
shell_env.pop("GEMINI_API_KEY", None)
# Set up environment for the shell — strips the nested-Claude markers,
# DATABRICKS_TOKEN / DATABRICKS_HOST, GEMINI_API_KEY, and registry
# credentials that must not be readable from the user's terminal.
# See _build_terminal_shell_env docstring for the full list.
shell_env = _build_terminal_shell_env(os.environ)
# Ensure HOME is set correctly
if not shell_env.get("HOME") or shell_env["HOME"] == "/":
shell_env["HOME"] = "/app/python/source_code"
Expand Down
51 changes: 51 additions & 0 deletions app.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,54 @@ env:
value: 0
- name: MAX_CONCURRENT_SESSIONS
value: "5"

# ─── Enterprise mode (proxy / registry redirects) ────────────────────────
# Uncomment and set the env vars below to run CoDA in locked-down enterprise
# networks. All are optional — set only what your environment requires.
# See docs/enterprise.md for the full contract, JFrog mirror conventions,
# and troubleshooting.
#
# Master switch — when true, logs a startup banner and warns on missing
# recommended mirrors. Behavioural overrides are still driven by the
# individual vars below.
# - name: ENTERPRISE_MODE
# value: "true"
#
# Corporate egress proxy + TLS root CA.
# - name: HTTPS_PROXY
# value: http://proxy.corp.example.com:3128
# - name: NO_PROXY
# value: localhost,127.0.0.1,.corp.example.com
# - name: REQUESTS_CA_BUNDLE
# value: /etc/ssl/certs/corp-root.pem
# - name: NODE_EXTRA_CA_CERTS
# value: /etc/ssl/certs/corp-root.pem
#
# Internal PyPI proxy (e.g. JFrog pypi-virtual).
# - name: UV_DEFAULT_INDEX
# value: https://jfrog.example.com/api/pypi/pypi-virtual/simple/
#
# Internal npm registry. NPM_TOKEN should be a Databricks secret reference.
# - name: NPM_REGISTRY
# value: https://jfrog.example.com/api/npm/npm-virtual/
# - name: NPM_TOKEN
# valueFrom: <secret>
#
# GitHub release mirror — must serve the same path tail as github.com.
# - name: GITHUB_RELEASE_MIRROR
# value: https://jfrog.example.com/artifactory/github-mirror
# - name: GITHUB_API_BASE
# value: https://ghe.example.com/api/v3
#
# Claude installer + Hermes package spec — override when the upstream URLs
# are firewalled.
# - name: CLAUDE_INSTALLER_URL
# value: https://mirror.example.com/claude-install.sh
# - name: HERMES_PIP_URL
# value: hermes-agent==1.2.3
#
# Drop public MCP servers (DeepWiki, Exa) entirely by setting these to "".
# - name: DEEPWIKI_MCP_URL
# value: ""
# - name: EXA_MCP_URL
# value: ""
Loading