diff --git a/.env.example b/.env.example
index ef8f6bc6..b089ed59 100644
--- a/.env.example
+++ b/.env.example
@@ -11,70 +11,114 @@
 #   python scripts/run_text_only.py --record-id 1.1.2
 
 # ==============================================
-# Required: API Keys
+# API Configs
 # ==============================================
 
-# --- ElevenLabs (user simulator) ---
+# --- ElevenLabs ---
+#i ElevenLabs API key for the user simulator.
+#d secret
 ELEVENLABS_API_KEY=your_elevenlabs_api_key_here
 
-# ElevenLabs Conversational AI agent IDs for user simulation.
-# Create a Conversational AI agent at https://elevenlabs.io/conversational-ai and copy its agent ID.
-# You need two agents: one with a female voice (persona 1) and one with a male voice (persona 2).
-# These are used to simulate different caller personas during benchmark conversations.
+# --- LLM / Text Judge ---
+#i OpenAI key for assistant LLM and text judge metrics.
+#d secret
+OPENAI_API_KEY=your_openai_api_key_here
 
-EVA_DEFAULT_USER_F=your_elevenlabs_agent_id_for_default_user_f
-EVA_DEFAULT_USER_M=your_elevenlabs_agent_id_for_default_user_m
+# --- Audio Judge (Gemini via GCP) ---
+#i Path to GCP service-account JSON for Gemini audio judge metrics.
+#d path
+GOOGLE_APPLICATION_CREDENTIALS=path/to/your/service-account-credentials.json
 
-# --- LLM (assistant + text judge metrics) ---
-OPENAI_API_KEY=your_openai_api_key_here
+# --- Faithfulness Metric (Claude via Bedrock) ---
+#i AWS access key for Claude via Bedrock (faithfulness metric).
+#d secret
+AWS_ACCESS_KEY_ID=your_aws_access_key_id_here
+
+#i AWS secret access key.
+#d secret
+AWS_SECRET_ACCESS_KEY=your_aws_secret_access_key_here
+
+# --- Alternative providers (optional) ---
+# If you only have an OpenAI key you can skip AWS and set JUDGE_MODEL=gpt-5.2
+# to override all text judges. Audio judge metrics still require Gemini.
+
+#i Azure OpenAI key (alternative to direct OpenAI).
+#d secret
+#v AZURE_OPENAI_API_KEY=your_azure_openai_api_key_here
+
+#i Azure OpenAI endpoint URL.
+#d string
+#v AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/
+
+#i Google API key (alternative to service-account credentials for Gemini).
+#d secret
+#v GOOGLE_API_KEY=your_google_api_key_here
+
+# ==============================================
+# Voice Pipeline
+# ==============================================
 
-# --- STT/TTS (voice pipeline) ---
-# The API key and model for your chosen provider must be passed via the *_PARAMS JSON.
+# Pipeline mode is controlled by the UI radio (LLM / S2S / AudioLLM).
+# The #x conditions below ensure each variable is only active for the right mode.
 
-# STT provider: assemblyai | cartesia | deepgram | deepgram-flux | elevenlabs | nvidia | nvidia-baseten | openai
+# --- LLM mode: STT ---
+#i STT provider for the voice pipeline.
+#d enum
+#e assemblyai,cartesia,deepgram,deepgram-flux,elevenlabs,nvidia,nvidia-baseten,openai
+#x pipeline_mode=LLM
 EVA_MODEL__STT=cartesia
-# Must include "api_key" and "model" for your chosen provider:
+
+#i STT provider parameters. Must include "api_key" and "model".
+#d json_object
+#x pipeline_mode=LLM
 EVA_MODEL__STT_PARAMS='{"api_key": "your_cartesia_api_key", "model": "ink-whisper"}'
 
-# TTS provider: cartesia | chatterbox | elevenlabs | gemini | kokoro | nvidia-baseten | openai | xtts
+# --- LLM mode: TTS ---
+#i TTS provider for the voice pipeline.
+#d enum
+#e cartesia,chatterbox,elevenlabs,gemini,kokoro,nvidia-baseten,openai,xtts
+#x pipeline_mode=LLM
 EVA_MODEL__TTS=cartesia
-# Must include "api_key" and "model" for your chosen provider:
-EVA_MODEL__TTS_PARAMS='{"api_key": "your_cartesia_api_key", "model": "sonic"}'
-# For round-robin load balancing, use "urls" instead of "url":
-#   EVA_MODEL__TTS_PARAMS='{"api_key": "...", "model": "sonic", "urls": ["http://server1/v1", "http://server2/v1"]}'
-
-# --- Metrics judge models ---
-# Google credentials (audio judge metrics default to Gemini)
-GOOGLE_APPLICATION_CREDENTIALS=path/to/your/service-account-credentials.json
 
-# AWS credentials (faithfulness metric defaults to Claude via Bedrock)
-AWS_ACCESS_KEY_ID=your_aws_access_key_id_here
-AWS_SECRET_ACCESS_KEY=your_aws_secret_access_key_here
+#i TTS provider parameters. Must include "api_key" and "model". Use "urls" for round-robin load balancing.
+#d json_object
+#x pipeline_mode=LLM
+EVA_MODEL__TTS_PARAMS='{"api_key": "your_cartesia_api_key", "model": "sonic"}'
 
-# If you only have an OpenAI key, you can skip the AWS credentials above and
-# override all text judge models (including faithfulness) to use OpenAI instead
-# (results may be less accurate):
-#   JUDGE_MODEL=gpt-5.2
-# Audio judge metrics (agent_speech_fidelity, user_speech_fidelity) still require
-# Gemini. To skip them, run only text-based metrics, e.g.:
-#   EVA_METRICS=task_completion,faithfulness,conciseness,turn_taking
+# --- S2S mode ---
+#i Speech-to-speech model name.
+#d string
+#x pipeline_mode=S2S
+#v EVA_MODEL__S2S=openai
+
+#i Speech-to-speech model parameters.
+#d json_object
+#x pipeline_mode=S2S
+#v EVA_MODEL__S2S_PARAMS='{"model": "gpt-realtime-mini", "api_key": ""}'
+
+# --- AudioLLM mode ---
+#i Audio-input LLM model name.
+#d string
+#x pipeline_mode=AudioLLM
+#v EVA_MODEL__AUDIO_LLM=
+
+#i Audio-input LLM model parameters.
+#d json_object
+#x pipeline_mode=AudioLLM
+#v EVA_MODEL__AUDIO_LLM_PARAMS='{"url": "", "api_key": ""}'
+
+# --- Framework (S2S / AudioLLM) ---
+#i Base framework for S2S or AudioLLM pipelines.
+#d enum
+#e pipecat,openai_realtime,gemini_live,elevenlabs
+#v EVA_FRAMEWORK=openai_realtime
 
 # ==============================================
-# Required: Model Deployments
+# LiteLLM Deployments
 # ==============================================
-#
-# EVA_MODEL_LIST: JSON array of LiteLLM Router deployments.
-#   - model_name: alias your code uses (e.g., "gpt-5.2")
-#   - litellm_params.model: provider-specific identifier (e.g., "openai/gpt-4o")
-#   - Use "os.environ/VAR_NAME" syntax to reference other env vars
-#
-# EVA needs at minimum:
-#   1. An LLM for the assistant (matches EVA_MODEL__LLM below)
-#   2. Gemini for audio judge metrics
-#   3. Claude (Bedrock) for the faithfulness metric
-#
-# See docs/llm_configuration.md for more provider examples and load balancing.
 
+#i LiteLLM Router deployments. Use "os.environ/VAR_NAME" to reference other env vars.
+#d json_deployment_list
 EVA_MODEL_LIST='[
   {
     "model_name": "gpt-5.2",
@@ -106,222 +150,233 @@ EVA_MODEL_LIST='[
   }
 ]'
 
-# --- Optional: additional model deployments ---
-# Uncomment and add to EVA_MODEL_LIST above as needed.
-#
-# Azure OpenAI (alternative to direct OpenAI):
-# {
-#   "model_name": "gpt-5.2",
-#   "litellm_params": {
-#     "model": "azure/gpt-5.2",
-#     "api_key": "os.environ/AZURE_OPENAI_API_KEY",
-#     "api_base": "https://your-resource.openai.azure.com",
-#     "max_parallel_requests": 5
-#   },
-#   "model_info": {"base_model": "gpt-5.2"}
-# }
-#
-# OpenAI Responses API (for multi-turn encrypted reasoning on o-series / gpt-5.x models):
-#   Add "use_responses_api": true at the top level of the deployment (not inside litellm_params).
-#   Also set "reasoning_effort" inside litellm_params to enable reasoning.
-#   {
-#     "model_name": "gpt-5.2",
-#     "litellm_params": {
-#       "model": "azure/gpt-5.2",
-#       "api_key": "os.environ/AZURE_OPENAI_API_KEY",
-#       "api_base": "https://your-resource.openai.azure.com",
-#       "reasoning_effort": "low",
-#       "max_parallel_requests": 5
-#     },
-#     "use_responses_api": true
-#   }
-#
-# Self-hosted model (e.g., vLLM, NVIDIA NIM):
-#   {
-#     "model_name": "my-model",
-#     "litellm_params": {
-#       "model": "openai/my-model-name",
-#       "api_key": "os.environ/MY_MODEL_KEY",
-#       "api_base": "http://my-server:8000/v1",
-#       "max_parallel_requests": 5
-#     }
-#   }
-#
-# Load balancing (multiple endpoints for the same model):
-#   {
-#     "model_name": "my-model",
-#     "litellm_params": {"model": "openai/my-model", "api_base": "http://server1:8000/v1", ...}
-#   },
-#   {
-#     "model_name": "my-model",
-#     "litellm_params": {"model": "openai/my-model", "api_base": "http://server2:8000/v1", ...}
-#   }
+#i LLM model alias for the assistant. Must match a model_name in EVA_MODEL_LIST.
+#d enum
+#x pipeline_mode=LLM
+EVA_MODEL__LLM=gpt-5.2
 
 # ==============================================
-# Required: Framework Configuration
+# Framework & Runtime
 # ==============================================
 
-# Domain name — determines dataset, agent config, and scenario paths:
-#   data/{domain}_dataset.jsonl
-#   configs/agents/{domain}_agent.yaml
-#   data/{domain}_scenarios/
-# The included sample domain is "airline".
-# EVA_DOMAIN=airline
+#i Domain determines dataset, agent config, and scenario paths (data/{domain}_dataset.jsonl etc).
+#d enum
+#e airline,itsm,medical_hr
+#v EVA_DOMAIN=airline
 
-# LLM model name — must match a model_name in EVA_MODEL_LIST above.
-EVA_MODEL__LLM=gpt-5.2
+#i Maximum number of concurrent conversations.
+#d int
+#r 1,100,1
+#v EVA_MAX_CONCURRENT_CONVERSATIONS=1
 
-# ==============================================
-# Optional: Alternative LLM Provider Keys
-# ==============================================
+#i Conversation timeout in seconds.
+#d int
+#r 30,10000,10
+#v EVA_CONVERSATION_TIMEOUT_SECONDS=360
 
-# Azure OpenAI (alternative to direct OpenAI)
-# AZURE_OPENAI_API_KEY=your_azure_openai_api_key_here
-# AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/
+#i Maximum rerun attempts for failed records.
+#d int
+#r 0,20,1
+#v EVA_MAX_RERUN_ATTEMPTS=3
 
-# Google API key (alternative to service account credentials for Gemini)
-# GOOGLE_API_KEY=your_google_api_key_here
-
-# ==============================================
-# Optional: Speech-to-Speech / Audio-LLM Configuration
-# ==============================================
-# Only needed if benchmarking speech-to-speech models.
+#i Output directory for results.
+#d path
+#v EVA_OUTPUT_DIR=output
 
-# Base framework (pipecat, openai_realtime, gemini_live)
-# EVA_FRAMEWORK=openai_realtime
+#i Starting port for WebSocket servers.
+#d int
+#r 1024,65000,1
+#v EVA_BASE_PORT=10000
 
-# EVA_MODEL__S2S=openai
-# EVA_MODEL__S2S_PARAMS='{"model": "gpt-realtime-mini", "api_key": ""}'
+#i Number of ports in the pool.
+#d int
+#r 10,500,1
+#v EVA_PORT_POOL_SIZE=150
 
-# EVA_MODEL__AUDIO_LLM=
-# EVA_MODEL__AUDIO_LLM_PARAMS='{"url": "", "api_key": ""}'
+#i Comma-separated metric names to run. Leave unset to run all metrics; an empty value runs no metrics.
+#d csv_list
+#v EVA_METRICS=
 
 # ==============================================
-# Optional: Execution Settings
+# Turn Detection & VAD
 # ==============================================
 
-# Maximum number of concurrent conversations (1-100, default: 1)
-# EVA_MAX_CONCURRENT_CONVERSATIONS=1
+# Leave all of these inactive to use smart defaults.
 
-# Conversation timeout in seconds (30-10000, default: 360)
-# EVA_CONVERSATION_TIMEOUT_SECONDS=360
+#i Turn start strategy: when to consider the user has started speaking.
+#d enum
+#e vad,transcription,external
+#v EVA_MODEL__TURN_START_STRATEGY=vad
 
-# Maximum number of rerun attempts for failed records (0-20, default: 3)
-# EVA_MAX_RERUN_ATTEMPTS=3
+#i Turn start strategy parameters (JSON).
+#d json_object
+#v EVA_MODEL__TURN_START_STRATEGY_PARAMS='{}'
 
-# Output directory for results (default: output)
-# EVA_OUTPUT_DIR=output
+#i Turn stop strategy: when to consider the user has finished speaking.
+#d enum
+#e turn_analyzer,speech_timeout,external
+#v EVA_MODEL__TURN_STOP_STRATEGY=turn_analyzer
 
-# Starting port for WebSocket servers (1024-65000, default: 10000)
-# EVA_BASE_PORT=10000
+#i Turn stop strategy parameters. For speech_timeout: {"user_speech_timeout": 0.8}.
+#d json_object
+#v EVA_MODEL__TURN_STOP_STRATEGY_PARAMS='{}'
 
-# Number of ports in the pool (10-500, default: 150)
-# EVA_PORT_POOL_SIZE=150
+#i VAD (Voice Activity Detection) analyzer.
+#d enum
+#e silero,none
+#v EVA_MODEL__VAD=silero
 
-# Comma-separated list of metrics to run (empty = no metrics, default: all metrics)
-# EVA_METRICS=
-
-# Debug mode: run only 1 record regardless of dataset size (true | false, default: false)
-# EVA_DEBUG=true
-
-# Comma-separated list of specific record IDs to run (empty = run all)
-# EVA_RECORD_IDS=1.2.1,1.2.2,1.3.1
-
-# Logging level (DEBUG | INFO | WARNING | ERROR | CRITICAL, default: INFO)
-# EVA_LOG_LEVEL=DEBUG
+#i VAD parameters. Keys: confidence (0-1), start_secs, stop_secs, min_volume (0-1).
+#d json_object
+#v EVA_MODEL__VAD_PARAMS='{"start_secs": 0.2, "stop_secs": 0.2, "min_volume": 0.6, "confidence": 0.7}'
 
 # ==============================================
-# Optional: Turn Detection & VAD Configuration
+# User Config
 # ==============================================
-# Fine-tune user turn detection and voice activity detection.
-# Leave commented to use smart defaults.
-
-# User turn start strategy: vad | transcription | external
-# - vad: Start turn when VAD detects speech (default)
-# - transcription: Start turn when STT produces transcription
-# - external: Delegate to external service (e.g., Deepgram Flux)
-# EVA_MODEL__TURN_START_STRATEGY=vad
-
-# User turn start strategy parameters (JSON)
-# EVA_MODEL__TURN_START_STRATEGY_PARAMS='{}'
-
-# User turn stop strategy: turn_analyzer | speech_timeout | external
-# - turn_analyzer: Use smart turn analyzer to detect natural turn end (default)
-# - speech_timeout: Stop after fixed silence duration
-# - external: Delegate to external service
-# EVA_MODEL__TURN_STOP_STRATEGY=turn_analyzer
-
-# User turn stop strategy parameters (JSON)
-# For speech_timeout: {"user_speech_timeout": 0.8}
-# For turn_analyzer: automatically uses smart turn detection
-# EVA_MODEL__TURN_STOP_STRATEGY_PARAMS='{}'
-
-# Note: For services with built-in turn detection (e.g., Deepgram Flux), set both to 'external':
-#   EVA_MODEL__TURN_START_STRATEGY=external
-#   EVA_MODEL__TURN_STOP_STRATEGY=external
-
-# VAD (Voice Activity Detection) analyzer: silero | none
-# EVA_MODEL__VAD=silero
-
-# VAD parameters (JSON)
-# - confidence: Minimum confidence threshold (0.0-1.0, default: 0.7)
-# - start_secs: Duration to wait before confirming voice start (default: 0.2)
-# - stop_secs: Duration to wait before confirming voice stop (default: 0.2)
-# - min_volume: Minimum audio volume threshold (0.0-1.0, default: 0.6)
-# EVA_MODEL__VAD_PARAMS='{"start_secs": 0.2, "stop_secs": 0.2, "min_volume": 0.6, "confidence": 0.7}'
 
-# ==============================================
-# Optional: Perturbations
-# ==============================================
-# Perturb the simulated user to stress-test the assistant under realistic
-# conditions. Four independent axes, all optional:
-#
-#   1. background_noise         — ambient audio mixed into user speech
-#   2. accent                   — swaps the ElevenLabs user agent to an accented voice
-#   3. behavior                 — swaps the agent + modifies persona prompt
-#   4. connection_degradation   — stacks VoIP artifacts (codec, packet loss, gain jitter)
-#
+# --- Default user simulator agents ---
+#i ElevenLabs agent ID for the default female-voice user persona.
+#d string
+EVA_DEFAULT_USER_F=your_elevenlabs_agent_id_for_default_user_f
+
+#i ElevenLabs agent ID for the default male-voice user persona.
+#d string
+EVA_DEFAULT_USER_M=your_elevenlabs_agent_id_for_default_user_m
+
+# --- Perturbations ---
 # accent and behavior are MUTUALLY EXCLUSIVE (each claims the agent ID slot).
 # background_noise and connection_degradation can stack with either.
+
 # --- Background noise ---
-# Options: airport_gate | baby_crying | background_music | bad_connection_static |
-#          coffee_shop | loud_construction | nyc_street | road_noise
-# Requires assets in assets/noise/. Download with:
-#   python scripts/download_noise_assets.py
-# EVA_PERTURBATION__BACKGROUND_NOISE=coffee_shop
-#
-# Signal-to-noise ratio in dB (higher = cleaner user speech; default: 15)
-# EVA_PERTURBATION__SNR_DB=15
+# Requires assets in assets/noise/. Download with: python scripts/download_noise_assets.py
+#i Ambient noise to mix into user speech.
+#d enum
+#e airport_gate,baby_crying,background_music,bad_connection_static,coffee_shop,loud_construction,nyc_street,road_noise
+#v EVA_PERTURBATION__BACKGROUND_NOISE=coffee_shop
+
+#i Signal-to-noise ratio in dB. Higher = cleaner user speech.
+#d float
+#r 0,40,1
+#v EVA_PERTURBATION__SNR_DB=15
 
 # --- Connection degradation ---
-# G.711 codec quantisation + gaussian static + 3% packet loss + random gain.
-# No assets required.
-# EVA_PERTURBATION__CONNECTION_DEGRADATION=false
+#i Apply G.711 codec + gaussian static + 3% packet loss + random gain.
+#d bool
+#v EVA_PERTURBATION__CONNECTION_DEGRADATION=false
+
+# --- Accent (mutually exclusive with Behavior) ---
+#i Accent to apply to the user simulator. Requires matching agent IDs below.
+#d enum
+#e french,indian,spanish,chinese
+#x perturbation_mode=Accent
+#v EVA_PERTURBATION__ACCENT=french
+
+# --- Accent agent IDs ---
+#i ElevenLabs agent ID — French accent, female voice.
+#d string
+#x perturbation_mode=Accent
+#x EVA_PERTURBATION__ACCENT=french
+#v EVA_FRENCH_ACCENT_USER_F=
+
+#i ElevenLabs agent ID — French accent, male voice.
+#d string
+#x perturbation_mode=Accent
+#x EVA_PERTURBATION__ACCENT=french
+#v EVA_FRENCH_ACCENT_USER_M=
+
+#i ElevenLabs agent ID — Indian accent, female voice.
+#d string
+#x perturbation_mode=Accent
+#x EVA_PERTURBATION__ACCENT=indian
+#v EVA_INDIAN_ACCENT_USER_F=
+
+#i ElevenLabs agent ID — Indian accent, male voice.
+#d string
+#x perturbation_mode=Accent
+#x EVA_PERTURBATION__ACCENT=indian
+#v EVA_INDIAN_ACCENT_USER_M=
+
+#i ElevenLabs agent ID — Spanish accent, female voice.
+#d string
+#x perturbation_mode=Accent
+#x EVA_PERTURBATION__ACCENT=spanish
+#v EVA_SPANISH_ACCENT_USER_F=
+
+#i ElevenLabs agent ID — Spanish accent, male voice.
+#d string
+#x perturbation_mode=Accent
+#x EVA_PERTURBATION__ACCENT=spanish
+#v EVA_SPANISH_ACCENT_USER_M=
+
+#i ElevenLabs agent ID — Chinese accent, female voice.
+#d string
+#x perturbation_mode=Accent
+#x EVA_PERTURBATION__ACCENT=chinese
+#v EVA_CHINESE_ACCENT_USER_F=
+
+#i ElevenLabs agent ID — Chinese accent, male voice.
+#d string
+#x perturbation_mode=Accent
+#x EVA_PERTURBATION__ACCENT=chinese
+#v EVA_CHINESE_ACCENT_USER_M=
+
+# --- Behavior (mutually exclusive with Accent) ---
+#i Behavior persona for the user simulator. Requires matching agent IDs below.
+#d enum
+#e aggressive_impatient,elderly_slow,forgetful_disorganized
+#x perturbation_mode=Behavior
+#v EVA_PERTURBATION__BEHAVIOR=forgetful_disorganized
+
+# --- Behavior agent IDs ---
+#i ElevenLabs agent ID — Aggressive/impatient persona, female voice.
+#d string
+#x perturbation_mode=Behavior
+#x EVA_PERTURBATION__BEHAVIOR=aggressive_impatient
+#v EVA_AGGRESSIVE_IMPATIENT_USER_F=
+
+#i ElevenLabs agent ID — Aggressive/impatient persona, male voice.
+#d string
+#x perturbation_mode=Behavior
+#x EVA_PERTURBATION__BEHAVIOR=aggressive_impatient
+#v EVA_AGGRESSIVE_IMPATIENT_USER_M=
+
+#i ElevenLabs agent ID — Elderly/slow persona, female voice.
+#d string
+#x perturbation_mode=Behavior
+#x EVA_PERTURBATION__BEHAVIOR=elderly_slow
+#v EVA_ELDERLY_SLOW_USER_F=
+
+#i ElevenLabs agent ID — Elderly/slow persona, male voice.
+#d string
+#x perturbation_mode=Behavior
+#x EVA_PERTURBATION__BEHAVIOR=elderly_slow
+#v EVA_ELDERLY_SLOW_USER_M=
+
+#i ElevenLabs agent ID — Forgetful/disorganized persona, female voice.
+#d string
+#x perturbation_mode=Behavior
+#x EVA_PERTURBATION__BEHAVIOR=forgetful_disorganized
+#v EVA_FORGETFUL_DISORGANIZED_USER_F=
+
+#i ElevenLabs agent ID — Forgetful/disorganized persona, male voice.
+#d string
+#x perturbation_mode=Behavior
+#x EVA_PERTURBATION__BEHAVIOR=forgetful_disorganized
+#v EVA_FORGETFUL_DISORGANIZED_USER_M=
 
-# --- Accent (mutually exclusive with behavior) ---
-# Options: french | indian | spanish | chinese
-# EVA_PERTURBATION__ACCENT=french
-#
-# Accent-specific ElevenLabs agent IDs. Pattern: EVA_{ACCENT}_ACCENT_USER_{F|M}
-# Only the pair matching EVA_PERTURBATION__ACCENT needs to be set.
-# EVA_FRENCH_ACCENT_USER_F=
-# EVA_FRENCH_ACCENT_USER_M=
-# EVA_INDIAN_ACCENT_USER_F=
-# EVA_INDIAN_ACCENT_USER_M=
-# EVA_SPANISH_ACCENT_USER_F=
-# EVA_SPANISH_ACCENT_USER_M=
-# EVA_CHINESE_ACCENT_USER_F=
-# EVA_CHINESE_ACCENT_USER_M=
-
-# --- Behavior (mutually exclusive with accent) ---
-# Options: aggressive_impatient | elderly_slow | forgetful_disorganized
-# EVA_PERTURBATION__BEHAVIOR=forgetful_disorganized
-#
-# Behavior-specific ElevenLabs agent IDs. Pattern: EVA_{BEHAVIOR}_USER_{F|M}
-# Only the pair matching EVA_PERTURBATION__BEHAVIOR needs to be set.
-# EVA_AGGRESSIVE_IMPATIENT_USER_F=
-# EVA_AGGRESSIVE_IMPATIENT_USER_M=
-# EVA_ELDERLY_SLOW_USER_F=
-# EVA_ELDERLY_SLOW_USER_M=
-# EVA_FORGETFUL_DISORGANIZED_USER_F=
-# EVA_FORGETFUL_DISORGANIZED_USER_M=
+# ==============================================
+# Debug & Logging
+# ==============================================
+
+#i Run only 1 record regardless of dataset size.
+#d bool
+#v EVA_DEBUG=false
+
+#i Comma-separated record IDs to run. Empty = run all.
+#d csv_list
+#v EVA_RECORD_IDS=
+
+#i Logging verbosity.
+#d enum
+#e DEBUG,INFO,WARNING,ERROR,CRITICAL
+#v EVA_LOG_LEVEL=INFO
diff --git a/README.md b/README.md
index 221486b3..a9d5edd1 100644
--- a/README.md
+++ b/README.md
@@ -153,6 +153,16 @@ eva \
     --metrics task_completion,faithfulness,conciseness
 ```
 
+### Configuring EVA
+
+EVA includes a Streamlit config editor for building your `.env` file interactively:
+
+```bash
+streamlit run apps/config_editor.py
+```
+
+The editor covers all variables grouped by tab (API keys, voice pipeline, model deployments, runtime settings, perturbations, etc.), with proper widgets for each type. See [`apps/README.md`](apps/README.md) for details.
+
 ### Exploring Results
 
 EVA includes a Streamlit analysis app for visualizing and comparing results:
diff --git a/apps/README.md b/apps/README.md
index 5ed18405..69a524c2 100644
--- a/apps/README.md
+++ b/apps/README.md
@@ -1,6 +1,20 @@
 # EVA Apps
 
-Streamlit applications for exploring EVA results.
+Streamlit applications for exploring and configuring EVA.
+
+## Config Editor
+
+Interactive UI for building and editing `.env` configuration files without hand-editing JSON or looking up variable names.
+
+### Usage
+
+```bash
+streamlit run apps/config_editor.py
+```
+
+The app reads `.env.example` for the full variable set and loads existing values from `.env` if present. Each variable's widget type, enum options, ranges, and tooltips are declared directly in `.env.example` using annotation prefixes (`#i`, `#d`, `#e`, `#r`, `#x`, `#v`). Use the **Preview** button to inspect the generated file before saving, or **Download** to export it without writing to disk.
+
+---
 
 ## Analysis App
 
diff --git a/apps/analysis.py b/apps/analysis.py
index 121b8d11..101fdcdc 100644
--- a/apps/analysis.py
+++ b/apps/analysis.py
@@ -1585,7 +1585,7 @@ def _pivot_metric_label(m: str) -> str:
                 height=max(350, 80 + 40 * len(y_labels)),
                 margin={"l": 20, "r": 20, "t": 50, "b": 120},
             )
-            st.plotly_chart(heatmap_fig, use_container_width=True)
+            st.plotly_chart(heatmap_fig, width="stretch")
 
 
 def render_run_overview(run_dir: Path):
diff --git a/apps/config_editor.py b/apps/config_editor.py
new file mode 100644
index 00000000..f59995b4
--- /dev/null
+++ b/apps/config_editor.py
@@ -0,0 +1,655 @@
+"""Streamlit app for editing EVA's .env file with a friendly UI.
+
+Run with:
+
+    streamlit run apps/config_editor.py
+
+The app reads .env.example to discover variables and their metadata
+(widget type, options, ranges, tooltips, conditions) from annotation
+prefixes (#i, #d, #e, #r, #g, #x, #v).  .env is read on startup to
+prefill values and written on save.
+"""
+
+from __future__ import annotations
+
+import html as html_module
+import json
+import sys
+from pathlib import Path
+from typing import Any
+
+import streamlit as st
+import streamlit.components.v1 as st_components
+from config_io import (
+    AnnotatedVar,
+    ParsedEnvExample,
+    compute_disabled,
+    load_env,
+    parse_env_example,
+    serialize_env,
+)
+from config_schema import (
+    GROUP_API_CONFIGS,
+    GROUP_DEPLOYMENTS,
+    GROUP_MISC,
+    GROUP_PERTURBATIONS,
+    GROUP_RUNTIME,
+    GROUPS,
+    MUTEX_RADIOS,
+)
+
+REPO_ROOT = Path(__file__).resolve().parents[1]  # repository root: the directory above apps/
+sys.path.insert(0, str(REPO_ROOT))  # NOTE(review): runs after the config_io/config_schema imports above
+
+ENV_EXAMPLE_PATH = REPO_ROOT / ".env.example"  # annotated template: source of variables and widget metadata
+ENV_PATH = REPO_ROOT / ".env"  # user's actual config: read to prefill values
+
+
+# ---------------------------------------------------------------------------
+# Initialisation
+# ---------------------------------------------------------------------------
+
+
+def _coerce(widget: str, raw: str) -> Any:
+    """Parse a raw .env string for ``widget``; blank, invalid or wrongly-shaped input gives its empty value."""
+    text = raw.strip() if raw else ""
+    if not text:
+        return _empty_for(widget)
+    try:
+        if widget == "bool":
+            return text.lower() in ("true", "1", "yes", "on")
+        if widget in ("int", "float"):
+            return int(text) if widget == "int" else float(text)
+        if widget == "csv_list":
+            return [x.strip() for x in text.split(",") if x.strip()]
+        if widget in ("json_object", "json_deployment_list"):
+            # The renderers call .items() / iterate rows, so a mismatched JSON type
+            # (e.g. a list for json_object) must degrade to the empty value, not crash later.
+            shape = dict if widget == "json_object" else list
+            loaded = json.loads(text[1:-1] if text.startswith("'") and text.endswith("'") else text)
+            return loaded if isinstance(loaded, shape) else shape()
+    except Exception:
+        return _empty_for(widget)
+    return text
+
+
+def _empty_for(widget: str) -> Any:
+    """Return the blank value for ``widget``: False, None, a fresh list/dict, or ""."""
+    if widget == "bool":
+        return False
+    if widget == "json_object":
+        return {}
+    if widget in {"csv_list", "json_deployment_list"}:
+        return []
+    # Numeric widgets have no blank number, so they use None; every other widget is text.
+    return None if widget in {"int", "float"} else ""
+
+
+def _detect_pipeline_mode(env: dict[str, str]) -> str:
+    """Infer the pipeline mode from which model variable is set; S2S is checked before AudioLLM."""
+    for var_name, mode in (("EVA_MODEL__S2S", "S2S"), ("EVA_MODEL__AUDIO_LLM", "AudioLLM")):
+        if env.get(var_name):
+            return mode
+    return "LLM"
+
+
+def _detect_perturbation_mode(env: dict[str, str]) -> str:
+    """Infer the perturbation mode; a set accent takes precedence over a set behavior."""
+    if env.get("EVA_PERTURBATION__ACCENT"):
+        return "Accent"
+    has_behavior = bool(env.get("EVA_PERTURBATION__BEHAVIOR"))
+    return "Behavior" if has_behavior else "None"
+
+
+def _init_state() -> None:
+    """Populate ``st.session_state`` once per session from .env.example metadata and .env values."""
+    if "initialized" in st.session_state:
+        return
+    parsed = parse_env_example(ENV_EXAMPLE_PATH)
+    st.session_state.parsed = parsed
+    existing = load_env(ENV_PATH)
+    values: dict[str, Any] = {}
+    for var in parsed.vars:
+        raw = existing.get(var.name)
+        if raw is None and var.is_active:
+            raw = var.example_value.strip().strip("'\"")
+        values[var.name] = _coerce(var.widget, raw or "")
+    # Keep .env entries that .env.example does not declare, as raw strings (every declared name is in values).
+    values.update({name: raw for name, raw in existing.items() if name not in values})
+    st.session_state.field_values = values
+    st.session_state.loaded_keys = set(existing)
+    st.session_state.pipeline_mode = _detect_pipeline_mode(existing)
+    st.session_state.perturbation_mode = _detect_perturbation_mode(existing)
+    # Default each mutex radio only when its key is not already in session state.
+    for mx in MUTEX_RADIOS:
+        if mx.state_key not in st.session_state:
+            st.session_state[mx.state_key] = mx.default
+    st.session_state.initialized = True
+
+
+# ---------------------------------------------------------------------------
+# Visibility
+# ---------------------------------------------------------------------------
+
+
+def _is_visible_av(var: AnnotatedVar) -> bool:
+    """Tell whether every ``#x`` condition on ``var`` currently holds."""
+    state = st.session_state
+    for key, wanted in var.conditions:
+        # Top-level session state wins; fall back to the edited field value when it has no value there.
+        found = state.get(key)
+        if (state.get("field_values", {}).get(key) if found is None else found) != wanted:
+            return False
+    return True
+
+
+# ---------------------------------------------------------------------------
+# Widget renderers
+# ---------------------------------------------------------------------------
+
+
+def _render_annotated_var(var: AnnotatedVar) -> None:
+    """Render the input widget for one annotated variable, if its ``#x`` conditions hold.
+
+    Scalar widgets write their result to ``st.session_state.field_values[var.name]``;
+    JSON widgets are delegated to their dedicated renderers.
+    """
+    if not _is_visible_av(var):
+        return
+    values = st.session_state.field_values
+    current = values.get(var.name)
+    help_text = var.info or None
+
+    if var.widget in ("string", "path"):
+        values[var.name] = st.text_input(var.name, value=current or "", help=help_text, key=f"w_{var.name}")
+    elif var.widget == "secret":
+        values[var.name] = st.text_input(
+            var.name, value=current or "", help=help_text, type="password", key=f"w_{var.name}"
+        )
+    elif var.widget == "bool":
+        values[var.name] = st.checkbox(var.name, value=bool(current), help=help_text, key=f"w_{var.name}")
+    elif var.widget in ("int", "float"):
+        cast = int if var.widget == "int" else float
+        bounds = [cast(b) for b in var.range or ()]
+        lo = bounds[0] if bounds else None
+        hi = bounds[1] if len(bounds) > 1 else None
+        accepted = int if cast is int else (int, float)
+        seed = cast(current) if isinstance(current, accepted) else (lo or cast(0))
+        # st.number_input rejects an initial value outside [min_value, max_value]; clamp values from .env.
+        seed = seed if lo is None else max(lo, seed)
+        seed = seed if hi is None else min(hi, seed)
+        values[var.name] = st.number_input(
+            var.name,
+            value=seed,
+            min_value=lo,
+            max_value=hi,
+            step=bounds[2] if len(bounds) > 2 else (1 if cast is int else 0.1),
+            help=help_text,
+            key=f"w_{var.name}",
+        )
+    elif var.widget == "enum":
+        options = _enum_options_for(var)
+        display = ["(unset)"] + options
+        idx = (options.index(current) + 1) if current in options else 0
+        choice = st.selectbox(var.name, display, index=idx, help=help_text, key=f"w_{var.name}")
+        values[var.name] = "" if choice == "(unset)" else choice
+    elif var.widget == "multi_enum":
+        choices = current if isinstance(current, list) else []
+        values[var.name] = st.multiselect(var.name, var.options, default=choices, help=help_text, key=f"w_{var.name}")
+    elif var.widget == "csv_list":
+        as_text = ",".join(current) if isinstance(current, list) else (current or "")
+        text = st.text_input(var.name, value=as_text, help=help_text, key=f"w_{var.name}")
+        values[var.name] = [x.strip() for x in text.split(",") if x.strip()]
+    elif var.widget == "json_object":
+        _render_json_object(var.name, var.info, current or {})
+    elif var.widget == "json_deployment_list":
+        _render_deployment_list(var.name, var.info, current or [])
+
+
+def _enum_options_for(var: AnnotatedVar) -> list[str]:
+    """Enum choices for ``var``; EVA_MODEL__LLM offers the model_name aliases found in EVA_MODEL_LIST."""
+    entries = (st.session_state.field_values.get("EVA_MODEL_LIST") or []) if var.name == "EVA_MODEL__LLM" else None
+    aliases = {e.get("model_name", "") for e in entries or [] if isinstance(e, dict)}
+    return var.options if entries is None else sorted(aliases - {""})
+
+
+def _render_json_object(name: str, info: str, current: dict) -> None:
+    """Edit a flat JSON object as a key/value table with a raw-JSON override; store it in field_values[name]."""
+    st.markdown(f"**{name}**" + (f" — {info}" if info else ""))
+    current = current if isinstance(current, dict) else {}  # a non-object value would crash .items() below
+    raw_key = f"raw_{name}"
+    if raw_key not in st.session_state:
+        st.session_state[raw_key] = json.dumps(current, indent=2) if current else ""
+
+    rows = [{"key": k, "value": _scalar_to_str(v)} for k, v in current.items()] or [{"key": "", "value": ""}]
+    edited = st.data_editor(
+        rows,
+        num_rows="dynamic",
+        width="stretch",
+        column_config={col: st.column_config.TextColumn(col, required=False) for col in ("key", "value")},
+        key=f"de_{name}",
+    )
+    parsed_kv: dict[str, Any] = {
+        k: _str_to_scalar(row.get("value")) for row in edited if (k := (row.get("key") or "").strip())
+    }
+
+    with st.expander("Raw JSON", expanded=False):
+        table_json = json.dumps(parsed_kv, indent=2) if parsed_kv else ""
+        text = st.text_area("Edit as JSON", value=table_json, key=raw_key, height=140)
+        if text.strip():
+            # Non-empty raw text takes precedence over the table, but only if it is a JSON object.
+            try:
+                loaded = json.loads(text)
+                if not isinstance(loaded, dict):
+                    raise ValueError(f"expected a JSON object, got {type(loaded).__name__}")
+                parsed_kv = loaded
+            except ValueError as e:
+                st.warning(f"Invalid JSON: {e}")
+
+    st.session_state.field_values[name] = parsed_kv
+
+
+def _scalar_to_str(v: Any) -> str:
+    if isinstance(v, (dict, list)):
+        return json.dumps(v)
+    if isinstance(v, bool):
+        return "true" if v else "false"
+    if v is None:
+        return ""
+    return str(v)
+
+
+def _str_to_scalar(v: Any) -> Any:
+    if not isinstance(v, str):
+        return v
+    s = v.strip()
+    if s == "":
+        return ""
+    if s.lower() in ("true", "false"):
+        return s.lower() == "true"
+    if s.startswith(("{", "[")):
+        try:
+            return json.loads(s)
+        except json.JSONDecodeError:
+            return s
+    try:
+        return int(s) if "." not in s else float(s)
+    except ValueError:
+        return s
+
+
+def _render_deployment_list(name: str, info: str, current: list) -> None:
+    st.markdown(f"**{name}**" + (f" — {info}" if info else ""))
+    deployments: list[dict] = [d for d in current if isinstance(d, dict)]
+
+    st.caption("All deployments — add / remove rows here, then select one below to edit its params.")
+    summary_rows = [
+        {"model_name": d.get("model_name", ""), "provider/model": (d.get("litellm_params") or {}).get("model", "")}
+        for d in deployments
+    ] or [{"model_name": "", "provider/model": ""}]
+
+    edited_summary = st.data_editor(
+        summary_rows,
+        num_rows="dynamic",
+        width="stretch",
+        column_config={
+            "model_name": st.column_config.TextColumn("model_name (alias)", required=False),
+            "provider/model": st.column_config.TextColumn("provider/model (litellm_params.model)", required=False),
+        },
+        key=f"de_summary_{name}",
+    )
+
+    old_by_name = {d.get("model_name", ""): d for d in deployments}
+    merged: list[dict] = []
+    for idx, row in enumerate(edited_summary):
+        rname = (row.get("model_name") or "").strip()
+        if not rname:
+            continue
+        base = dict(
+            old_by_name.get(rname) or old_by_name.get(list(old_by_name)[idx] if idx < len(old_by_name) else "") or {}
+        )
+        base["model_name"] = rname
+        lp = dict(base.get("litellm_params") or {})
+        pm = (row.get("provider/model") or "").strip()
+        if pm:
+            lp["model"] = pm
+        base["litellm_params"] = lp
+        merged.append(base)
+    deployments = merged
+
+    model_names = [d.get("model_name", "") for d in deployments if d.get("model_name")]
+    if not model_names:
+        st.session_state.field_values[name] = deployments
+        return
+
+    sel_key = f"_depl_sel_{name}"
+    prev = st.session_state.get(sel_key)
+    default_idx = model_names.index(prev) if prev in model_names else 0
+    selected = st.selectbox("Edit deployment", options=model_names, index=default_idx, key=sel_key)
+
+    sel_idx = model_names.index(selected)
+    depl = deployments[sel_idx]
+
+    st.markdown("**litellm_params**")
+    lp = depl.get("litellm_params") or {}
+    lp_rows = [{"key": k, "value": _scalar_to_str(v)} for k, v in lp.items()] or [{"key": "", "value": ""}]
+    edited_lp = st.data_editor(
+        lp_rows,
+        num_rows="dynamic",
+        width="stretch",
+        column_config={
+            "key": st.column_config.TextColumn("key", required=False),
+            "value": st.column_config.TextColumn("value", required=False),
+        },
+        key=f"de_lp_{name}_{selected}",
+    )
+    new_lp: dict[str, Any] = {
+        (r.get("key") or "").strip(): _str_to_scalar(r.get("value")) for r in edited_lp if (r.get("key") or "").strip()
+    }
+    depl["litellm_params"] = new_lp
+
+    extra_fields = {k: v for k, v in depl.items() if k not in ("model_name", "litellm_params")}
+    if extra_fields or st.checkbox("Add extra top-level fields", key=f"_extra_chk_{name}_{selected}"):
+        st.markdown("**Extra top-level fields** (e.g. `model_info`)")
+        extra_rows = [{"key": k, "value": _scalar_to_str(v)} for k, v in extra_fields.items()] or [
+            {"key": "", "value": ""}
+        ]
+        edited_extra = st.data_editor(
+            extra_rows,
+            num_rows="dynamic",
+            width="stretch",
+            column_config={
+                "key": st.column_config.TextColumn("key", required=False),
+                "value": st.column_config.TextColumn("value", required=False),
+            },
+            key=f"de_extra_{name}_{selected}",
+        )
+        edited_extra_keys = {(r.get("key") or "").strip() for r in edited_extra if (r.get("key") or "").strip()}
+        for k in list(extra_fields):
+            if k not in edited_extra_keys:
+                depl.pop(k, None)
+        for r in edited_extra:
+            k = (r.get("key") or "").strip()
+            if k:
+                depl[k] = _str_to_scalar(r.get("value"))
+
+    deployments[sel_idx] = depl
+    st.session_state.field_values[name] = deployments
+
+
+# ---------------------------------------------------------------------------
+# Group / tab rendering
+# ---------------------------------------------------------------------------
+
+
+def _auto_group_for(name: str) -> str | None:
+    """Auto-assign group for vars not in the parsed template."""
+    if "KEY" in name or "URL" in name:
+        return GROUP_API_CONFIGS
+    if "_USER" in name and "DEFAULT" not in name:
+        return GROUP_PERTURBATIONS
+    if name.startswith("EVA_"):
+        return GROUP_RUNTIME
+    return None
+
+
+def _render_unmapped_var(name: str) -> None:
+    values = st.session_state.field_values
+    v = values.get(name, "")
+    if not isinstance(v, str):
+        v = json.dumps(v) if v else ""
+    widget_type = "password" if "KEY" in name else "default"
+    values[name] = st.text_input(name, value=v, key=f"w_{name}", type=widget_type)
+
+
+def _render_add_var_widget(context: str) -> None:
+    st.divider()
+    st.markdown("**Add a new variable**")
+    counter_key = f"_add_var_counter_{context}"
+    if counter_key not in st.session_state:
+        st.session_state[counter_key] = 0
+    input_key = f"_add_var_input_{context}_{st.session_state[counter_key]}"
+    col_input, col_btn = st.columns([4, 1])
+    with col_input:
+        new_name = st.text_input(
+            "Variable name",
+            key=input_key,
+            label_visibility="collapsed",
+            placeholder="e.g. MY_API_KEY",
+        )
+    with col_btn:
+        if st.button("Add", key=f"_add_var_btn_{context}", width="stretch"):
+            name = new_name.strip().upper()
+            if not name:
+                st.warning("Please enter a variable name.")
+            elif name.startswith("EVA_"):
+                st.error(
+                    "`EVA_*` variables are managed via `apps/config_schema.py`. Add it there to get a proper widget."
+                )
+            elif name in st.session_state.field_values or name in {v.name for v in st.session_state.parsed.vars}:
+                st.warning(f"`{name}` already exists.")
+            else:
+                st.session_state.field_values[name] = ""
+                st.session_state[counter_key] += 1
+                st.rerun()
+    st.caption(
+        "Variables containing **KEY** or **URL** are placed under *API Configs*. "
+        "Everything else stays here. `EVA_*` variables cannot be added here — "
+        "add them to `.env.example` instead."
+    )
+
+
+def _render_group(group: str) -> None:
+    parsed: ParsedEnvExample = st.session_state.parsed
+
+    # Render mutex radio buttons for this group
+    for mx in MUTEX_RADIOS:
+        if mx.group == group:
+            options = mx.options
+            current = st.session_state.get(mx.state_key, mx.default)
+            idx = options.index(current) if current in options else 0
+            st.session_state[mx.state_key] = st.radio(
+                mx.label,
+                options=options,
+                index=idx,
+                horizontal=True,
+                help=mx.help,
+                key=f"radio_{mx.state_key}",
+            )
+            st.divider()
+
+    # Template vars for this group
+    group_vars = [v for v in parsed.vars if v.group == group]
+
+    # Auto-routed unmapped vars (from loaded .env, not in template)
+    all_known = set(parsed.by_name)
+    auto_names = [n for n in st.session_state.field_values if n not in all_known and _auto_group_for(n) == group]
+
+    if group == GROUP_API_CONFIGS:
+        # Sort alphabetically so KEYs and URLs cluster
+        schema_map = {v.name: v for v in group_vars}
+        for name in sorted(set(schema_map) | set(auto_names)):
+            if name in schema_map:
+                _render_annotated_var(schema_map[name])
+            else:
+                _render_unmapped_var(name)
+        _render_add_var_widget("api")
+    else:
+        for var in group_vars:
+            _render_annotated_var(var)
+        for name in auto_names:
+            _render_unmapped_var(name)
+
+    # Cross-field validation for deployments tab
+    if group == GROUP_DEPLOYMENTS:
+        deployments = st.session_state.field_values.get("EVA_MODEL_LIST") or []
+        chosen = st.session_state.field_values.get("EVA_MODEL__LLM")
+        names = {d.get("model_name") for d in deployments if isinstance(d, dict)}
+        if chosen and chosen not in names:
+            st.error(
+                f"EVA_MODEL__LLM = `{chosen}` does not match any deployment in EVA_MODEL_LIST. "
+                "Add it above or pick a different alias."
+            )
+
+
+def _render_misc_tab(parsed: ParsedEnvExample) -> None:
+    known = set(parsed.by_name)
+    truly_misc = [n for n in st.session_state.field_values if n not in known and _auto_group_for(n) is None]
+    # Also add template vars with no group assignment
+    for var in parsed.vars:
+        if var.group is None and var.name not in list(truly_misc):
+            truly_misc.append(var.name)
+
+    if not truly_misc:
+        st.info("No unmapped variables. 🎉")
+    else:
+        st.warning(
+            f"Found {len(truly_misc)} variable(s) not covered by the template. "
+            "Add them to `.env.example` for proper widgets."
+        )
+        for name in truly_misc:
+            _render_unmapped_var(name)
+    _render_add_var_widget("misc")
+
+
+# ---------------------------------------------------------------------------
+# Serialization
+# ---------------------------------------------------------------------------
+
+
+def _is_meaningful(name: str, value: Any) -> bool:
+    if name in st.session_state.get("loaded_keys", set()):
+        return True
+    if value is None:
+        return False
+    if isinstance(value, str) and value == "":
+        return False
+    if isinstance(value, bool) and value is False:
+        return False
+    if isinstance(value, (list, dict)) and len(value) == 0:
+        return False
+    if isinstance(value, (int, float)) and value == 0:
+        return False
+    return True
+
+
+def _build_serialized() -> str:
+    values = {k: v for k, v in st.session_state.field_values.items() if _is_meaningful(k, v)}
+    parsed: ParsedEnvExample = st.session_state.parsed
+    known = set(parsed.by_name)
+    # csv_list → comma-separated string for serializer
+    for var in parsed.vars:
+        if var.widget == "csv_list" and isinstance(values.get(var.name), list):
+            values[var.name] = ",".join(values[var.name])
+    # Collect current mode state for condition evaluation
+    mode_state: dict[str, str] = {}
+    for mx in MUTEX_RADIOS:
+        mode_state[mx.state_key] = st.session_state.get(mx.state_key, mx.default)
+    mode_state.update({k: str(v) for k, v in values.items() if isinstance(v, str)})
+    disabled = compute_disabled(parsed, **mode_state)
+    # Split extras by auto-routing: inline into their parent section or fall through to Misc
+    extras = {k: v for k, v in values.items() if k not in known}
+    api_extras = {k: v for k, v in extras.items() if _auto_group_for(k) == GROUP_API_CONFIGS}
+    runtime_extras = {k: v for k, v in extras.items() if _auto_group_for(k) == GROUP_RUNTIME}
+    section_extras: dict[str, dict] = {}
+    if api_extras:
+        section_extras[GROUP_API_CONFIGS] = dict(sorted(api_extras.items()))
+    if runtime_extras:
+        section_extras[GROUP_RUNTIME] = dict(sorted(runtime_extras.items()))
+    # vars with no auto-route → auto-collected into Misc by serialize_env
+    serializer_values = {k: v for k, v in values.items() if k in known or _auto_group_for(k) is None}
+    return serialize_env(serializer_values, parsed, disabled=disabled, section_extras=section_extras or None)
+
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
+
+def main() -> None:
+    st.set_page_config(page_title="EVA Config Editor", layout="wide", page_icon="⚙️")
+    _init_state()
+
+    st.markdown(
+        """
+        <style>
+        div[role="tablist"] {
+            overflow-x: auto !important;
+            scrollbar-width: thin;
+            scrollbar-color: #888 transparent;
+            padding-bottom: 2px;
+        }
+        div[role="tablist"]::-webkit-scrollbar { height: 4px; }
+        div[role="tablist"]::-webkit-scrollbar-thumb { background: #888; border-radius: 2px; }
+        div[role="tablist"]::-webkit-scrollbar-track { background: transparent; }
+        </style>
+        """,
+        unsafe_allow_html=True,
+    )
+
+    st.title("EVA Config Editor")
+    if not ENV_PATH.exists():
+        st.info(
+            f"No `.env` file found at `{ENV_PATH.relative_to(REPO_ROOT)}`. "
+            "Fill in your values below and click **Save to .env** to create it.",
+            icon="ℹ️",
+        )
+    st.caption(
+        f"Reading variable set from `{ENV_EXAMPLE_PATH.relative_to(REPO_ROOT)}`. "
+        + (
+            f"Loaded existing values from `{ENV_PATH.relative_to(REPO_ROOT)}`."
+            if ENV_PATH.exists()
+            else "Defaults seeded from `.env.example`."
+        )
+    )
+
+    left, right = st.columns([2, 1], gap="large")
+
+    with left:
+        tabs = st.tabs(GROUPS + [GROUP_MISC])
+        for tab, group in zip(tabs[:-1], GROUPS):
+            with tab:
+                _render_group(group)
+        with tabs[-1]:
+            _render_misc_tab(st.session_state.parsed)
+
+    with right:
+        st.subheader("Preview & Save")
+        text = _build_serialized()
+        st.download_button(
+            "⬇️ Download .env",
+            data=text,
+            file_name=".env",
+            mime="text/plain",
+            width="stretch",
+        )
+        data_attr = html_module.escape(json.dumps(text), quote=True)
+        st_components.html(
+            f"""
+            <button data-content="{data_attr}"
+                onclick="navigator.clipboard.writeText(JSON.parse(this.dataset.content)).then(()=>{{
+                    this.textContent='✅ Copied!';
+                    setTimeout(()=>this.textContent='📋 Copy to clipboard',1500);
+                }})"
+                style="width:100%;padding:0.4rem 0.8rem;font-size:0.875rem;
+                    border:1px solid #d1d5db;border-radius:0.375rem;background:#fff;
+                    cursor:pointer;font-family:inherit;">
+              📋 Copy to clipboard
+            </button>
+            """,
+            height=42,
+        )
+        if st.button("💾 Save to .env", width="stretch", type="primary"):
+            ENV_PATH.write_text(text)
+            st.success(f"Wrote {ENV_PATH}")
+        if st.button("👁️ View preview", width="stretch"):
+            _show_preview(text)
+
+
+@st.dialog("Preview .env", width="large")
+def _show_preview(text: str) -> None:
+    """Show ``text`` as an INI-highlighted code block inside the "Preview .env" dialog."""
+    st.code(text, language="ini")
+
+
+# Build the page when this file is executed as a script.
+if __name__ == "__main__":
+    main()
diff --git a/apps/config_io.py b/apps/config_io.py
new file mode 100644
index 00000000..dbf53f0b
--- /dev/null
+++ b/apps/config_io.py
@@ -0,0 +1,391 @@
+"""Parsing and serialization helpers for the EVA config editor.
+
+Annotation prefix scheme for .env.example:
+
+  # <text>     True comment — ignored by editor, preserved verbatim.
+  #i <text>    Info/tooltip text for the following variable.
+  #d <type>    Widget datatype: secret|bool|int|float|string|path|enum|
+               multi_enum|csv_list|json_object|json_deployment_list
+  #e <opts>    Comma-separated enum options for enum/multi_enum.
+  #r <range>   Numeric range: min,max  or  min,max,step
+  #g <group>   Override tab/group assignment for this variable.
+  #x <cond>    Visibility condition VAR=value (AND semantics; multiple lines ok).
+  #v <var=val> Inactive variable definition (off by default, fully configurable).
+"""
+
+from __future__ import annotations
+
+import json
+import re
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any
+
+_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
+
+
+@dataclass
+class AnnotatedVar:
+    """One variable declared in the annotated ``.env.example`` template.
+
+    Instances are created by ``parse_env_example``: the annotation lines
+    (``#i``, ``#d``, ``#e``, ``#r``, ``#g``, ``#x``) collected directly above a
+    variable definition are folded into the fields below.
+    """
+
+    name: str
+    is_active: bool  # False = declared with #v
+    example_value: str  # raw default from file
+    widget: str  # from #d or inferred
+    info: str  # from #i lines (joined)
+    options: list[str]  # from #e
+    range: tuple[float, ...] | None  # (min, max[, step]) from #r
+    group: str | None  # from #g or section header
+    conditions: list[tuple[str, str]]  # from #x lines (AND semantics)
+    line_start: int  # index into the template's lines where the definition starts
+    line_end: int  # index of the definition's last line (> line_start for multi-line quoted values)
+
+
+@dataclass
+class ParsedEnvExample:
+    lines: list[str]
+    vars: list[AnnotatedVar]
+    by_name: dict[str, AnnotatedVar] = field(default_factory=dict)
+
+    def __post_init__(self) -> None:
+        if not self.by_name:
+            self.by_name = {v.name: v for v in self.vars}
+
+    # ── back-compat shim so old tests still compile ───────────────────────
+    @property
+    def specs(self) -> list[AnnotatedVar]:
+        return self.vars
+
+
+def _is_section_rule(line: str) -> bool:
+    s = line.strip()
+    return bool(re.match(r"^\s*#\s*={3,}\s*$", s))
+
+
+def _consume_quoted_continuation(lines: list[str], start_idx: int, value_head: str) -> int:
+    """If value_head opens an unterminated single/double-quoted string, scan forward."""
+    stripped = value_head.strip()
+    if not stripped:
+        return start_idx
+    quote = stripped[0]
+    if quote not in ("'", '"'):
+        return start_idx
+    rest = stripped[1:]
+    if quote in rest:
+        return start_idx
+    for j in range(start_idx + 1, len(lines)):
+        if quote in lines[j]:
+            return j
+    return len(lines) - 1
+
+
+def _infer_widget(name: str, value: str) -> str:
+    """Best-effort widget type from variable name and example value."""
+    n = name.upper()
+    v = value.strip().lower()
+    if any(x in n for x in ("KEY", "SECRET", "TOKEN", "PASSWORD")):
+        return "secret"
+    if "CREDENTIALS" in n or n.endswith("_PATH") or n.endswith("_DIR"):
+        return "path"
+    if v in ("true", "false"):
+        return "bool"
+    raw = v.strip("'\"")
+    if raw.startswith("["):
+        return "json_deployment_list" if "model_name" in raw else "json_object"
+    if raw.startswith("{"):
+        return "json_object"
+    try:
+        int(raw)
+        return "int"
+    except ValueError:
+        pass
+    try:
+        float(raw)
+        return "float"
+    except ValueError:
+        pass
+    return "string"
+
+
+def parse_env_example(path: str | Path) -> ParsedEnvExample:
+    """Parse a .env.example file that uses the annotation prefix scheme.
+
+    The file is walked line by line.  Annotation lines (``#i``, ``#d``, ``#e``,
+    ``#r``, ``#g``, ``#x``) are accumulated and attached to the next variable
+    definition — either an active ``NAME=value`` line or an inactive
+    ``#v NAME=value`` line.  Any other line (blank, plain comment, section
+    header) clears the accumulated annotations.  Section header blocks set the
+    default group for the variables that follow.  Only the first definition of
+    a name is recorded.
+
+    Args:
+        path: Location of the template file.
+
+    Returns:
+        The raw lines of the file together with the variables found, in file order.
+    """
+    text = Path(path).read_text()
+    raw_lines = text.splitlines(keepends=False)
+
+    vars_list: list[AnnotatedVar] = []
+    seen: set[str] = set()
+    current_section: str | None = None
+
+    # Annotation accumulator for the variable currently being built.
+    ann_info: list[str] = []
+    ann_widget: str | None = None
+    ann_options: list[str] = []
+    ann_range: tuple[float, ...] | None = None
+    ann_group: str | None = None
+    ann_conditions: list[tuple[str, str]] = []
+
+    def reset_ann() -> None:
+        # Drop every pending annotation.
+        nonlocal ann_info, ann_widget, ann_options, ann_range, ann_group, ann_conditions
+        ann_info = []
+        ann_widget = None
+        ann_options = []
+        ann_range = None
+        ann_group = None
+        ann_conditions = []
+
+    def emit_var(name: str, is_active: bool, value_head: str, line_start: int) -> int:
+        # Record a variable using the pending annotations and return the index
+        # of its last line (later than line_start for multi-line quoted values).
+        end_idx = _consume_quoted_continuation(raw_lines, line_start, value_head)
+        raw_value = (
+            "\n".join([value_head, *raw_lines[line_start + 1 : end_idx + 1]]) if end_idx > line_start else value_head
+        )
+        # An explicit #d annotation wins over inference from name/value.
+        widget = ann_widget or _infer_widget(name, raw_value)
+        vars_list.append(
+            AnnotatedVar(
+                name=name,
+                is_active=is_active,
+                example_value=raw_value,
+                widget=widget,
+                info=" ".join(ann_info),
+                options=list(ann_options),
+                range=ann_range,
+                # An explicit #g annotation wins over the enclosing section title.
+                group=ann_group or current_section,
+                conditions=list(ann_conditions),
+                line_start=line_start,
+                line_end=end_idx,
+            )
+        )
+        seen.add(name)
+        reset_ann()
+        return end_idx
+
+    i = 0
+    while i < len(raw_lines):
+        line = raw_lines[i]
+        stripped = line.strip()
+
+        # Section header block (# ===...=== / # Title / # ===...===)
+        if _is_section_rule(line):
+            if i + 1 < len(raw_lines):
+                # The line right after the opening rule is the title, unless it
+                # is empty or itself a rule.
+                inner = raw_lines[i + 1].lstrip("#").strip()
+                if inner and not _is_section_rule(raw_lines[i + 1]):
+                    current_section = inner
+            reset_ann()
+            # Skip ahead to just past the next rule line; if there is none, the
+            # scan reaches the end of the file.
+            j = i + 1
+            while j < len(raw_lines) and not _is_section_rule(raw_lines[j]):
+                j += 1
+            i = j + 1 if j < len(raw_lines) else j
+            continue
+
+        # Annotation lines — accumulate until next variable or reset
+        if stripped.startswith("#i "):
+            ann_info.append(stripped[3:].strip())
+            i += 1
+            continue
+        if stripped.startswith("#d "):
+            ann_widget = stripped[3:].strip()
+            i += 1
+            continue
+        if stripped.startswith("#e "):
+            ann_options = [o.strip() for o in stripped[3:].split(",") if o.strip()]
+            i += 1
+            continue
+        if stripped.startswith("#r "):
+            parts = [p.strip() for p in stripped[3:].split(",")]
+            try:
+                # At most three numbers are used: min, max and an optional step.
+                ann_range = tuple(float(p) for p in parts[:3])  # type: ignore[assignment]
+            except ValueError:
+                # A non-numeric range is ignored; any earlier range stays in place.
+                pass
+            i += 1
+            continue
+        if stripped.startswith("#g "):
+            ann_group = stripped[3:].strip()
+            i += 1
+            continue
+        if stripped.startswith("#x "):
+            cond = stripped[3:].strip()
+            # Conditions without "=" are silently skipped.
+            if "=" in cond:
+                k, _, v = cond.partition("=")
+                ann_conditions.append((k.strip(), v.strip()))
+            i += 1
+            continue
+
+        # Inactive variable: #v NAME=value
+        if stripped.startswith("#v "):
+            rest = stripped[3:].strip()
+            if "=" in rest:
+                name, _, value_head = rest.partition("=")
+                name = name.strip()
+                if _NAME_RE.match(name) and name not in seen:
+                    end_idx = emit_var(name, False, value_head, i)
+                    i = end_idx + 1
+                    continue
+            # Malformed or duplicate #v line: treated like a plain comment.
+            reset_ann()
+            i += 1
+            continue
+
+        # Active variable: NAME=value  (no leading #)
+        if not stripped.startswith("#") and "=" in stripped:
+            name, _, value_head = stripped.partition("=")
+            name = name.strip()
+            if _NAME_RE.match(name) and name not in seen:
+                end_idx = emit_var(name, True, value_head, i)
+                i = end_idx + 1
+                continue
+
+        # True comment or blank — reset annotation accumulator
+        reset_ann()
+        i += 1
+
+    return ParsedEnvExample(lines=raw_lines, vars=vars_list)
+
+
+def load_env(path: str | Path) -> dict[str, str]:
+    """Read an existing .env into a flat {NAME: value} dict.
+
+    Commented-out lines (including #v lines) are skipped.
+    Values have surrounding quotes stripped.
+    """
+    p = Path(path)
+    if not p.exists():
+        return {}
+    out: dict[str, str] = {}
+    i = 0
+    lines = p.read_text().splitlines(keepends=False)
+    while i < len(lines):
+        line = lines[i]
+        stripped = line.strip()
+        if stripped.startswith("#") or not stripped:
+            i += 1
+            continue
+        if "=" in stripped:
+            name, _, value_head = stripped.partition("=")
+            name = name.strip()
+            if _NAME_RE.match(name):
+                end_idx = _consume_quoted_continuation(lines, i, value_head)
+                raw = "\n".join([value_head, *lines[i + 1 : end_idx + 1]]) if end_idx > i else value_head
+                out[name] = _unquote(raw.strip())
+                i = end_idx + 1
+                continue
+        i += 1
+    return out
+
+
+def _unquote(value: str) -> str:
+    if len(value) >= 2 and value[0] == value[-1] and value[0] in ("'", '"'):
+        return value[1:-1]
+    return value
+
+
+def _format_value(value: Any) -> str:
+    if isinstance(value, bool):
+        return "true" if value else "false"
+    if isinstance(value, (int, float)):
+        return str(value)
+    if isinstance(value, (dict, list)):
+        return f"'{json.dumps(value)}'"
+    s = str(value)
+    if not s:
+        return ""
+    if any(c in s for c in (" ", "\t", "#", "'", '"', "$", "\n")):
+        if "'" not in s:
+            return f"'{s}'"
+        return json.dumps(s)
+    return s
+
+
+def _has_value(v: Any) -> bool:
+    if v is None:
+        return False
+    if isinstance(v, str) and v == "":
+        return False
+    if isinstance(v, (list, dict)) and len(v) == 0:
+        return False
+    return True
+
+
+def serialize_env(
+    values: dict[str, Any],
+    parsed: ParsedEnvExample,
+    disabled: set[str] | None = None,
+    section_extras: dict[str, dict[str, Any]] | None = None,
+) -> str:
+    """Produce a .env text using parsed as the structural template.
+
+    - Variables in values with a user-set entry → emitted as NAME=value (active).
+    - Variables in disabled with a value → emitted as #v NAME=value (inactive, value preserved).
+    - Everything else → original line(s) from the template verbatim.
+    - section_extras: {section_title: {name: value}} injected inline at the end of each
+      named section (just before the next section header starts).
+    - Any values not in the template and not in section_extras are auto-appended as Misc.
+    """
+    disabled = disabled or set()
+    section_extras = section_extras or {}
+    out: list[str] = []
+    handled: set[str] = set()
+    var_by_start = {v.line_start: v for v in parsed.vars}
+    current_section: str | None = None
+
+    def _flush_extras(section: str | None) -> None:
+        if not section or section not in section_extras:
+            return
+        for name, val in section_extras[section].items():
+            if _has_value(val):
+                out.append(f"{name}={_format_value(val)}")
+
+    i = 0
+    while i < len(parsed.lines):
+        line = parsed.lines[i]
+
+        # Detect the opening rule of a new section (rule whose next line is the title)
+        if _is_section_rule(line):
+            next_line = parsed.lines[i + 1] if i + 1 < len(parsed.lines) else ""
+            next_content = next_line.lstrip("#").strip()
+            if next_content and not _is_section_rule(next_line):
+                # Flush extras for the section we're leaving before writing the new header
+                _flush_extras(current_section)
+                current_section = next_content
+
+        if i in var_by_start:
+            var = var_by_start[i]
+            user_value = values.get(var.name)
+            if var.name in disabled:
+                if _has_value(user_value):
+                    out.append(f"#v {var.name}={_format_value(user_value)}")
+                else:
+                    out.append(f"#v {var.name}={var.example_value.strip()}")
+            elif _has_value(user_value):
+                out.append(f"{var.name}={_format_value(user_value)}")
+            else:
+                out.extend(parsed.lines[var.line_start : var.line_end + 1])
+            handled.add(var.name)
+            i = var.line_end + 1
+            continue
+
+        out.append(line)
+        i += 1
+
+    # Flush extras for the final section
+    _flush_extras(current_section)
+
+    # Auto-collect any values not in the template into a Misc section
+    extras = [name for name in values if name not in handled and _has_value(values[name])]
+    if extras:
+        out.append("")
+        out.append("# ==============================================")
+        out.append("# Misc / Unmapped (added by config editor)")
+        out.append("# ==============================================")
+        for name in extras:
+            out.append(f"{name}={_format_value(values[name])}")
+
+    return "\n".join(out) + "\n"
+
+
+def compute_disabled(parsed: ParsedEnvExample, **state_values: str) -> set[str]:
+    """Return names of vars whose #x conditions are not all satisfied.
+
+    Pass mode keys as kwargs, e.g. compute_disabled(parsed, pipeline_mode="LLM").
+    """
+    disabled: set[str] = set()
+    for var in parsed.vars:
+        for cond_key, cond_val in var.conditions:
+            if state_values.get(cond_key, "") != cond_val:
+                disabled.add(var.name)
+                break
+    return disabled
diff --git a/apps/config_schema.py b/apps/config_schema.py
new file mode 100644
index 00000000..1d0ce172
--- /dev/null
+++ b/apps/config_schema.py
@@ -0,0 +1,69 @@
+"""Schema constants for the EVA config editor.
+
+Variable metadata (widget types, options, ranges, tooltips, conditions) is
+now encoded directly in .env.example using annotation prefixes (#i, #d, #e,
+#r, #g, #x, #v).  This module retains only things that are inherently
+editor-behaviour rather than file-structure:
+
+- Tab group name constants and ordering.
+- Mutex radio-button definitions (pipeline mode, perturbation mode).
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+
+# Tab/group titles.  A template variable belongs to the tab whose title equals
+# its group (taken from the enclosing section header or a `#g` annotation).
+GROUP_API_CONFIGS = "API Configs"
+GROUP_VOICE_PIPELINE = "Voice Pipeline"
+GROUP_DEPLOYMENTS = "LiteLLM Deployments"
+GROUP_RUNTIME = "Framework & Runtime"
+GROUP_TURN = "Turn Detection & VAD"
+GROUP_PERTURBATIONS = "User Config"
+GROUP_DEBUG = "Debug & Logging"
+# Catch-all title; deliberately not part of GROUPS below.
+GROUP_MISC = "Misc / Unmapped"
+
+# Regular groups in display order.
+GROUPS: list[str] = [
+    GROUP_API_CONFIGS,
+    GROUP_VOICE_PIPELINE,
+    GROUP_DEPLOYMENTS,
+    GROUP_RUNTIME,
+    GROUP_TURN,
+    GROUP_PERTURBATIONS,
+    GROUP_DEBUG,
+]
+
+
+@dataclass
+class MutexRadio:
+    """A UI radio button that enforces mutual exclusion among a set of vars."""
+
+    state_key: str  # st.session_state key managed by this radio
+    group: str  # which tab renders this radio
+    label: str
+    options: list[str]
+    help: str = ""
+    default: str = field(default="")
+
+    def __post_init__(self) -> None:
+        if not self.default and self.options:
+            self.default = self.options[0]
+
+
+# Radio buttons known to the editor.  Each entry's state_key doubles as the
+# name under which the selected option is stored in the session state.
+MUTEX_RADIOS: list[MutexRadio] = [
+    MutexRadio(
+        state_key="pipeline_mode",
+        group=GROUP_VOICE_PIPELINE,
+        label="Pipeline mode",
+        options=["LLM", "S2S", "AudioLLM"],
+        help="LLM = STT+LLM+TTS. S2S = speech-to-speech model. AudioLLM = audio-input LLM + TTS.",
+        default="LLM",
+    ),
+    MutexRadio(
+        state_key="perturbation_mode",
+        group=GROUP_PERTURBATIONS,
+        label="Perturbation persona",
+        options=["None", "Accent", "Behavior"],
+        help="Accent and Behavior are mutually exclusive (each claims the agent ID slot).",
+        default="None",
+    ),
+]
diff --git a/tests/unit/test_config_editor_integration.py b/tests/unit/test_config_editor_integration.py
new file mode 100644
index 00000000..08968efe
--- /dev/null
+++ b/tests/unit/test_config_editor_integration.py
@@ -0,0 +1,107 @@
+"""End-to-end check.
+
+A config built via the editor's serializer must construct a valid RunConfig
+for the LLM and S2S pipeline modes and for the Accent perturbation mode.
+"""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+from unittest.mock import patch
+
+from apps.config_io import compute_disabled, parse_env_example, serialize_env
+from eva.models.config import RunConfig
+
+REPO_ROOT = Path(__file__).resolve().parents[2]  # tests/unit/<this file> -> repository root
+
+_MODEL_LIST = [
+    {
+        "model_name": "gpt-5.2",  # placeholder OpenAI deployment (dummy key)
+        "litellm_params": {"model": "openai/gpt-5.2", "api_key": "sk-test", "max_parallel_requests": 5},
+        "model_info": {"base_model": "gpt-5.2"},
+    },
+    {
+        "model_name": "gemini-3.1-pro-preview",  # placeholder Vertex AI deployment
+        "litellm_params": {
+            "model": "vertex_ai/gemini-3.1-pro-preview",
+            "vertex_project": "p",
+            "vertex_location": "global",
+            "vertex_credentials": "/tmp/x.json",
+            "max_parallel_requests": 5,
+        },
+    },
+    {
+        "model_name": "us.anthropic.claude-opus-4-6",  # placeholder Bedrock deployment
+        "litellm_params": {
+            "model": "bedrock/us.anthropic.claude-opus-4-6-v1",
+            "aws_access_key_id": "k",
+            "aws_secret_access_key": "s",
+            "max_parallel_requests": 5,
+        },
+    },
+]
+
+
+def _serialize(values: dict, parsed, pipeline_mode: str = "LLM", perturbation_mode: str = "None") -> str:
+    modes = {"pipeline_mode": pipeline_mode, "perturbation_mode": perturbation_mode}
+    return serialize_env(values, parsed, disabled=compute_disabled(parsed, **modes))
+
+
+def _load_isolated(env_file: Path) -> RunConfig:
+    with patch.dict(os.environ, values={"PATH": os.environ["PATH"]}, clear=True):  # only PATH survives
+        return RunConfig(_cli_parse_args=False, _env_file=env_file)
+
+
+def test_llm_pipeline_serialization_constructs_runconfig(tmp_path: Path) -> None:
+    """LLM mode: the serialized LLM/STT/TTS and domain settings load into RunConfig."""
+    overrides = {
+        "EVA_MODEL_LIST": _MODEL_LIST,
+        "EVA_MODEL__LLM": "gpt-5.2",
+        "EVA_MODEL__STT": "deepgram",
+        "EVA_MODEL__TTS": "cartesia",
+        "EVA_MODEL__STT_PARAMS": {"api_key": "k", "model": "nova-2"},
+        "EVA_MODEL__TTS_PARAMS": {"api_key": "k", "model": "sonic"},
+        "EVA_DOMAIN": "airline",
+    }
+    dotenv = tmp_path / ".env"
+    dotenv.write_text(_serialize(overrides, parse_env_example(REPO_ROOT / ".env.example")))
+    cfg = _load_isolated(dotenv)
+    assert cfg.model.llm == "gpt-5.2"
+    assert cfg.model.stt == "deepgram"
+    assert cfg.model.tts == "cartesia"
+    assert cfg.domain == "airline"
+
+
+def test_s2s_pipeline_serialization_constructs_runconfig(tmp_path: Path) -> None:
+    """S2S mode: the serialized speech-to-speech model setting loads into RunConfig."""
+    example = parse_env_example(REPO_ROOT / ".env.example")
+    overrides = {
+        "EVA_MODEL_LIST": _MODEL_LIST,
+        "EVA_MODEL__S2S": "gpt-realtime-mini",
+        "EVA_MODEL__S2S_PARAMS": {"api_key": "k", "model": "gpt-realtime-mini"},
+        "EVA_DOMAIN": "airline",
+    }
+    dotenv = tmp_path / ".env"
+    dotenv.write_text(_serialize(overrides, example, pipeline_mode="S2S"))
+    assert _load_isolated(dotenv).model.s2s == "gpt-realtime-mini"
+
+
+def test_perturbation_accent_serialization_constructs_runconfig(tmp_path: Path) -> None:
+    """Accent mode: the accent loads into RunConfig and no behavior is set."""
+    example = parse_env_example(REPO_ROOT / ".env.example")
+    overrides = {
+        "EVA_MODEL_LIST": _MODEL_LIST,
+        "EVA_MODEL__LLM": "gpt-5.2",
+        "EVA_MODEL__STT": "deepgram",
+        "EVA_MODEL__TTS": "cartesia",
+        "EVA_MODEL__STT_PARAMS": {"api_key": "k", "model": "nova-2"},
+        "EVA_MODEL__TTS_PARAMS": {"api_key": "k", "model": "sonic"},
+        "EVA_DOMAIN": "airline",
+        "EVA_PERTURBATION__ACCENT": "french",
+    }
+    (tmp_path / ".env").write_text(_serialize(overrides, example, perturbation_mode="Accent"))
+    perturbation = _load_isolated(tmp_path / ".env").perturbation
+    assert perturbation is not None
+    assert perturbation.accent == "french"
+    assert perturbation.behavior is None
diff --git a/tests/unit/test_config_io.py b/tests/unit/test_config_io.py
new file mode 100644
index 00000000..77d0baed
--- /dev/null
+++ b/tests/unit/test_config_io.py
@@ -0,0 +1,156 @@
+"""Unit tests for apps/config_io.py (annotation-aware env parser/serializer)."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from apps.config_io import load_env, parse_env_example, serialize_env
+from apps.config_schema import GROUP_MISC
+
+ENV_EXAMPLE = Path(__file__).resolve().parents[2] / ".env.example"  # template at the repository root
+
+
+def test_parses_active_var() -> None:
+    """An uncommented assignment is parsed as active, with its widget and info text."""
+    var = parse_env_example(ENV_EXAMPLE).by_name["ELEVENLABS_API_KEY"]
+    assert var.is_active is True
+    assert var.widget == "secret"
+    assert "ElevenLabs" in var.info
+
+
+def test_parses_inactive_var() -> None:
+    """A var that is inactive in the example is still parsed, including its enum options."""
+    var = parse_env_example(ENV_EXAMPLE).by_name["EVA_DOMAIN"]
+    assert var.is_active is False
+    assert var.widget == "enum"
+    assert "airline" in var.options
+
+
+def test_parses_enum_options() -> None:
+    """Both ``deepgram`` and ``cartesia`` are listed as options for EVA_MODEL__STT."""
+    choices = parse_env_example(ENV_EXAMPLE).by_name["EVA_MODEL__STT"].options
+    assert "deepgram" in choices
+    assert "cartesia" in choices
+
+
+def test_parses_range() -> None:
+    """The numeric range of EVA_MAX_CONCURRENT_CONVERSATIONS is parsed as 1.0 to 100.0."""
+    bounds = parse_env_example(ENV_EXAMPLE).by_name["EVA_MAX_CONCURRENT_CONVERSATIONS"].range
+    assert bounds is not None
+    assert bounds[0] == 1.0
+    assert bounds[1] == 100.0
+
+
+def test_parses_condition() -> None:
+    """EVA_MODEL__STT carries a condition tying it to pipeline mode ``LLM``."""
+    conditions = parse_env_example(ENV_EXAMPLE).by_name["EVA_MODEL__STT"].conditions
+    assert ("pipeline_mode", "LLM") in conditions
+
+
+def test_parses_multi_condition() -> None:
+    """A var may carry several conditions; both are present on EVA_FRENCH_ACCENT_USER_F."""
+    conditions = parse_env_example(ENV_EXAMPLE).by_name["EVA_FRENCH_ACCENT_USER_F"].conditions
+    assert ("perturbation_mode", "Accent") in conditions
+    assert ("EVA_PERTURBATION__ACCENT", "french") in conditions
+
+
+def test_group_from_section_header() -> None:
+    specs = parse_env_example(ENV_EXAMPLE).by_name  # each spec records the group it was parsed under
+    assert specs["ELEVENLABS_API_KEY"].group == "API Configs"
+    assert specs["EVA_MODEL__LLM"].group == "LiteLLM Deployments"
+
+
+def test_dedupes_repeated_names() -> None:
+    """EVA_METRICS appears exactly once among the parsed vars."""
+    names = [var.name for var in parse_env_example(ENV_EXAMPLE).vars]
+    assert names.count("EVA_METRICS") == 1
+
+
+def test_multiline_deployment_list() -> None:
+    """EVA_MODEL_LIST uses the deployment-list widget and spans more than one line."""
+    var = parse_env_example(ENV_EXAMPLE).by_name["EVA_MODEL_LIST"]
+    assert var.widget == "json_deployment_list"
+    assert var.line_end > var.line_start
+
+
+def test_serialize_with_no_values_is_byte_identical_to_example() -> None:
+    """Serializing with no overrides reproduces the example, plus a final newline if missing."""
+    rendered = serialize_env({}, parse_env_example(ENV_EXAMPLE))
+    expected = ENV_EXAMPLE.read_text()
+    if not expected.endswith("\n"):
+        expected += "\n"
+    assert rendered == expected
+
+
+def test_serialize_overrides_active_var() -> None:
+    """A supplied value replaces the example placeholder of an active var."""
+    rendered = serialize_env({"OPENAI_API_KEY": "sk-test-123"}, parse_env_example(ENV_EXAMPLE))
+    assert "OPENAI_API_KEY=sk-test-123" in rendered
+    assert "your_openai_api_key_here" not in rendered
+
+
+def test_serialize_activates_inactive_var() -> None:
+    """Giving an inactive var a value writes it as a plain assignment, not a ``#v`` line."""
+    rendered = serialize_env({"EVA_DOMAIN": "airline"}, parse_env_example(ENV_EXAMPLE))
+    output_lines = rendered.splitlines()
+    assert "EVA_DOMAIN=airline" in output_lines
+    assert "#v EVA_DOMAIN=airline" not in output_lines
+
+
+def test_serialize_json_blob_single_quoted() -> None:
+    """A list value for EVA_MODEL_LIST is emitted starting with a single quote."""
+    model_list = [{"model_name": "x", "litellm_params": {"model": "openai/x"}}]
+    rendered = serialize_env({"EVA_MODEL_LIST": model_list}, parse_env_example(ENV_EXAMPLE))
+    assert "EVA_MODEL_LIST='" in rendered
+
+
+def test_serialize_bool_lowercases() -> None:
+    """A Python ``True`` is rendered as lowercase ``true``."""
+    rendered = serialize_env({"EVA_DEBUG": True}, parse_env_example(ENV_EXAMPLE))
+    assert "EVA_DEBUG=true" in rendered
+
+
+def test_serialize_appends_misc_section_for_unknown_vars() -> None:
+    """A var absent from the example is still written, alongside the misc group name."""
+    rendered = serialize_env({"EVA_TOTALLY_NEW_VAR": "hello"}, parse_env_example(ENV_EXAMPLE))
+    assert GROUP_MISC in rendered
+    assert "EVA_TOTALLY_NEW_VAR=hello" in rendered
+
+
+def test_serialize_disabled_var_uses_current_value() -> None:
+    """A var listed in ``disabled`` is written on a ``#v`` line with the supplied value."""
+    name = "EVA_MODEL__STT"
+    example = parse_env_example(ENV_EXAMPLE)
+    rendered = serialize_env({name: "deepgram"}, example, disabled={name})
+    # NOTE(review): if the example already ships this exact "#v" line, the check
+    # cannot tell a used value from an untouched template line; confirm.
+    assert "#v EVA_MODEL__STT=deepgram" in rendered
+
+
+def test_load_env_reads_existing_file(tmp_path: Path) -> None:
+    """load_env keeps active assignments, skips ``#v`` lines and strips single quotes."""
+    dotenv = tmp_path / ".env"
+    dotenv.write_text("FOO=bar\n#v COMMENTED=skipme\nQUOTED='hello world'\nJSON='[{\"a\": 1}]'\n")
+    assert load_env(dotenv) == {"FOO": "bar", "QUOTED": "hello world", "JSON": '[{"a": 1}]'}
+
+
+def test_load_env_missing_file_returns_empty(tmp_path: Path) -> None:  # missing file is not an error
+    assert load_env(tmp_path / "does-not-exist") == {}
+
+
+def test_round_trip_through_load_env(tmp_path: Path) -> None:
+    """Values written by serialize_env are read back by load_env as strings."""
+    example = parse_env_example(ENV_EXAMPLE)
+    overrides = {
+        "OPENAI_API_KEY": "sk-abc",
+        "EVA_DEBUG": True,
+        "EVA_MAX_CONCURRENT_CONVERSATIONS": 8,
+    }
+    written = serialize_env(overrides, example)
+    dotenv = tmp_path / ".env"
+    dotenv.write_text(written)
+    reloaded = load_env(dotenv)
+    # The bool and the int come back in their textual form.
+    assert reloaded["OPENAI_API_KEY"] == "sk-abc"
+    assert reloaded["EVA_DEBUG"] == "true"
+    assert reloaded["EVA_MAX_CONCURRENT_CONVERSATIONS"] == "8"