From bd0fc29206002480db6e1b08bc4277cbb5450eea Mon Sep 17 00:00:00 2001 From: Omkar Gaikwad Date: Fri, 10 Apr 2026 08:43:38 +0000 Subject: [PATCH 1/4] feat: add Dockerfile and Cloud Build pipeline for automated Evalbench testing of Cloud SQL PostgreSQL extension --- Dockerfile | 36 +++++++++++++++++++++++ cloudbuild.yaml | 64 +++++++++++++++++++++++++++++++++++++++++ evals/dataset.json | 15 ++++++++++ evals/model_config.yaml | 18 ++++++++++++ evals/run_config.yaml | 12 ++++++++ 5 files changed, 145 insertions(+) create mode 100644 Dockerfile create mode 100644 cloudbuild.yaml create mode 100644 evals/dataset.json create mode 100644 evals/model_config.yaml create mode 100644 evals/run_config.yaml diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..39e5b3c --- /dev/null +++ b/Dockerfile @@ -0,0 +1,36 @@ +# --- Stage 1: Build the binary from source (Latest Nightly) --- +FROM golang:1.25 AS builder + +WORKDIR /build + +# Clone the official genai-toolbox source code (always latest main branch) +RUN git clone --depth 1 https://github.com/googleapis/genai-toolbox.git . + +# Compile the binary with CGO ENABLED to support all upstream database drivers (Oracle, etc.) +RUN CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build -o toolbox . + +# --- Stage 2: Final Lightweight Runtime Image --- +# Using the exact same image (golang:1.25) for runtime to perfectly match GLIBC versions +FROM golang:1.25 + + +# Install necessary runtime certificates and standard C libraries for CGO binary +RUN apt-get update && apt-get install -y ca-certificates libc6 && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +# Copy the freshly compiled binary from the builder stage +COPY --from=builder /build/toolbox /app/toolbox +RUN chmod +x /app/toolbox + +# Copy the extension's skills and configuration into the container +COPY skills/ ./skills/ +COPY gemini-extension.json . + +# Add required tools.yaml placeholder to satisfy binary startup checks +RUN touch tools.yaml + +# Expose HTTP API and UI endpoints to successfully pass Cloud Run health checks +ENTRYPOINT ["/app/toolbox", "--prebuilt", "cloud-sql-postgres", "--address=0.0.0.0", "--port=8080", "--enable-api", "--ui"] + + diff --git a/cloudbuild.yaml b/cloudbuild.yaml new file mode 100644 index 0000000..44bb5ff --- /dev/null +++ b/cloudbuild.yaml @@ -0,0 +1,64 @@ +steps: + + # --- STEP 1: Build and Push Docker Image --- + - name: 'gcr.io/cloud-builders/docker' + args: + - 'build' + - '-t' + - 'us-central1-docker.pkg.dev/omkar-playground/toolbox-evals/cloud-sql-postgresql:latest' + - '.' + + - name: 'gcr.io/cloud-builders/docker' + args: + - 'push' + - 'us-central1-docker.pkg.dev/omkar-playground/toolbox-evals/cloud-sql-postgresql:latest' + + # --- STEP 2: Deploy to Cloud Run --- + - name: 'gcr.io/google.com/cloudsdktool/cloud-sdk' + entrypoint: gcloud + args: + - 'run' + - 'deploy' + - 'cloud-sql-postgresql-server' + - '--image=us-central1-docker.pkg.dev/omkar-playground/toolbox-evals/cloud-sql-postgresql:latest' + - '--region=us-central1' + - '--allow-unauthenticated' + - '--port=8080' + - '--timeout=300' + - '--set-env-vars=CLOUD_SQL_POSTGRES_PROJECT=omkar-playground,CLOUD_SQL_POSTGRES_INSTANCE=omkar-demo-postgres-1,CLOUD_SQL_POSTGRES_REGION=us-central1,CLOUD_SQL_POSTGRES_DATABASE=postgres,CLOUD_SQL_POSTGRES_USER=postgres,CLOUD_SQL_POSTGRES_PASSWORD=7`[EP^`U"_frcD;q,CLOUD_SQL_POSTGRES_IP_TYPE=PUBLIC' + + # --- STEP 3: Run Eval Server in Background --- + - name: 'gcr.io/cloud-builders/docker' + args: + - 'run' + - '-d' + - '--network=cloudbuild' + - '--name=eval_server' + - 'us-central1-docker.pkg.dev/omkar-playground/toolbox-evals/eval_server:latest' + + # --- STEP 4: Run Evalbench Evaluation Client --- + # - name: 'python:3.10' + # entrypoint: 'bash' + # args: + # - '-c' + # - | + # # Clone Evalbench + # git clone https://github.com/GoogleCloudPlatform/evalbench.git + # cd evalbench + + # # Install Dependencies + # pip install -r requirements.txt + + # # Setup Environment Variables + # export EVAL_GCP_PROJECT_ID=omkar-playground + # export EVAL_GCP_PROJECT_REGION=us-central1 + # export EVAL_CONFIG=../evals/run_config.yaml + + # # Compile required protobuf modules and Run Evaluation Client against the eval_server container + # make proto + # ./run_client.sh --endpoint=eval_server:50051 + + +options: + env: + - 'DOCKER_BUILDKIT=1' diff --git a/evals/dataset.json b/evals/dataset.json new file mode 100644 index 0000000..42af644 --- /dev/null +++ b/evals/dataset.json @@ -0,0 +1,15 @@ +{ + "scenarios": [ + { + "id": "cloud-sql-debug-01", + "starting_prompt": "I need to debug the database.", + "conversation_plan": "Ask the agent to list instances in project omkar-playground. Once listed, ask it to check the CPU usage of the first instance. Finally, ask if that usage is considered high.", + "expected_trajectory": [ + "list_instances", + "get_metrics" + ], + "kind": "tool", + "max_turns": 15 + } + ] +} \ No newline at end of file diff --git a/evals/model_config.yaml b/evals/model_config.yaml new file mode 100644 index 0000000..dbb2dc5 --- /dev/null +++ b/evals/model_config.yaml @@ -0,0 +1,18 @@ +gemini_cli_version: "@google/gemini-cli@0.26.0" +generator: gemini_cli +env: + GOOGLE_CLOUD_PROJECT: "omkar-playground" + GOOGLE_CLOUD_LOCATION: "us-central1" + GOOGLE_GENAI_USE_VERTEXAI: "true" + GEMINI_API_MODEL: "gemini-2.5-pro" +setup: + extensions: + "https://github.com/gemini-cli-extensions/cloud-sql-postgresql": + settings: + CLOUD_SQL_POSTGRES_PROJECT: "omkar-playground" + CLOUD_SQL_POSTGRES_INSTANCE: "omkar-demo-postgres-1" + CLOUD_SQL_POSTGRES_REGION: "us-central1" + CLOUD_SQL_POSTGRES_DATABASE: "postgres" + CLOUD_SQL_POSTGRES_USER: "postgres" + CLOUD_SQL_POSTGRES_PASSWORD: '7`[EP^`U"_frcD;q' + CLOUD_SQL_POSTGRES_IP_TYPE: "PUBLIC" diff --git a/evals/run_config.yaml b/evals/run_config.yaml new file mode 100644 index 0000000..a631de9 --- /dev/null +++ b/evals/run_config.yaml @@ -0,0 +1,12 @@ +dataset_config: /workspace/evals/dataset.json +dataset_format: gemini-cli-format + +orchestrator: geminicli +model_config: /workspace/evals/model_config.yaml +# You can reference default simulated user models provided by the evalbench repo: +simulated_user_model_config: datasets/model_configs/gemini_2.5_pro_model.yaml + +scorers: + trajectory_matcher: {} + goal_completion: + model_config: datasets/model_configs/gemini_2.5_pro_model.yaml From bbfd3305dcf027fd290e6b1ce8efe7c95c965372 Mon Sep 17 00:00:00 2001 From: Omkar Gaikwad Date: Mon, 13 Apr 2026 10:22:54 +0000 Subject: [PATCH 2/4] feat: integrate full evaluation pipeline in cloudbuild and update model configurations --- cloudbuild.yaml | 96 ++++++++++++++++++++++----------- evals/dataset.json | 3 +- evals/gemini_2.5_pro_model.yaml | 4 ++ evals/model_config.yaml | 2 +- evals/run_config.yaml | 4 +- 5 files changed, 73 insertions(+), 36 deletions(-) create mode 100644 evals/gemini_2.5_pro_model.yaml diff --git a/cloudbuild.yaml b/cloudbuild.yaml index 44bb5ff..90bbcd8 100644 --- a/cloudbuild.yaml +++ b/cloudbuild.yaml @@ -25,40 +25,74 @@ steps: - '--allow-unauthenticated' - '--port=8080' - '--timeout=300' - - '--set-env-vars=CLOUD_SQL_POSTGRES_PROJECT=omkar-playground,CLOUD_SQL_POSTGRES_INSTANCE=omkar-demo-postgres-1,CLOUD_SQL_POSTGRES_REGION=us-central1,CLOUD_SQL_POSTGRES_DATABASE=postgres,CLOUD_SQL_POSTGRES_USER=postgres,CLOUD_SQL_POSTGRES_PASSWORD=7`[EP^`U"_frcD;q,CLOUD_SQL_POSTGRES_IP_TYPE=PUBLIC' + - '--set-env-vars=CLOUD_SQL_POSTGRES_PROJECT=omkar-playground,CLOUD_SQL_POSTGRES_INSTANCE=omkar-demo-postgres-1,CLOUD_SQL_POSTGRES_REGION=us-central1,CLOUD_SQL_POSTGRES_DATABASE=postgres,CLOUD_SQL_POSTGRES_USER=postgres,CLOUD_SQL_POSTGRES_PASSWORD=[PASSWORD],CLOUD_SQL_POSTGRES_IP_TYPE=PUBLIC' - # --- STEP 3: Run Eval Server in Background --- - - name: 'gcr.io/cloud-builders/docker' + # --- STEP 3: Fully Integrated Evaluation to Persist Results --- + - name: 'us-central1-docker.pkg.dev/omkar-playground/toolbox-evals/eval_server:latest' + entrypoint: 'bash' args: - - 'run' - - '-d' - - '--network=cloudbuild' - - '--name=eval_server' - - 'us-central1-docker.pkg.dev/omkar-playground/toolbox-evals/eval_server:latest' + - '-c' + - | + set -e + cd /evalbench + + export EVAL_GCP_PROJECT_ID=omkar-playground + export EVAL_GCP_PROJECT_REGION=us-central1 + + echo "Compiling protobuf files..." + python3 -m grpc_tools.protoc --proto_path=evalbench/evalproto --python_out=evalbench/evalproto --grpc_python_out=evalbench/evalproto evalbench/evalproto/*.proto + + echo "Patching client to use insecure credentials..." + # sed -i 's/"localhost:50051"/"127.0.0.1:50051"/g' evalbench/client/eval_client.py + sed -i 's/grpc.alts_channel_credentials()/None/g' evalbench/client/eval_client.py + sed -i 's/grpc.aio.secure_channel(address, channel_creds)/grpc.aio.insecure_channel(address)/g' evalbench/client/eval_client.py + + echo "Patching server to listen on all IPv4 interfaces (0.0.0.0)..." + sed -i 's/"\[::\]:%s"/"0.0.0.0:%s"/g' /evalbench/evalbench/eval_server.py + echo "Checking bind success in server (writing to stderr)..." + sed -i 's|server.add_insecure_port("0.0.0.0:%s" % PORT)|bound_port = server.add_insecure_port("0.0.0.0:%s" % PORT)\n import sys\n sys.stderr.write(f"BOUND_PORT: {bound_port}\\n")\n if bound_port == 0: raise RuntimeError("Failed to bind to port!")|' /evalbench/evalbench/eval_server.py - # --- STEP 4: Run Evalbench Evaluation Client --- - # - name: 'python:3.10' - # entrypoint: 'bash' - # args: - # - '-c' - # - | - # # Clone Evalbench - # git clone https://github.com/GoogleCloudPlatform/evalbench.git - # cd evalbench + echo "Patching eval_service.py to fix TypeError in get_reporters..." + sed -i 's|reporters = get_reporters(config.get("reporting"), job_id, run_time)|reporters = get_reporters(config.get("reporting") or {}, job_id, run_time)|' /evalbench/evalbench/eval_service.py + + echo "Patching util/session.py to make ADK import lazy..." + sed -i 's|from google.adk.sessions import VertexAiSessionService||' /evalbench/evalbench/util/session.py + sed -i 's| def __init__(self, config):| def __init__(self, config):\n from google.adk.sessions import VertexAiSessionService|' /evalbench/evalbench/util/session.py + echo "Patching databases/util.py to make SecretManagerClient lazy..." + sed -i 's|CLIENT = secretmanager_v1.SecretManagerServiceClient()|CLIENT = None\ndef get_client():\n global CLIENT\n if CLIENT is None:\n CLIENT = secretmanager_v1.SecretManagerServiceClient()\n return CLIENT|' /evalbench/evalbench/databases/util.py || echo "Failed to patch databases/util.py" + sed -i 's|CLIENT.access_secret_version|get_client().access_secret_version|' /evalbench/evalbench/databases/util.py || echo "Failed to patch databases/util.py usage" + cd evalbench + export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python + export PYTHONPATH=./evalproto:. + export CLOUD_RUN=True + export PORT=50051 + + + + echo "Starting Evaluation Server in background..." + # NEW: Added Date: Mon, 13 Apr 2026 11:32:38 +0000 Subject: [PATCH 3/4] chore: update Dockerfile to fetch binary, and parameterize Cloud Build variables with $PROJECT_ID --- Dockerfile | 30 +++++++++--------------------- cloudbuild.yaml | 12 ++++++------ 2 files changed, 15 insertions(+), 27 deletions(-) diff --git a/Dockerfile b/Dockerfile index 39e5b3c..47d3a31 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,26 +1,16 @@ -# --- Stage 1: Build the binary from source (Latest Nightly) --- -FROM golang:1.25 AS builder +# --- Final Runtime Image --- +# Using python:3.11 as the base image to support evaluations that require Python, +# while still running the pre-compiled Go binary for the toolbox server. +FROM python:3.11 -WORKDIR /build - -# Clone the official genai-toolbox source code (always latest main branch) -RUN git clone --depth 1 https://github.com/googleapis/genai-toolbox.git . - -# Compile the binary with CGO ENABLED to support all upstream database drivers (Oracle, etc.) -RUN CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build -o toolbox . - -# --- Stage 2: Final Lightweight Runtime Image --- -# Using the exact same image (golang:1.25) for runtime to perfectly match GLIBC versions -FROM golang:1.25 - - -# Install necessary runtime certificates and standard C libraries for CGO binary -RUN apt-get update && apt-get install -y ca-certificates libc6 && rm -rf /var/lib/apt/lists/* +# Install necessary runtime certificates, standard C libraries, and curl +RUN apt-get update && apt-get install -y ca-certificates libc6 curl && rm -rf /var/lib/apt/lists/* WORKDIR /app -# Copy the freshly compiled binary from the builder stage -COPY --from=builder /build/toolbox /app/toolbox +# Dynamically fetch the latest version and download the binary +RUN LATEST_VERSION=$(curl -s https://api.github.com/repos/googleapis/mcp-toolbox/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') && \ + curl -L https://storage.googleapis.com/mcp-toolbox-for-databases/${LATEST_VERSION}/linux/amd64/toolbox -o /app/toolbox RUN chmod +x /app/toolbox # Copy the extension's skills and configuration into the container @@ -32,5 +22,3 @@ RUN touch tools.yaml # Expose HTTP API and UI endpoints to successfully pass Cloud Run health checks ENTRYPOINT ["/app/toolbox", "--prebuilt", "cloud-sql-postgres", "--address=0.0.0.0", "--port=8080", "--enable-api", "--ui"] - - diff --git a/cloudbuild.yaml b/cloudbuild.yaml index 90bbcd8..68c06d8 100644 --- a/cloudbuild.yaml +++ b/cloudbuild.yaml @@ -5,13 +5,13 @@ steps: args: - 'build' - '-t' - - 'us-central1-docker.pkg.dev/omkar-playground/toolbox-evals/cloud-sql-postgresql:latest' + - 'us-central1-docker.pkg.dev/$PROJECT_ID/toolbox-evals/cloud-sql-postgresql:latest' - '.' - name: 'gcr.io/cloud-builders/docker' args: - 'push' - - 'us-central1-docker.pkg.dev/omkar-playground/toolbox-evals/cloud-sql-postgresql:latest' + - 'us-central1-docker.pkg.dev/$PROJECT_ID/toolbox-evals/cloud-sql-postgresql:latest' # --- STEP 2: Deploy to Cloud Run --- - name: 'gcr.io/google.com/cloudsdktool/cloud-sdk' @@ -20,15 +20,15 @@ steps: - 'run' - 'deploy' - 'cloud-sql-postgresql-server' - - '--image=us-central1-docker.pkg.dev/omkar-playground/toolbox-evals/cloud-sql-postgresql:latest' + - '--image=us-central1-docker.pkg.dev/$PROJECT_ID/toolbox-evals/cloud-sql-postgresql:latest' - '--region=us-central1' - '--allow-unauthenticated' - '--port=8080' - '--timeout=300' - - '--set-env-vars=CLOUD_SQL_POSTGRES_PROJECT=omkar-playground,CLOUD_SQL_POSTGRES_INSTANCE=omkar-demo-postgres-1,CLOUD_SQL_POSTGRES_REGION=us-central1,CLOUD_SQL_POSTGRES_DATABASE=postgres,CLOUD_SQL_POSTGRES_USER=postgres,CLOUD_SQL_POSTGRES_PASSWORD=[PASSWORD],CLOUD_SQL_POSTGRES_IP_TYPE=PUBLIC' + - '--set-env-vars=CLOUD_SQL_POSTGRES_PROJECT=$PROJECT_ID,CLOUD_SQL_POSTGRES_INSTANCE=omkar-demo-postgres-1,CLOUD_SQL_POSTGRES_REGION=us-central1,CLOUD_SQL_POSTGRES_DATABASE=postgres,CLOUD_SQL_POSTGRES_USER=postgres,CLOUD_SQL_POSTGRES_PASSWORD=[PASSWORD],CLOUD_SQL_POSTGRES_IP_TYPE=PUBLIC' # --- STEP 3: Fully Integrated Evaluation to Persist Results --- - - name: 'us-central1-docker.pkg.dev/omkar-playground/toolbox-evals/eval_server:latest' + - name: 'us-central1-docker.pkg.dev/$PROJECT_ID/toolbox-evals/eval_server:latest' entrypoint: 'bash' args: - '-c' @@ -36,7 +36,7 @@ steps: set -e cd /evalbench - export EVAL_GCP_PROJECT_ID=omkar-playground + export EVAL_GCP_PROJECT_ID=$PROJECT_ID export EVAL_GCP_PROJECT_REGION=us-central1 echo "Compiling protobuf files..." From e69c7b6dcc29f2b95464f131918db99a304597d7 Mon Sep 17 00:00:00 2001 From: Omkar Gaikwad Date: Mon, 13 Apr 2026 12:16:42 +0000 Subject: [PATCH 4/4] chore: enable BigQuery reporting in eval configuration --- evals/run_config.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/evals/run_config.yaml b/evals/run_config.yaml index ce09cfd..d2ba9cd 100644 --- a/evals/run_config.yaml +++ b/evals/run_config.yaml @@ -10,3 +10,6 @@ scorers: trajectory_matcher: {} goal_completion: model_config: /workspace/evals/gemini_2.5_pro_model.yaml + +reporting: + bigquery: {}