gemini-cli-extensions · omkargaikwad23 · Apr 10, 2026 · Apr 13, 2026 · Apr 13, 2026 · Apr 13, 2026
@@ -0,0 +1,50 @@
+# --- Final Runtime Image ---
+# python:3.11 is the base image because the evaluations need a Python runtime;
+# the toolbox server itself is a pre-compiled Go binary downloaded below.
+FROM python:3.11
+
+# Optional toolbox release tag to install. When empty (the default) the newest
+# release tag is looked up from the GitHub API at build time, as before.
+ARG TOOLBOX_VERSION=""
+
+# Make RUN pipelines fail when any stage fails, not only the last one.
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+# Install necessary runtime certificates, standard C libraries, and curl
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        ca-certificates \
+        curl \
+        libc6 \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+# Resolve the release tag, download the binary and mark it executable in one
+# layer. curl -f makes HTTP errors (rate limiting, missing release) fail the
+# build instead of saving an error page as /app/toolbox, and the empty-tag
+# check stops the build before a malformed download URL is requested.
+RUN VERSION="${TOOLBOX_VERSION}" \
+    && if [ -z "${VERSION}" ]; then \
+         VERSION=$(curl -fsSL https://api.github.com/repos/googleapis/mcp-toolbox/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/'); \
+       fi \
+    && if [ -z "${VERSION}" ]; then echo "Could not determine the toolbox release tag" >&2; exit 1; fi \
+    && curl -fsSL "https://storage.googleapis.com/mcp-toolbox-for-databases/${VERSION}/linux/amd64/toolbox" -o /app/toolbox \
+    && chmod +x /app/toolbox
+
+# Copy the extension's skills and configuration into the container
+COPY skills/ ./skills/
+COPY gemini-extension.json .
+
+# Add required tools.yaml placeholder to satisfy binary startup checks
+RUN touch tools.yaml
+
+# NOTE(review): no USER is set, so the server runs as root — confirm whether a
+# non-root user is acceptable for the toolbox binary.
+
+# Document the listening port (EXPOSE does not publish it).
+EXPOSE 8080
+
+# Listen on all interfaces on port 8080 with the HTTP API and UI enabled so
+# that Cloud Run health checks pass.
+ENTRYPOINT ["/app/toolbox", "--prebuilt", "cloud-sql-postgres", "--address=0.0.0.0", "--port=8080", "--enable-api", "--ui"]
@@ -0,0 +1,108 @@
+# Cloud Build pipeline: builds and pushes the toolbox image, deploys it to
+# Cloud Run, and then runs the evaluation step.
+steps:
+
+  # --- STEP 1: Build and Push Docker Image ---
+  # The image gets the immutable build-ID tag in addition to "latest" so the
+  # deploy step can reference exactly the image produced by this build.
+  - name: 'gcr.io/cloud-builders/docker'
+    args:
+      - 'build'
+      - '-t'
+      - 'us-central1-docker.pkg.dev/$PROJECT_ID/toolbox-evals/cloud-sql-postgresql:$BUILD_ID'
+      - '-t'
+      - 'us-central1-docker.pkg.dev/$PROJECT_ID/toolbox-evals/cloud-sql-postgresql:latest'
+      - '.'
+
+  - name: 'gcr.io/cloud-builders/docker'
+    args:
+      - 'push'
+      - '--all-tags'
+      - 'us-central1-docker.pkg.dev/$PROJECT_ID/toolbox-evals/cloud-sql-postgresql'
+
+  # --- STEP 2: Deploy to Cloud Run ---
+  # NOTE(review): the service allows unauthenticated access and receives the
+  # database password as a plain environment variable; consider
+  # --set-secrets with Secret Manager instead.
+  - name: 'gcr.io/google.com/cloudsdktool/cloud-sdk'
+    entrypoint: gcloud
+    args:
+      - 'run'
+      - 'deploy'
+      - 'cloud-sql-postgresql-server'
+      - '--image=us-central1-docker.pkg.dev/$PROJECT_ID/toolbox-evals/cloud-sql-postgresql:$BUILD_ID'
+      - '--region=us-central1'
+      - '--allow-unauthenticated'
+      - '--port=8080'
+      - '--timeout=300'
+      - '--set-env-vars=CLOUD_SQL_POSTGRES_PROJECT=$PROJECT_ID,CLOUD_SQL_POSTGRES_INSTANCE=omkar-demo-postgres-1,CLOUD_SQL_POSTGRES_REGION=us-central1,CLOUD_SQL_POSTGRES_DATABASE=postgres,CLOUD_SQL_POSTGRES_USER=postgres,CLOUD_SQL_POSTGRES_PASSWORD=[PASSWORD],CLOUD_SQL_POSTGRES_IP_TYPE=PUBLIC'
+
+  # --- STEP 3: Fully Integrated Evaluation to Persist Results ---
+  - name: 'us-central1-docker.pkg.dev/$PROJECT_ID/toolbox-evals/eval_server:latest'
+    entrypoint: 'bash'
+    args:
+      - '-c'
+      - |
+        set -e
+        cd /evalbench
+
+        export EVAL_GCP_PROJECT_ID=$PROJECT_ID
+        export EVAL_GCP_PROJECT_REGION=us-central1
+
+        echo "Compiling protobuf files..."
+        python3 -m grpc_tools.protoc --proto_path=evalbench/evalproto --python_out=evalbench/evalproto --grpc_python_out=evalbench/evalproto evalbench/evalproto/*.proto
+
+        echo "Patching client to use insecure credentials..."
+        # sed -i 's/"localhost:50051"/"127.0.0.1:50051"/g' evalbench/client/eval_client.py
+        sed -i 's/grpc.alts_channel_credentials()/None/g' evalbench/client/eval_client.py
+        sed -i 's/grpc.aio.secure_channel(address, channel_creds)/grpc.aio.insecure_channel(address)/g' evalbench/client/eval_client.py
+
+        echo "Patching server to listen on all IPv4 interfaces (0.0.0.0)..."
+        sed -i 's/"\[::\]:%s"/"0.0.0.0:%s"/g' /evalbench/evalbench/eval_server.py
+        echo "Checking bind success in server (writing to stderr)..."
+        sed -i 's|server.add_insecure_port("0.0.0.0:%s" % PORT)|bound_port = server.add_insecure_port("0.0.0.0:%s" % PORT)\n        import sys\n        sys.stderr.write(f"BOUND_PORT: {bound_port}\\n")\n        if bound_port == 0: raise RuntimeError("Failed to bind to port!")|' /evalbench/evalbench/eval_server.py
+
+        echo "Patching eval_service.py to fix TypeError in get_reporters..."
+        sed -i 's|reporters = get_reporters(config.get("reporting"), job_id, run_time)|reporters = get_reporters(config.get("reporting") or {}, job_id, run_time)|' /evalbench/evalbench/eval_service.py
+
+        echo "Patching util/session.py to make ADK import lazy..."
+        sed -i 's|from google.adk.sessions import VertexAiSessionService||' /evalbench/evalbench/util/session.py
+        sed -i 's|    def __init__(self, config):|    def __init__(self, config):\n        from google.adk.sessions import VertexAiSessionService|' /evalbench/evalbench/util/session.py
+        echo "Patching databases/util.py to make SecretManagerClient lazy..."
+        sed -i 's|CLIENT = secretmanager_v1.SecretManagerServiceClient()|CLIENT = None\ndef get_client():\n    global CLIENT\n    if CLIENT is None:\n        CLIENT = secretmanager_v1.SecretManagerServiceClient()\n    return CLIENT|' /evalbench/evalbench/databases/util.py || echo "Failed to patch databases/util.py"
+        sed -i 's|CLIENT.access_secret_version|get_client().access_secret_version|' /evalbench/evalbench/databases/util.py || echo "Failed to patch databases/util.py usage"
+        cd evalbench
+        export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
+        export PYTHONPATH=./evalproto:.
+        export CLOUD_RUN=True
+        export PORT=50051
+
+        # Server output is streamed to the build log and also captured in
+        # server.log (cwd is /evalbench/evalbench) so the client failure handler can print it.
+        echo "Starting Evaluation Server in background..."
+        # stdin is /dev/null so the server cannot block waiting for input.
+        python3 -u ./eval_server.py --localhost </dev/null > >(tee server.log) 2>&1 &
+        SERVER_PID=$$!
+
+        echo "Waiting for port 50051 to open..."
+        python3 -c "
+        import socket
+        import time
+        for i in range(20):
+            try:
+                s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+                s.connect(('127.0.0.1', 50051))
+                print('Port is open!')
+                exit(0)
+            except Exception as e:
+                print(f'Port not open yet: {e}')
+                time.sleep(1)
+        print('Port failed to open')
+        exit(1)
+        " || { echo "Server failed to bind port. Check logs above."; exit 1; }
+
+        echo "Server is running. Launching Evaluation Client..."
+        cd /evalbench
+        export PYTHONPATH=./evalbench:./evalbench/evalproto
+
+        python3 evalbench/client/eval_client.py --experiment=/workspace/evals/run_config.yaml --endpoint=local || { echo "Client failed! Server logs:"; cat /evalbench/evalbench/server.log; exit 1; }
@@ -0,0 +1,14 @@
+{
+  "scenarios": [
+    {
+      "id": "cloud-sql-debug-01",
+      "starting_prompt": "I need to debug the database.",
+      "conversation_plan": "Ask the agent to list instances in project omkar-playground. Once listed, ask it to check the CPU usage of the first instance. Finally, ask if that usage is considered high.",
+      "expected_trajectory": [
+        "list_instances"
+      ],
+      "kind": "tool",
+      "max_turns": 15
+    }
+  ]
+}
@@ -0,0 +1,7 @@
+# Generator settings for the Gemini 2.5 Pro model.
+# NOTE(review): presumably this is the gemini_2.5_pro_model.yaml that the run
+# configuration points at — confirm against the file path in the change.
+vertex_model: "gemini-2.5-pro"
+generator: "gcp_vertex_gemini"
+execs_per_minute: 5
+base_prompt: ""
@@ -0,0 +1,25 @@
+# Gemini CLI generator configuration, including the Cloud SQL PostgreSQL
+# extension listed under setup.
+generator: 'gemini_cli'
+gemini_cli_version: '@google/gemini-cli@0.26.0'
+
+# Environment variables listed for the generator.
+env:
+  GOOGLE_CLOUD_PROJECT: 'omkar-playground'
+  GOOGLE_CLOUD_LOCATION: 'us-central1'
+  GOOGLE_GENAI_USE_VERTEXAI: 'true'
+  GEMINI_API_MODEL: 'gemini-2.5-pro'
+
+setup:
+  extensions:
+    'https://github.com/gemini-cli-extensions/cloud-sql-postgresql':
+      settings:
+        CLOUD_SQL_POSTGRES_PROJECT: 'omkar-playground'
+        CLOUD_SQL_POSTGRES_INSTANCE: 'omkar-demo-postgres-1'
+        CLOUD_SQL_POSTGRES_REGION: 'us-central1'
+        CLOUD_SQL_POSTGRES_DATABASE: 'postgres'
+        CLOUD_SQL_POSTGRES_USER: 'postgres'
+        # Left unquoted exactly as before; presumably resolved from the
+        # environment by the consumer — TODO confirm.
+        CLOUD_SQL_POSTGRES_PASSWORD: ${CLOUD_SQL_POSTGRES_PASSWORD}
+        CLOUD_SQL_POSTGRES_IP_TYPE: 'PUBLIC'
@@ -0,0 +1,20 @@
+# Evaluation run configuration: dataset, orchestrator, models, scorers and
+# reporting targets.
+dataset_format: "gemini-cli-format"
+dataset_config: "/workspace/evals/dataset.json"
+
+orchestrator: "geminicli"
+model_config: "/workspace/evals/model_config.yaml"
+# Model configuration for the simulated user. This points at a file under
+# /workspace/evals; default simulated user models provided by the evalbench
+# repo can be referenced here instead.
+simulated_user_model_config: "/workspace/evals/gemini_2.5_pro_model.yaml"
+
+# An empty mapping ({}) passes no options for that entry.
+scorers:
+  trajectory_matcher: {}
+  goal_completion:
+    model_config: "/workspace/evals/gemini_2.5_pro_model.yaml"
+
+reporting:
+  bigquery: {}