From d1e4257c0117d2774441df1564f2f2c834f3aed3 Mon Sep 17 00:00:00 2001
From: A Vertex SDK engineer
Date: Sat, 28 Feb 2026 15:19:36 -0800
Subject: [PATCH] fix: Only include CandidateResponse if a response is present

PiperOrigin-RevId: 876784701
---
 .../replays/test_create_evaluation_run.py | 54 +++++++++++++++----
 vertexai/_genai/_evals_common.py          | 21 ++++----
 2 files changed, 54 insertions(+), 21 deletions(-)

diff --git a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py
index cd97ab042c..e8e12b3149 100644
--- a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py
+++ b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py
@@ -63,6 +63,9 @@
         )
     ),
 )
+INFERENCE_CONFIG = types.EvaluationRunInferenceConfig(
+    model="projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash"
+)
 
 
 def test_create_eval_run_data_source_evaluation_set(client):
@@ -189,9 +192,6 @@ def test_create_eval_run_data_source_bigquery_request_set(client):
 def test_create_eval_run_with_inference_configs(client):
     """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with inference_configs."""
     client._api_client._http_options.api_version = "v1beta1"
-    inference_config = types.EvaluationRunInferenceConfig(
-        model="projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash"
-    )
     evaluation_run = client.evals.create_evaluation_run(
         name="test_inference_config",
         display_name="test_inference_config",
@@ -200,7 +200,7 @@ def test_create_eval_run_with_inference_configs(client):
         ),
         dest=GCS_DEST,
         metrics=[GENERAL_QUALITY_METRIC],
-        inference_configs={"model_1": inference_config},
+        inference_configs={"model_1": INFERENCE_CONFIG},
         labels={"label1": "value1"},
     )
     assert isinstance(evaluation_run, types.EvaluationRun)
@@ -216,7 +216,7 @@ def test_create_eval_run_with_inference_configs(client):
         ),
         metrics=[GENERAL_QUALITY_METRIC],
     )
-    assert evaluation_run.inference_configs["model_1"] == inference_config
+    assert evaluation_run.inference_configs["model_1"] == INFERENCE_CONFIG
     assert evaluation_run.labels == {
         "label1": "value1",
     }
@@ -318,6 +318,43 @@ def test_create_eval_run_with_inference_configs(client):
 #     )
 #     assert evaluation_run.error is None
 
+# def test_create_eval_run_data_source_evaluation_dataset_inference_config(client):
+#     """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with EvaluationDataset."""
+#     input_df = pd.DataFrame(
+#         {
+#             "prompt": ["prompt1", "prompt2"],
+#             "reference": ["reference1", "reference2"],
+#         }
+#     )
+#     evaluation_run = client.evals.create_evaluation_run(
+#         name="test9",
+#         display_name="test9",
+#         dataset=types.EvaluationDataset(
+#             candidate_name="candidate_1",
+#             eval_dataset_df=input_df,
+#         ),
+#         dest=GCS_DEST,
+#         metrics=[GENERAL_QUALITY_METRIC],
+#         inference_configs={"candidate_1": INFERENCE_CONFIG},
+#     )
+#     assert isinstance(evaluation_run, types.EvaluationRun)
+#     assert evaluation_run.display_name == "test9"
+#     assert evaluation_run.state == types.EvaluationRunState.PENDING
+#     assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource)
+#     # Check evaluation set
+#     assert evaluation_run.data_source.evaluation_set
+#     eval_set = client.evals.get_evaluation_set(
+#         name=evaluation_run.data_source.evaluation_set
+#     )
+#     assert len(eval_set.evaluation_items) == 2
+#     assert evaluation_run.inference_configs["candidate_1"] == INFERENCE_CONFIG
+#     # Check evaluation items
+#     for i, eval_item_name in enumerate(eval_set.evaluation_items):
+#         eval_item = client.evals.get_evaluation_item(name=eval_item_name)
+#         assert eval_item.evaluation_item_type == types.EvaluationItemType.REQUEST
+#         assert eval_item.evaluation_request.prompt.text == input_df.iloc[i]["prompt"]
+#         assert eval_item.evaluation_request.candidate_responses == []
+#     assert evaluation_run.error is None
 
 
 pytest_plugins = ("pytest_asyncio",)
@@ -370,9 +407,6 @@ async def test_create_eval_run_async(client):
 async def test_create_eval_run_async_with_inference_configs(client):
     """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with inference_configs asynchronously."""
     client._api_client._http_options.api_version = "v1beta1"
-    inference_config = types.EvaluationRunInferenceConfig(
-        model="projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash"
-    )
     evaluation_run = await client.aio.evals.create_evaluation_run(
         name="test_inference_config_async",
         display_name="test_inference_config_async",
@@ -381,7 +415,7 @@ async def test_create_eval_run_async_with_inference_configs(client):
         ),
         dest=GCS_DEST,
         metrics=[GENERAL_QUALITY_METRIC],
-        inference_configs={"model_1": inference_config},
+        inference_configs={"model_1": INFERENCE_CONFIG},
         labels={"label1": "value1"},
     )
     assert isinstance(evaluation_run, types.EvaluationRun)
@@ -397,7 +431,7 @@ async def test_create_eval_run_async_with_inference_configs(client):
         ),
         metrics=[GENERAL_QUALITY_METRIC],
     )
-    assert evaluation_run.inference_configs["model_1"] == inference_config
+    assert evaluation_run.inference_configs["model_1"] == INFERENCE_CONFIG
     assert evaluation_run.labels == {
         "label1": "value1",
     }
diff --git a/vertexai/_genai/_evals_common.py b/vertexai/_genai/_evals_common.py
index 0bc28994ed..a0e986612b 100644
--- a/vertexai/_genai/_evals_common.py
+++ b/vertexai/_genai/_evals_common.py
@@ -1962,6 +1962,15 @@ def _create_evaluation_set_from_dataframe(
             for event in row[_evals_constant.INTERMEDIATE_EVENTS]:
                 if CONTENT in event:
                     intermediate_events.append(event[CONTENT])
+        candidate_responses = []
+        if _evals_constant.RESPONSE in row:
+            candidate_responses.append(
+                types.CandidateResponse(
+                    candidate=candidate_name or "Candidate 1",
+                    text=row[_evals_constant.RESPONSE],
+                    events=intermediate_events or None,
+                )
+            )
         eval_item_requests.append(
             types.EvaluationItemRequest(
                 prompt=(
@@ -1974,17 +1983,7 @@ def _create_evaluation_set_from_dataframe(
                     if _evals_constant.REFERENCE in row
                     else None
                 ),
-                candidate_responses=[
-                    types.CandidateResponse(
-                        candidate=candidate_name or "Candidate 1",
-                        text=row.get(_evals_constant.RESPONSE, None),
-                        events=(
-                            intermediate_events
-                            if len(intermediate_events) > 0
-                            else None
-                        ),
-                    )
-                ],
+                candidate_responses=candidate_responses,
             )
         )
         logger.info("Writing evaluation item requests to GCS.")
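
Note: the sketch below is editorial commentary, not part of the patch. It restates the behavioral change in `_create_evaluation_set_from_dataframe` in isolation: before this fix, every dataframe row produced exactly one `types.CandidateResponse`, with `text=None` when the row had no response column; after the fix, rows without a response produce an empty `candidate_responses` list. The sketch uses plain dicts instead of the Vertex SDK types so it runs standalone; `RESPONSE` and `build_candidate_responses` are illustrative names, not SDK symbols.

# Minimal standalone sketch of the patched logic, assuming a dict-like row.
RESPONSE = "response"  # stands in for _evals_constant.RESPONSE

def build_candidate_responses(row, candidate_name=None, intermediate_events=None):
    """Return the candidate responses for one dataset row.

    Old behavior: always emitted one entry, with text=None for rows
    lacking a response. New behavior: emit nothing unless a response
    is actually present.
    """
    candidate_responses = []
    if RESPONSE in row:
        candidate_responses.append(
            {
                "candidate": candidate_name or "Candidate 1",
                "text": row[RESPONSE],
                "events": intermediate_events or None,
            }
        )
    return candidate_responses

# A row with a response yields one candidate response.
assert build_candidate_responses({"prompt": "p1", "response": "r1"}) == [
    {"candidate": "Candidate 1", "text": "r1", "events": None}
]
# A row without a response now yields an empty list (previously it
# yielded a single entry with text=None).
assert build_candidate_responses({"prompt": "p1", "reference": "ref1"}) == []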