From 54ded80baad2e5c68fbdad0d7664c0b903fcefb0 Mon Sep 17 00:00:00 2001 From: Vanhci Date: Wed, 20 May 2026 20:18:12 +0800 Subject: [PATCH] fix: defer Pydantic parsing in streaming responses until terminal status is known Previously, when using client.responses.stream() with a Pydantic text_format, the SDK parsed the output text on the response.output_text.done event, before the terminal status of the response was known. If the response went on to end as response.incomplete (e.g., due to max_output_tokens truncation), the SDK had already raised a Pydantic JSON validation error on the truncated text, masking the real upstream failure. Changes: - src/openai/_exceptions.py: Add an IncompleteResponseError exception that provides a clear error message with the response_id and incomplete_details.reason - src/openai/__init__.py: Export IncompleteResponseError - src/openai/lib/streaming/responses/_responses.py: - Don't parse text on response.output_text.done (set parsed=None instead) - Handle response.incomplete by raising IncompleteResponseError - Parsing is now deferred until response.completed via parse_response() Fixes openai/openai-python#3263 --- src/openai/__init__.py | 2 ++ src/openai/_exceptions.py | 24 +++++++++++++++++++ .../lib/streaming/responses/_responses.py | 12 +++++++++- 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/openai/__init__.py b/src/openai/__init__.py index cbaef0615f..5fa5325479 100644 --- a/src/openai/__init__.py +++ b/src/openai/__init__.py @@ -35,6 +35,7 @@ InvalidWebhookSignatureError, ContentFilterFinishReasonError, WebSocketConnectionClosedError, + IncompleteResponseError, ) from ._base_client import DefaultHttpxClient, DefaultAioHttpClient, DefaultAsyncHttpxClient from ._utils._logs import setup_logging as _setup_logging @@ -71,6 +72,7 @@ "LengthFinishReasonError", "ContentFilterFinishReasonError", "InvalidWebhookSignatureError", + "IncompleteResponseError", "Timeout", "RequestOptions", "Client", diff --git a/src/openai/_exceptions.py b/src/openai/_exceptions.py index
86f44b0e15..611a790b07 100644 --- a/src/openai/_exceptions.py +++ b/src/openai/_exceptions.py @@ -30,6 +30,7 @@ "SubjectTokenProviderError", "WebSocketConnectionClosedError", "WebSocketQueueFullError", + "IncompleteResponseError", ] @@ -205,3 +206,26 @@ class WebSocketQueueFullError(OpenAIError): """Raised when the outgoing WebSocket message queue exceeds its byte-size limit.""" pass + + +class IncompleteResponseError(OpenAIError): + """Raised when a streaming response ends with incomplete status. + + This typically occurs when the response is truncated due to max_output_tokens + or content_filter restrictions. + """ + + response_id: str + incomplete_details_reason: Optional[Literal["max_output_tokens", "content_filter"]] + + def __init__( + self, + *, + response_id: str, + incomplete_details_reason: Optional[Literal["max_output_tokens", "content_filter"]], + ) -> None: + reason_str = incomplete_details_reason or "unknown" + message = f"Response {response_id} is incomplete: {reason_str}" + super().__init__(message) + self.response_id = response_id + self.incomplete_details_reason = incomplete_details_reason diff --git a/src/openai/lib/streaming/responses/_responses.py b/src/openai/lib/streaming/responses/_responses.py index 6975a9260d..22e48ad3d9 100644 --- a/src/openai/lib/streaming/responses/_responses.py +++ b/src/openai/lib/streaming/responses/_responses.py @@ -25,6 +25,7 @@ ParsedResponseOutputMessage, ParsedResponseFunctionToolCall, ) +from ...._exceptions import IncompleteResponseError class ResponseStream(Generic[TextFormatT]): @@ -276,6 +277,8 @@ def handle_event(self, event: RawResponseStreamEvent) -> List[ResponseStreamEven content = output.content[event.content_index] assert content.type == "output_text" + # Don't parse here - defer parsing until response.completed or response.incomplete + # is received, so we can properly handle incomplete responses events.append( build( ResponseTextDoneEvent[TextFormatT], @@ -286,7 +289,7 @@ def handle_event(self, 
event: RawResponseStreamEvent) -> List[ResponseStreamEven logprobs=event.logprobs, type="response.output_text.done", text=event.text, - parsed=parse_text(event.text, text_format=self._text_format), + parsed=None, # type: ignore[arg-type] ) ) elif event.type == "response.function_call_arguments.delta": @@ -317,6 +320,13 @@ def handle_event(self, event: RawResponseStreamEvent) -> List[ResponseStreamEven response=response, ) ) + elif event.type == "response.incomplete": + # Raise an error for incomplete responses instead of letting + # Pydantic JSON validation errors bubble up later + raise IncompleteResponseError( + response_id=event.response.id, + incomplete_details_reason=event.response.incomplete_details.reason if event.response.incomplete_details else None, + ) else: events.append(event)