From fa0246e7d4cc34d0febfb2b61b2847a0303402ec Mon Sep 17 00:00:00 2001 From: Ghanshyam Agrawal Date: Tue, 19 May 2026 12:01:44 +0530 Subject: [PATCH 1/2] fix: enable TCP keepalive on default httpx transports to prevent NAT timeout drops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Long-running non-streaming inference calls (Responses API, o-series and GPT-5.x reasoning models) hold a TCP connection idle for 300–600 s while the server generates. NAT gateways silently drop idle connections in this window — AWS NAT Gateway at ~350 s, GCP Cloud NAT at ~120 s, home routers at 60–300 s — causing the client to hang indefinitely (the default SDK timeout never fires because it measures time since the last received byte, and a NAT-dropped connection sends no further bytes). Enable SO_KEEPALIVE with 60 s idle/interval probes on the default httpx transport for both sync and async clients. This matches the pattern already used by the Anthropic Python SDK. Applied via kwargs.setdefault so any caller that passes a custom transport is completely unaffected. Co-Authored-By: Claude Sonnet 4.6 --- src/openai/_base_client.py | 23 +++++++++++++++++++++++ tests/test_client.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/src/openai/_base_client.py b/src/openai/_base_client.py index 216b36aabd..bbf7cc7d95 100644 --- a/src/openai/_base_client.py +++ b/src/openai/_base_client.py @@ -5,6 +5,7 @@ import time import uuid import email +import socket import asyncio import inspect import logging @@ -831,11 +832,32 @@ def _idempotency_key(self) -> str: return f"stainless-python-retry-{uuid.uuid4()}" +def _build_keepalive_socket_options() -> list[tuple[int, int, int]]: + # Enable TCP keepalive to prevent silent connection drops by NAT gateways and + # load balancers during long-running inference calls (generation can hold a + # TCP connection idle for 300–600 s, exceeding the ~350 s AWS NAT Gateway + # idle timeout). Mirrors the pattern used by the Anthropic Python SDK. + options: list[tuple[int, int, int]] = [(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)] + # TCP_KEEPIDLE (Linux) / TCP_KEEPALIVE (macOS): seconds idle before first probe + if hasattr(socket, "TCP_KEEPIDLE"): + options.append((socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, 60)) + elif hasattr(socket, "TCP_KEEPALIVE"): + options.append((socket.IPPROTO_TCP, socket.TCP_KEEPALIVE, 60)) + # TCP_KEEPINTVL: seconds between subsequent probes + if hasattr(socket, "TCP_KEEPINTVL"): + options.append((socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, 60)) + # TCP_KEEPCNT: drop the connection after this many unanswered probes + if hasattr(socket, "TCP_KEEPCNT"): + options.append((socket.IPPROTO_TCP, socket.TCP_KEEPCNT, 5)) + return options + + class _DefaultHttpxClient(httpx.Client): def __init__(self, **kwargs: Any) -> None: kwargs.setdefault("timeout", DEFAULT_TIMEOUT) kwargs.setdefault("limits", DEFAULT_CONNECTION_LIMITS) kwargs.setdefault("follow_redirects", True) + kwargs.setdefault("transport", httpx.HTTPTransport(socket_options=_build_keepalive_socket_options())) super().__init__(**kwargs) @@ -1423,6 +1445,7 @@ def __init__(self, **kwargs: Any) -> None: kwargs.setdefault("timeout", DEFAULT_TIMEOUT) kwargs.setdefault("limits", DEFAULT_CONNECTION_LIMITS) kwargs.setdefault("follow_redirects", True) + kwargs.setdefault("transport", httpx.AsyncHTTPTransport(socket_options=_build_keepalive_socket_options())) super().__init__(**kwargs) diff --git a/tests/test_client.py b/tests/test_client.py index 2d8955a58e..185d1206eb 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -1304,6 +1304,22 @@ def test_default_client_creation(self) -> None: limits=httpx.Limits(max_connections=100, max_keepalive_connections=20), ) + def test_default_transport_has_tcp_keepalive(self) -> None: + import socket as socket_module + + client = OpenAI(base_url=base_url, api_key=api_key) + transport = client._client._transport + assert isinstance(transport, httpx.HTTPTransport) + socket_options = transport._pool._socket_options + assert any( + opt == (socket_module.SOL_SOCKET, socket_module.SO_KEEPALIVE, 1) for opt in socket_options + ), "Default sync transport should have SO_KEEPALIVE enabled to survive NAT idle timeouts" + + def test_custom_http_client_transport_is_not_overridden(self) -> None: + with httpx.Client() as http_client: + client = OpenAI(base_url=base_url, api_key=api_key, http_client=http_client) + assert client._client is http_client, "A caller-supplied http_client must not be replaced" + @pytest.mark.respx(base_url=base_url) def test_follow_redirects(self, respx_mock: MockRouter, client: OpenAI) -> None: # Test that the default follow_redirects=True allows following redirects @@ -2564,6 +2580,22 @@ async def test_default_client_creation(self) -> None: limits=httpx.Limits(max_connections=100, max_keepalive_connections=20), ) + async def test_default_transport_has_tcp_keepalive(self) -> None: + import socket as socket_module + + client = AsyncOpenAI(base_url=base_url, api_key=api_key) + transport = client._client._transport + assert isinstance(transport, httpx.AsyncHTTPTransport) + socket_options = transport._pool._socket_options + assert any( + opt == (socket_module.SOL_SOCKET, socket_module.SO_KEEPALIVE, 1) for opt in socket_options + ), "Default async transport should have SO_KEEPALIVE enabled to survive NAT idle timeouts" + + async def test_custom_async_http_client_transport_is_not_overridden(self) -> None: + async with httpx.AsyncClient() as http_client: + client = AsyncOpenAI(base_url=base_url, api_key=api_key, http_client=http_client) + assert client._client is http_client, "A caller-supplied http_client must not be replaced" + @pytest.mark.respx(base_url=base_url) async def test_follow_redirects(self, respx_mock: MockRouter, async_client: AsyncOpenAI) -> None: # Test that the default follow_redirects=True allows following redirects From f12ef9df9ac777ca3a0bcce96e46b137373ac513 Mon Sep 17 00:00:00 2001 From: Ghanshyam Agrawal Date: Tue, 19 May 2026 12:20:23 +0530 Subject: [PATCH 2/2] fix: forward limits kwarg and bump httpx lower bound to 0.25.0 Address two review findings: - Bump httpx lower bound from 0.23.0 to 0.25.0; socket_options on HTTPTransport/AsyncHTTPTransport was added in httpx 0.25.0 and would raise TypeError on older allowed installs - Build the keepalive transport with limits from kwargs so the SDK's DEFAULT_CONNECTION_LIMITS (1000) is preserved; caller-supplied transport is still respected via the "transport" not in kwargs guard Co-Authored-By: Claude Sonnet 4.6 --- pyproject.toml | 2 +- src/openai/_base_client.py | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 452ac3125a..4125292125 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ authors = [ ] dependencies = [ - "httpx>=0.23.0, <1", + "httpx>=0.25.0, <1", "pydantic>=1.9.0, <3", "typing-extensions>=4.11, <5", "typing-extensions>=4.14, <5", "anyio>=3.5.0, <5", diff --git a/src/openai/_base_client.py b/src/openai/_base_client.py index bbf7cc7d95..96fe1b82cf 100644 --- a/src/openai/_base_client.py +++ b/src/openai/_base_client.py @@ -857,7 +857,11 @@ def __init__(self, **kwargs: Any) -> None: kwargs.setdefault("timeout", DEFAULT_TIMEOUT) kwargs.setdefault("limits", DEFAULT_CONNECTION_LIMITS) kwargs.setdefault("follow_redirects", True) - kwargs.setdefault("transport", httpx.HTTPTransport(socket_options=_build_keepalive_socket_options())) + if "transport" not in kwargs: + kwargs["transport"] = httpx.HTTPTransport( + limits=kwargs.get("limits", DEFAULT_CONNECTION_LIMITS), + socket_options=_build_keepalive_socket_options(), + ) super().__init__(**kwargs) @@ -1445,7 +1449,11 @@ def __init__(self, **kwargs: Any) -> None: kwargs.setdefault("timeout", DEFAULT_TIMEOUT) kwargs.setdefault("limits", DEFAULT_CONNECTION_LIMITS) kwargs.setdefault("follow_redirects", True) - kwargs.setdefault("transport", httpx.AsyncHTTPTransport(socket_options=_build_keepalive_socket_options())) + if "transport" not in kwargs: + kwargs["transport"] = httpx.AsyncHTTPTransport( + limits=kwargs.get("limits", DEFAULT_CONNECTION_LIMITS), + socket_options=_build_keepalive_socket_options(), + ) super().__init__(**kwargs)