From 0692f655a30fede9bc21b64849a4b873df063931 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Wed, 23 Jul 2025 15:18:44 +0200 Subject: [PATCH 01/25] x402 support: initial draft --- README.rst | 13 ++++++++++- docs/index.rst | 1 + docs/intro/basic.rst | 8 +++++++ docs/use/api.rst | 5 ++-- docs/use/cli.rst | 5 ++-- docs/use/x402.rst | 55 ++++++++++++++++++++++++++++++++++++++++++++ setup.py | 6 +++++ zyte_api/__main__.py | 32 +++++++++++++++++++++++--- zyte_api/_async.py | 51 +++++++++++++++++++++++++++++++--------- zyte_api/_sync.py | 12 +++++++--- zyte_api/_x402.py | 53 ++++++++++++++++++++++++++++++++++++++++++ 11 files changed, 219 insertions(+), 22 deletions(-) create mode 100644 docs/use/x402.rst create mode 100644 zyte_api/_x402.py diff --git a/README.rst b/README.rst index e52e951..ba99831 100644 --- a/README.rst +++ b/README.rst @@ -35,6 +35,14 @@ Installation pip install zyte-api +Or, to use x402_: + +.. _x402: https://www.x402.org/ + +.. code-block:: shell + + pip install zyte-api[x402] + .. note:: Python 3.9+ is required. .. install-end @@ -42,7 +50,7 @@ Installation Basic usage =========== -.. basic-start +.. basic-key-start Set your API key ---------------- @@ -54,6 +62,9 @@ After you `sign up for a Zyte API account `_. .. key-get-end +.. basic-key-end + +.. basic-start Use the command-line client diff --git a/docs/index.rst b/docs/index.rst index 7fe3951..6d43a94 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -18,6 +18,7 @@ python-zyte-api :maxdepth: 1 use/key + use/x402 use/cli use/api diff --git a/docs/intro/basic.rst b/docs/intro/basic.rst index 32ff015..5caadbf 100644 --- a/docs/intro/basic.rst +++ b/docs/intro/basic.rst @@ -4,6 +4,14 @@ Basic usage =========== +.. include:: /../README.rst + :start-after: basic-key-start + :end-before: basic-key-end + +To use x402_ instead, see :ref:`x402`. + +.. _x402: https://www.x402.org/ + .. include:: /../README.rst :start-after: basic-start :end-before: basic-end diff --git a/docs/use/api.rst b/docs/use/api.rst index 0d27233..d66ab0f 100644 --- a/docs/use/api.rst +++ b/docs/use/api.rst @@ -6,8 +6,9 @@ Python client library ===================== -Once you have :ref:`installed python-zyte-api ` and :ref:`configured -your API key `, you can use one of its APIs from Python code: +Once you have :ref:`installed python-zyte-api ` and configured your +:ref:`API key ` or :ref:`Ethereum private key `, you can use one +of its APIs from Python code: - The :ref:`sync API ` can be used to build simple, proof-of-concept or debugging Python scripts. diff --git a/docs/use/cli.rst b/docs/use/cli.rst index abf2479..f650ec1 100644 --- a/docs/use/cli.rst +++ b/docs/use/cli.rst @@ -4,8 +4,9 @@ Command-line client =================== -Once you have :ref:`installed python-zyte-api ` and :ref:`configured -your API key `, you can use the ``zyte-api`` command-line client. +Once you have :ref:`installed python-zyte-api ` and configured your +:ref:`API key ` or :ref:`Ethereum private key `, you can use the +``zyte-api`` command-line client. To use ``zyte-api``, pass an :ref:`input file ` as the first parameter and specify an :ref:`output file ` with ``--output``. diff --git a/docs/use/x402.rst b/docs/use/x402.rst new file mode 100644 index 0000000..0859e9d --- /dev/null +++ b/docs/use/x402.rst @@ -0,0 +1,55 @@ +.. _x402: + +==== +x402 +==== + +It is possible to use :ref:`Zyte API ` without a Zyte API account by +using the x402_ protocol to handle payments. + +During :ref:`installation `, make sure to install the ``x402`` extra. + +Then, configure the *private* key of your Ethereum_ account as follows so that +it can be used to authorize payments. + +.. _Ethereum: https://ethereum.org/ + +It is recommended to configure your Ethereum private key through an environment +variable, so that it can be picked by both the :ref:`command-line client +` and the :ref:`Python client library `: + +- On Windows’ CMD: + + .. code-block:: shell + + > set ZYTE_API_ETH_KEY=YOUR_ETH_PRIVATE_KEY + +- On macOS and Linux: + + .. code-block:: shell + + $ export ZYTE_API_ETH_KEY=YOUR_ETH_PRIVATE_KEY + +Alternatively, you may pass your Ethereum private key to the clients directly: + +- To pass your Ethereum private key directly to the command-line client, use + the ``--eth-key`` switch: + + .. code-block:: shell + + zyte-api --eth-key YOUR_ETH_PRIVATE_KEY … + +- To pass your Ethereum private key directly to the Python client classes, + use the ``eth_key`` parameter when creating a client object: + + .. code-block:: python + + from zyte_api import ZyteAPI + + client = ZyteAPI(eth_key="YOUR_ETH_PRIVATE_KEY") + + .. code-block:: python + + from zyte_api import AsyncZyteAPI + + client = AsyncZyteAPI(eth_key="YOUR_ETH_PRIVATE_KEY") diff --git a/setup.py b/setup.py index a80bb81..c9e4b88 100755 --- a/setup.py +++ b/setup.py @@ -24,6 +24,12 @@ "tqdm", "w3lib >= 2.1.1", ], + extras_require={ + "x402": [ + "eth-account", + "x402", + ] + }, classifiers=[ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", diff --git a/zyte_api/__main__.py b/zyte_api/__main__.py index e3bd8cc..db55f46 100644 --- a/zyte_api/__main__.py +++ b/zyte_api/__main__.py @@ -37,6 +37,7 @@ async def run( api_key=None, retry_errors=True, store_errors=None, + eth_key=None, ): if stop_on_errors is not _UNSET: warn( @@ -54,8 +55,13 @@ def write_output(content): pbar.update() retrying = None if retry_errors else DontRetryErrorsFactory().build() + auth_kwargs = {} + if api_key: + auth_kwargs["api_key"] = api_key + elif eth_key: + auth_kwargs["eth_key"] = eth_key client = AsyncZyteAPI( - n_conn=n_conn, api_key=api_key, api_url=api_url, retrying=retrying + n_conn=n_conn, api_url=api_url, retrying=retrying, **auth_kwargs ) async with create_session(connection_pool_size=n_conn) as session: result_iter = client.iter( @@ -154,9 +160,28 @@ def _get_argument_parser(program_name="zyte-api"): default=20, help=("Number of concurrent connections to use (default: %(default)s)."), ) - p.add_argument( + group = p.add_mutually_exclusive_group(required=False) + group.add_argument( "--api-key", - help="Zyte API key.", + help=( + "Zyte API key.\n" + "\n" + "If not specified, it is read from the ZYTE_API_KEY environment " + "variable." + "\n" + "Cannot be combined with --eth-key." + ), + ) + group.add_argument( + "--eth-key", + help=( + "Ethereum private key, as a hexadecimal string.\n" + "\n" + "If not specified, it is read from the ZYTE_API_ETH_KEY " + "environment variable." + "\n" + "Cannot be combined with --api-key." + ), ) p.add_argument( "--api-url", help="Zyte API endpoint (default: %(default)s).", default=API_URL @@ -218,6 +243,7 @@ def _main(program_name="zyte-api"): n_conn=args.n_conn, api_url=args.api_url, api_key=args.api_key, + eth_key=args.eth_key, retry_errors=not args.dont_retry_errors, store_errors=args.store_errors, ) diff --git a/zyte_api/_async.py b/zyte_api/_async.py index a5890ed..0999a21 100644 --- a/zyte_api/_async.py +++ b/zyte_api/_async.py @@ -9,10 +9,12 @@ import aiohttp from tenacity import AsyncRetrying +from zyte_api._x402 import _get_eth_key, _get_x402_headers + from ._errors import RequestError from ._retry import zyte_api_retrying from ._utils import _AIO_API_TIMEOUT, create_session -from .apikey import get_apikey +from .apikey import NoApiKey, get_apikey from .constants import API_URL from .stats import AggStats, ResponseStats from .utils import USER_AGENT, _process_query @@ -86,18 +88,35 @@ class AsyncZyteAPI: def __init__( self, *, - api_key=None, - api_url=API_URL, - n_conn=15, + api_key: str | None = None, + api_url: str = API_URL, + n_conn: int = 15, retrying: AsyncRetrying | None = None, user_agent: str | None = None, + eth_key: str | None = None, ): if retrying is not None and not isinstance(retrying, AsyncRetrying): raise ValueError( "The retrying parameter, if defined, must be an instance of " "AsyncRetrying." ) - self.api_key = get_apikey(api_key) + + try: + self.auth = get_apikey(api_key) + except NoApiKey: + try: + eth_key = _get_eth_key(eth_key) + except ValueError: + raise NoApiKey( + "You must provide either a Zyte API key or an Ethereum private key." + ) from None + + from eth_account import Account + from x402.clients import x402Client + + account = Account.from_key(eth_key) + self.auth = x402Client(account=account) + self.api_url = api_url self.n_conn = n_conn self.agg_stats = AggStats() @@ -117,9 +136,20 @@ async def get( """Asynchronous equivalent to :meth:`ZyteAPI.get`.""" retrying = retrying or self.retrying post = _post_func(session) - auth = aiohttp.BasicAuth(self.api_key) + + url = self.api_url + endpoint + query = _process_query(query) headers = {"User-Agent": self.user_agent, "Accept-Encoding": "br"} + auth_kwargs = {} + if isinstance(self.auth, str): + auth_kwargs["auth"] = aiohttp.BasicAuth(self.auth) + else: + x402_headers = await _get_x402_headers( + self.auth, url, query, headers, self._semaphore, post + ) + headers.update(x402_headers) + response_stats = [] start_global = time.perf_counter() @@ -127,12 +157,11 @@ async def request(): stats = ResponseStats.create(start_global) self.agg_stats.n_attempts += 1 - safe_query = _process_query(query) post_kwargs = { - "url": self.api_url + endpoint, - "json": safe_query, - "auth": auth, + "url": url, + "json": query, "headers": headers, + **auth_kwargs, } try: @@ -151,7 +180,7 @@ async def request(): message=resp.reason, headers=resp.headers, response_content=content, - query=safe_query, + query=query, ) response = await resp.json() diff --git a/zyte_api/_sync.py b/zyte_api/_sync.py index 8f3fdb1..66f24b8 100644 --- a/zyte_api/_sync.py +++ b/zyte_api/_sync.py @@ -83,6 +83,10 @@ class ZyteAPI: *api_key* is your Zyte API key. If not specified, it is read from the ``ZYTE_API_KEY`` environment variable. See :ref:`api-key`. + Alternatively, you can set an Ethereum private key through *eth_key* to use + Ethereum for payments. If not specified, it is read from the + ``ZYTE_API_ETH_KEY`` environment variable. See :ref:`x402`. + *api_url* is the Zyte API base URL. *n_conn* is the maximum number of concurrent requests to use. See @@ -101,11 +105,12 @@ class ZyteAPI: def __init__( self, *, - api_key=None, - api_url=API_URL, - n_conn=15, + api_key: str | None = None, + api_url: str = API_URL, + n_conn: int = 15, retrying: AsyncRetrying | None = None, user_agent: str | None = None, + eth_key: str | None = None, ): self._async_client = AsyncZyteAPI( api_key=api_key, @@ -113,6 +118,7 @@ def __init__( n_conn=n_conn, retrying=retrying, user_agent=user_agent, + eth_key=eth_key, ) def get( diff --git a/zyte_api/_x402.py b/zyte_api/_x402.py new file mode 100644 index 0000000..e8d9da7 --- /dev/null +++ b/zyte_api/_x402.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +from os import environ +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from asyncio import Semaphore + from collections.abc import Callable + from contextlib import AbstractAsyncContextManager + + from aiohttp import ClientResponse + from x402.clients import x402Client + +ENV_VARIABLE = "ZYTE_API_ETH_KEY" + + +def _get_eth_key(key: str | None = None) -> str: + if key is not None: + return key + try: + return environ[ENV_VARIABLE] + except KeyError: + raise ValueError from None + + +async def _get_x402_headers( + client: x402Client, + url: str, + query: dict[str, Any], + headers: dict[str, str], + semaphore: Semaphore, + post_fn: Callable[..., AbstractAsyncContextManager[ClientResponse]], +) -> dict[str, str]: + from x402.types import x402PaymentRequiredResponse + + post_kwargs = {"url": url, "json": query, "headers": headers} + async with semaphore, post_fn(**post_kwargs) as response: + if response.status != 402: + raise ValueError( + "Expected 402 status code for X-402 authorization, got " + f"{response.status}" + ) + data = await response.json() + + payment_response = x402PaymentRequiredResponse(**data) + selected_requirements = client.select_payment_requirements(payment_response.accepts) + payment_header = client.create_payment_header( + selected_requirements, payment_response.x402_version + ) + return { + "Access-Control-Expose-Headers": "X-Payment-Response", + "X-Payment": payment_header, + } From b0be88947ddc71efe6c35e12329958b2f20d8cca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Wed, 23 Jul 2025 16:56:05 +0200 Subject: [PATCH 02/25] x402: optimize out initial requests where possible --- zyte_api/_x402.py | 112 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 104 insertions(+), 8 deletions(-) diff --git a/zyte_api/_x402.py b/zyte_api/_x402.py index e8d9da7..83c9d1f 100644 --- a/zyte_api/_x402.py +++ b/zyte_api/_x402.py @@ -1,7 +1,10 @@ from __future__ import annotations +import json +from hashlib import md5 from os import environ from typing import TYPE_CHECKING, Any +from urllib.parse import urlparse if TYPE_CHECKING: from asyncio import Semaphore @@ -11,7 +14,49 @@ from aiohttp import ClientResponse from x402.clients import x402Client +CACHE: dict[bytes, tuple[Any, str]] = {} ENV_VARIABLE = "ZYTE_API_ETH_KEY" +EXTRACT_KEYS = { + "article", + "articleList", + "articleNavigation", + "forumThread", + "jobPosting", + "jobPostingNavigation", + "product", + "productList", + "productNavigation", + "serp", +} + + +def get_extract_from(query: dict[str, Any], data_type: str) -> str | Any: + options = query.get(f"{data_type}Options", {}) + default_extract_from = "httpResponseBody" if data_type == "serp" else None + return options.get("extractFrom", default_extract_from) + + +def get_extract_froms(query: dict[str, Any]) -> set[str]: + result = set() + for key in EXTRACT_KEYS: + if not query.get(key, False): + continue + result.add(get_extract_from(query, key)) + return result + + +def may_use_browser(query: dict[str, Any]) -> bool: + """Return ``False`` if *query* indicates with certainty that browser + rendering will not be used, or ``True`` otherwise.""" + for key in ("browserHtml", "screenshot"): + if query.get(key): + return True + extract_froms = get_extract_froms(query) + if "browserHtml" in extract_froms: + return True + if "httpResponseBody" in extract_froms: + return False + return not query.get("httpResponseBody") def _get_eth_key(key: str | None = None) -> str: @@ -23,16 +68,44 @@ def _get_eth_key(key: str | None = None) -> str: raise ValueError from None -async def _get_x402_headers( +def get_max_cost_hash(query: dict[str, Any]) -> bytes: + """Returns a hash based on *query* that should be the same for queries + whose estimate costs are the same. + + For open-ended costs, like actions, network capture or custom attributes, + we assume that Zyte API will not report a different cost based on e.g. the + number of actions or their parameters, or similar details of network + capture or custom attributes. + + See also: https://docs.zyte.com/zyte-api/pricing.html#request-costs + """ + data = { + "domain": urlparse(query["url"]).netloc, + "type": "browser" if may_use_browser(query) else "http", + } + for key in ( + *(k for k in EXTRACT_KEYS if k != "serp"), # serp does not affect cost + "actions", + "networkCapture", + "screenshot", + ): + if query.get(key): + data[key] = True + if query.get("customAttributes"): + data["customAttributesOptions.method"] = query.get( + "customAttributesOptions", {} + ).get("method", "generate") + return md5(json.dumps(data, sort_keys=True).encode()).digest() # noqa: S324 + + +async def fetch_requirements( client: x402Client, url: str, query: dict[str, Any], headers: dict[str, str], semaphore: Semaphore, post_fn: Callable[..., AbstractAsyncContextManager[ClientResponse]], -) -> dict[str, str]: - from x402.types import x402PaymentRequiredResponse - +) -> tuple[Any, str]: post_kwargs = {"url": url, "json": query, "headers": headers} async with semaphore, post_fn(**post_kwargs) as response: if response.status != 402: @@ -42,11 +115,34 @@ async def _get_x402_headers( ) data = await response.json() + from x402.types import x402PaymentRequiredResponse + payment_response = x402PaymentRequiredResponse(**data) - selected_requirements = client.select_payment_requirements(payment_response.accepts) - payment_header = client.create_payment_header( - selected_requirements, payment_response.x402_version - ) + requirements = client.select_payment_requirements(payment_response.accepts) + version = payment_response.x402_version + return requirements, version + + +async def _get_x402_headers( + client: x402Client, + url: str, + query: dict[str, Any], + headers: dict[str, str], + semaphore: Semaphore, + post_fn: Callable[..., AbstractAsyncContextManager[ClientResponse]], +) -> dict[str, str]: + if environ.get("ZYTE_API_ETH_MINIMIZE_REQUESTS") != "false": + max_cost_hash = get_max_cost_hash(query) + if max_cost_hash not in CACHE: + CACHE[max_cost_hash] = await fetch_requirements( + client, url, query, headers, semaphore, post_fn + ) + requirements, version = CACHE[max_cost_hash] + else: + requirements, version = await fetch_requirements( + client, url, query, headers, semaphore, post_fn + ) + payment_header = client.create_payment_header(requirements, version) return { "Access-Control-Expose-Headers": "X-Payment-Response", "X-Payment": payment_header, From 0df1f9274ad5ef55a4e9b4d0a4bc1abadc5c8ee0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Wed, 23 Jul 2025 17:30:30 +0200 Subject: [PATCH 03/25] Add a n_x402_req stat --- zyte_api/_async.py | 28 ++++------- zyte_api/_x402.py | 121 +++++++++++++++++++++++++++------------------ zyte_api/stats.py | 1 + 3 files changed, 83 insertions(+), 67 deletions(-) diff --git a/zyte_api/_async.py b/zyte_api/_async.py index 0999a21..bdaa6a2 100644 --- a/zyte_api/_async.py +++ b/zyte_api/_async.py @@ -9,7 +9,7 @@ import aiohttp from tenacity import AsyncRetrying -from zyte_api._x402 import _get_eth_key, _get_x402_headers +from zyte_api._x402 import _x402Handler from ._errors import RequestError from ._retry import zyte_api_retrying @@ -101,29 +101,23 @@ def __init__( "AsyncRetrying." ) + self.api_url = api_url + self.n_conn = n_conn + self.agg_stats = AggStats() + self.retrying = retrying or zyte_api_retrying + self.user_agent = user_agent or USER_AGENT + self._semaphore = asyncio.Semaphore(n_conn) + try: self.auth = get_apikey(api_key) except NoApiKey: try: - eth_key = _get_eth_key(eth_key) + self.auth = _x402Handler(eth_key, self._semaphore, self.agg_stats) except ValueError: raise NoApiKey( "You must provide either a Zyte API key or an Ethereum private key." ) from None - from eth_account import Account - from x402.clients import x402Client - - account = Account.from_key(eth_key) - self.auth = x402Client(account=account) - - self.api_url = api_url - self.n_conn = n_conn - self.agg_stats = AggStats() - self.retrying = retrying or zyte_api_retrying - self.user_agent = user_agent or USER_AGENT - self._semaphore = asyncio.Semaphore(n_conn) - async def get( self, query: dict, @@ -145,9 +139,7 @@ async def get( if isinstance(self.auth, str): auth_kwargs["auth"] = aiohttp.BasicAuth(self.auth) else: - x402_headers = await _get_x402_headers( - self.auth, url, query, headers, self._semaphore, post - ) + x402_headers = await self.auth.get_headers(url, query, headers, post) headers.update(x402_headers) response_stats = [] diff --git a/zyte_api/_x402.py b/zyte_api/_x402.py index 83c9d1f..44fef49 100644 --- a/zyte_api/_x402.py +++ b/zyte_api/_x402.py @@ -12,7 +12,8 @@ from contextlib import AbstractAsyncContextManager from aiohttp import ClientResponse - from x402.clients import x402Client + + from zyte_api.stats import AggStats CACHE: dict[bytes, tuple[Any, str]] = {} ENV_VARIABLE = "ZYTE_API_ETH_KEY" @@ -28,6 +29,7 @@ "productNavigation", "serp", } +MINIMIZE_REQUESTS = environ.get("ZYTE_API_ETH_MINIMIZE_REQUESTS") != "false" def get_extract_from(query: dict[str, Any], data_type: str) -> str | Any: @@ -98,52 +100,73 @@ def get_max_cost_hash(query: dict[str, Any]) -> bytes: return md5(json.dumps(data, sort_keys=True).encode()).digest() # noqa: S324 -async def fetch_requirements( - client: x402Client, - url: str, - query: dict[str, Any], - headers: dict[str, str], - semaphore: Semaphore, - post_fn: Callable[..., AbstractAsyncContextManager[ClientResponse]], -) -> tuple[Any, str]: - post_kwargs = {"url": url, "json": query, "headers": headers} - async with semaphore, post_fn(**post_kwargs) as response: - if response.status != 402: - raise ValueError( - "Expected 402 status code for X-402 authorization, got " - f"{response.status}" - ) - data = await response.json() - - from x402.types import x402PaymentRequiredResponse - - payment_response = x402PaymentRequiredResponse(**data) - requirements = client.select_payment_requirements(payment_response.accepts) - version = payment_response.x402_version - return requirements, version - - -async def _get_x402_headers( - client: x402Client, - url: str, - query: dict[str, Any], - headers: dict[str, str], - semaphore: Semaphore, - post_fn: Callable[..., AbstractAsyncContextManager[ClientResponse]], -) -> dict[str, str]: - if environ.get("ZYTE_API_ETH_MINIMIZE_REQUESTS") != "false": - max_cost_hash = get_max_cost_hash(query) - if max_cost_hash not in CACHE: - CACHE[max_cost_hash] = await fetch_requirements( - client, url, query, headers, semaphore, post_fn +class _x402Handler: + def __init__( + self, + eth_key: str | None, + semaphore: Semaphore, + stats: AggStats, + ): + from eth_account import Account + from x402.clients import x402Client + from x402.types import x402PaymentRequiredResponse + + eth_key = _get_eth_key(eth_key) + account = Account.from_key(eth_key) + self.client = x402Client(account=account) + self.semaphore = semaphore + self.stats = stats + self.x402PaymentRequiredResponse = x402PaymentRequiredResponse + + async def get_headers( + self, + url: str, + query: dict[str, Any], + headers: dict[str, str], + post_fn: Callable[..., AbstractAsyncContextManager[ClientResponse]], + ) -> dict[str, str]: + if MINIMIZE_REQUESTS: + max_cost_hash = get_max_cost_hash(query) + if max_cost_hash not in CACHE: + CACHE[max_cost_hash] = await self.fetch_requirements( + url, query, headers, post_fn, self.stats + ) + requirements, version = CACHE[max_cost_hash] + else: + requirements, version = await self.fetch_requirements( + url, query, headers, post_fn, self.stats ) - requirements, version = CACHE[max_cost_hash] - else: - requirements, version = await fetch_requirements( - client, url, query, headers, semaphore, post_fn - ) - payment_header = client.create_payment_header(requirements, version) - return { - "Access-Control-Expose-Headers": "X-Payment-Response", - "X-Payment": payment_header, - } + payment_header = self.client.create_payment_header(requirements, version) + return { + "Access-Control-Expose-Headers": "X-Payment-Response", + "X-Payment": payment_header, + } + + async def fetch_requirements( + self, + url: str, + query: dict[str, Any], + headers: dict[str, str], + post_fn: Callable[..., AbstractAsyncContextManager[ClientResponse]], + stats: AggStats, + ) -> tuple[Any, str]: + post_kwargs = {"url": url, "json": query, "headers": headers} + retried = False + while True: + stats.n_x402_req += 1 + async with self.semaphore, post_fn(**post_kwargs) as response: + if response.status >= 500 and not retried: + retried = True + continue + if response.status != 402: + raise ValueError( + "Expected 402 status code for X-402 authorization, got " + f"{response.status}" + ) + data = await response.json() + break + + payment_response = self.x402PaymentRequiredResponse(**data) + requirements = self.client.select_payment_requirements(payment_response.accepts) + version = payment_response.x402_version + return requirements, version diff --git a/zyte_api/stats.py b/zyte_api/stats.py index c293d3c..c4acf51 100644 --- a/zyte_api/stats.py +++ b/zyte_api/stats.py @@ -37,6 +37,7 @@ def __init__(self): ) self.n_429 = 0 # number of 429 (throttling) responses self.n_errors = 0 # number of errors, including errors which were retried + self.n_x402_req = 0 # x402 requests for payment requirements self.status_codes = Counter() self.exception_types = Counter() From 2f3c9c4bf376f05a483bfaa0e766584f04476f3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Wed, 23 Jul 2025 17:39:51 +0200 Subject: [PATCH 04/25] Raise non-402 response as RequestError --- zyte_api/_x402.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/zyte_api/_x402.py b/zyte_api/_x402.py index 44fef49..1a65997 100644 --- a/zyte_api/_x402.py +++ b/zyte_api/_x402.py @@ -6,6 +6,8 @@ from typing import TYPE_CHECKING, Any from urllib.parse import urlparse +from zyte_api._errors import RequestError + if TYPE_CHECKING: from asyncio import Semaphore from collections.abc import Callable @@ -159,9 +161,16 @@ async def fetch_requirements( retried = True continue if response.status != 402: - raise ValueError( - "Expected 402 status code for X-402 authorization, got " - f"{response.status}" + content = await response.read() + response.release() + raise RequestError( + request_info=response.request_info, + history=response.history, + status=response.status, + message=response.reason, + headers=response.headers, + response_content=content, + query=query, ) data = await response.json() break From 53f9a4c7e6af5e59f6128d4f28c92c679020472d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Wed, 23 Jul 2025 17:44:49 +0200 Subject: [PATCH 05/25] Handle import errors from x402 --- zyte_api/_async.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/zyte_api/_async.py b/zyte_api/_async.py index bdaa6a2..96e7f16 100644 --- a/zyte_api/_async.py +++ b/zyte_api/_async.py @@ -113,9 +113,11 @@ def __init__( except NoApiKey: try: self.auth = _x402Handler(eth_key, self._semaphore, self.agg_stats) - except ValueError: + except (ImportError, ValueError): raise NoApiKey( - "You must provide either a Zyte API key or an Ethereum private key." + "You must provide either a Zyte API key or an Ethereum " + "private key. For the latter, you must also install " + "zyte-api as zyte-api[x402]." ) from None async def get( From 33dcc3e1dd0057e48116fa1cc86ae565517626df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Wed, 23 Jul 2025 19:20:07 +0200 Subject: [PATCH 06/25] Add x402 envs to tox and CI --- .github/workflows/test.yml | 15 +++++++++++++-- setup.py | 18 +++++++++--------- tests/test_x402.py | 36 ++++++++++++++++++++++++++++++++++++ tox.ini | 28 ++++++++++++++++++++++++++-- zyte_api/_async.py | 2 +- zyte_api/_x402.py | 8 +++----- 6 files changed, 88 insertions(+), 19 deletions(-) create mode 100644 tests/test_x402.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b056a24..1d7ee9e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -16,7 +16,18 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] + include: + - python-version: "3.9" + tox: min + - python-version: "3.9" + tox: min-x402 + - python-version: "3.9" + - python-version: "3.10" + - python-version: "3.11" + - python-version: "3.12" + - python-version: "3.13" + - python-version: "3.13" + tox: x402 steps: - uses: actions/checkout@v4 @@ -30,7 +41,7 @@ jobs: python -m pip install tox - name: tox run: | - tox -e py + tox -e ${{ matrix.tox || 'py' }} - name: coverage if: ${{ success() }} uses: codecov/codecov-action@v4.0.1 diff --git a/setup.py b/setup.py index c9e4b88..aa2f258 100755 --- a/setup.py +++ b/setup.py @@ -16,18 +16,18 @@ "console_scripts": ["zyte-api=zyte_api.__main__:_main"], }, install_requires=[ - "aiohttp >= 3.8.0", - "attrs", - "brotli", - "runstats", - "tenacity", - "tqdm", - "w3lib >= 2.1.1", + "aiohttp>=3.8.0", + "attrs>=20.1.0", + "brotli>=0.5.2", + "runstats>=0.0.1", + "tenacity>=8.2.0", + "tqdm>=4.16.0", + "w3lib>=2.1.1", ], extras_require={ "x402": [ - "eth-account", - "x402", + "eth-account>=0.13.7", + "x402>=0.1.1", ] }, classifiers=[ diff --git a/tests/test_x402.py b/tests/test_x402.py new file mode 100644 index 0000000..db47df3 --- /dev/null +++ b/tests/test_x402.py @@ -0,0 +1,36 @@ +import importlib.util +from os import environ +from unittest import mock + +import pytest + +from zyte_api.aio.client import AsyncClient + +HAS_X402 = importlib.util.find_spec("x402") is not None +KEY = "c85ef7d79691fe79573b1a7064c5232332f53bb1b44a08f1a737f57a68a4706e" + + +def test_eth_key_param(): + if HAS_X402: + AsyncClient(eth_key=KEY) + else: + with pytest.raises(ImportError, match="No module named 'eth_account'"): + AsyncClient(eth_key=KEY) + + +@mock.patch.dict(environ, {"ZYTE_API_ETH_KEY": KEY}) +def test_eth_key_env_var(): + if HAS_X402: + AsyncClient() + else: + with pytest.raises(ImportError, match="No module named 'eth_account'"): + AsyncClient() + + +def test_eth_key_short(): + if HAS_X402: + with pytest.raises(ValueError, match="must be exactly 32 bytes long"): + AsyncClient(eth_key="a") + else: + with pytest.raises(ImportError, match="No module named 'eth_account'"): + AsyncClient(eth_key="a") diff --git a/tox.ini b/tox.ini index 33663b7..a669d2b 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py39,py310,py311,py312,py313,mypy,docs,twine +envlist = min,min-x402,py39,py310,py311,py312,py313,x402,mypy,docs,twine [testenv] deps = @@ -10,13 +10,37 @@ deps = pytest-twisted responses twisted - commands = py.test \ --cov-report=term-missing --cov-report=html --cov-report=xml --cov=zyte_api \ --doctest-modules \ {posargs:zyte_api tests} +[testenv:x402] +extras = x402 + +[min] +deps = + {[testenv]deps} + aiohttp==3.8.0 + attrs==20.1.0 + brotli==0.5.2 + runstats==0.0.1 + tenacity==8.2.0 + tqdm==4.16.0 + w3lib==2.1.1 + +[testenv:min] +basepython = python3.9 +deps = {[min]deps} + +[testenv:min-x402] +basepython = python3.10 +deps = + {[min]deps} + eth_account==0.13.7 + x402==0.1.1 + [testenv:mypy] deps = mypy==1.12.0 diff --git a/zyte_api/_async.py b/zyte_api/_async.py index 96e7f16..c9d9655 100644 --- a/zyte_api/_async.py +++ b/zyte_api/_async.py @@ -113,7 +113,7 @@ def __init__( except NoApiKey: try: self.auth = _x402Handler(eth_key, self._semaphore, self.agg_stats) - except (ImportError, ValueError): + except KeyError: raise NoApiKey( "You must provide either a Zyte API key or an Ethereum " "private key. For the latter, you must also install " diff --git a/zyte_api/_x402.py b/zyte_api/_x402.py index 1a65997..935e420 100644 --- a/zyte_api/_x402.py +++ b/zyte_api/_x402.py @@ -66,10 +66,7 @@ def may_use_browser(query: dict[str, Any]) -> bool: def _get_eth_key(key: str | None = None) -> str: if key is not None: return key - try: - return environ[ENV_VARIABLE] - except KeyError: - raise ValueError from None + return environ[ENV_VARIABLE] def get_max_cost_hash(query: dict[str, Any]) -> bytes: @@ -109,11 +106,12 @@ def __init__( semaphore: Semaphore, stats: AggStats, ): + eth_key = _get_eth_key(eth_key) + from eth_account import Account from x402.clients import x402Client from x402.types import x402PaymentRequiredResponse - eth_key = _get_eth_key(eth_key) account = Account.from_key(eth_key) self.client = x402Client(account=account) self.semaphore = semaphore From 8970a7ff0491ba1da9e398fd2fe4edca4274e442 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Wed, 23 Jul 2025 19:21:32 +0200 Subject: [PATCH 07/25] Keep mypy happy --- zyte_api/_async.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zyte_api/_async.py b/zyte_api/_async.py index c9d9655..91cf7e1 100644 --- a/zyte_api/_async.py +++ b/zyte_api/_async.py @@ -109,7 +109,7 @@ def __init__( self._semaphore = asyncio.Semaphore(n_conn) try: - self.auth = get_apikey(api_key) + self.auth: str | _x402Handler = get_apikey(api_key) except NoApiKey: try: self.auth = _x402Handler(eth_key, self._semaphore, self.agg_stats) From 115abc51569c524d59a4d329e7fa073d41cf02d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Wed, 23 Jul 2025 21:08:12 +0200 Subject: [PATCH 08/25] Complete branch coverage --- pyproject.toml | 3 + tests/mockserver.py | 43 ++++- tests/test_main.py | 48 +++++- tests/test_x402.py | 394 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 483 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 34ebbc9..86666f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -154,6 +154,9 @@ ignore = [ "zyte_api/errors.py" = ["UP007"] "zyte_api/stats.py" = ["UP007"] +[tool.ruff.lint.flake8-pytest-style] +parametrize-values-type = "tuple" + [tool.ruff.lint.flake8-type-checking] runtime-evaluated-decorators = ["attr.s"] diff --git a/tests/mockserver.py b/tests/mockserver.py index 014f85d..773a241 100644 --- a/tests/mockserver.py +++ b/tests/mockserver.py @@ -14,6 +14,11 @@ from twisted.web.resource import Resource from twisted.web.server import NOT_DONE_YET, Site +SCREENSHOT = ( + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAACklEQVR4nGMAAQAABQABDQott" + "AAAAABJRU5ErkJggg==" +) + # https://github.com/scrapy/scrapy/blob/02b97f98e74a994ad3e4d74e7ed55207e508a576/tests/mockserver.py#L27C1-L33C19 def getarg(request, name, default=None, type=None): @@ -62,6 +67,26 @@ def _delayedRender(self, request): request.finish() +RESPONSE_402 = { + "x402Version": 1, + "accepts": [ + { + "scheme": "exact", + "network": "base-sepolia", + "maxAmountRequired": "1000", + "resource": "https://api.zyte.com/v1/extract", + "description": "", + "mimeType": "", + "payTo": "0xFACEdD967ea0592bbb9410fA4877Df9AeB628CB7", + "maxTimeoutSeconds": 130, + "asset": "0xFACEbD53842c5426634e7929541eC2318f3dCF7e", + "extra": {"name": "USDC", "version": "2"}, + } + ], + "error": "Use basic auth or x402", +} + + class DefaultResource(Resource): request_count = 0 @@ -69,8 +94,6 @@ def getChild(self, path, request): return self def render_POST(self, request): - request_data = json.loads(request.content.read()) - request.responseHeaders.setRawHeaders( b"Content-Type", [b"application/json"], @@ -80,12 +103,17 @@ def render_POST(self, request): [b"abcd1234"], ) + request_data = json.loads(request.content.read()) + url = request_data["url"] domain = urlparse(url).netloc if domain == "e429.example": request.setResponseCode(429) response_data = {"status": 429, "type": "/limits/over-user-limit"} return json.dumps(response_data).encode() + if domain == "e404.example": + request.setResponseCode(404) + return b"" if domain == "e500.example": request.setResponseCode(500) return "" @@ -119,6 +147,12 @@ def render_POST(self, request): request.setResponseCode(500) return b'["foo"]' + auth_header = request.getHeader("Authorization") + payment_header = request.getHeader("X-Payment") + if not auth_header and not payment_header: + request.setResponseCode(402) + return json.dumps(RESPONSE_402).encode() + response_data: dict[str, Any] = { "url": url, } @@ -127,9 +161,12 @@ def render_POST(self, request): if "httpResponseBody" in request_data: body = b64encode(html.encode()).decode() response_data["httpResponseBody"] = body - else: + if "browserHtml" in request_data: assert "browserHtml" in request_data response_data["browserHtml"] = html + if "screenshot" in request_data: + assert "screenshot" in request_data + response_data["screenshot"] = SCREENSHOT return json.dumps(response_data).encode() diff --git a/tests/test_main.py b/tests/test_main.py index cacf11d..ba21bd5 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -10,6 +10,8 @@ from zyte_api.__main__ import run from zyte_api.aio.errors import RequestError +from .test_x402 import HAS_X402 + class MockRequestError(Exception): @property @@ -52,7 +54,7 @@ async def fake_exception(value=True): @pytest.mark.parametrize( ("queries", "expected_response", "store_errors", "exception"), ( - [ + ( # test if it stores the error(s) also by adding flag ( [ @@ -79,7 +81,7 @@ async def fake_exception(value=True): False, fake_exception, ), - ] + ) ), ) @pytest.mark.asyncio @@ -264,3 +266,45 @@ def test_run_non_json_response(mockserver): assert not result.returncode assert result.stdout == b"" assert b"json.decoder.JSONDecodeError" in result.stderr + + +@pytest.mark.skipif(not HAS_X402, reason="x402 not installed") +def test_eth_key(mockserver): + with NamedTemporaryFile("w") as url_list: + url_list.write("https://a.example\n") + url_list.flush() + result = subprocess.run( + [ + "python", + "-m", + "zyte_api", + "--eth-key", + "a", + "--api-url", + mockserver.urljoin("/"), + url_list.name, + ], + capture_output=True, + check=False, + ) + assert b"must be exactly 32 bytes long" in result.stderr + + +@pytest.mark.skipif(not HAS_X402, reason="x402 not installed") +def test_no_key(mockserver): + with NamedTemporaryFile("w") as url_list: + url_list.write("https://a.example\n") + url_list.flush() + result = subprocess.run( + [ + "python", + "-m", + "zyte_api", + "--api-url", + mockserver.urljoin("/"), + url_list.name, + ], + capture_output=True, + check=False, + ) + assert b"NoApiKey" in result.stderr diff --git a/tests/test_x402.py b/tests/test_x402.py index db47df3..8ec775c 100644 --- a/tests/test_x402.py +++ b/tests/test_x402.py @@ -1,12 +1,18 @@ +import contextlib import importlib.util from os import environ from unittest import mock import pytest +from zyte_api._errors import RequestError from zyte_api.aio.client import AsyncClient +from .mockserver import SCREENSHOT + +BODY = "PGh0bWw+PGJvZHk+SGVsbG88aDE+V29ybGQhPC9oMT48L2JvZHk+PC9odG1sPg==" HAS_X402 = importlib.util.find_spec("x402") is not None +HTML = "Hello

World!

" KEY = "c85ef7d79691fe79573b1a7064c5232332f53bb1b44a08f1a737f57a68a4706e" @@ -34,3 +40,391 @@ def test_eth_key_short(): else: with pytest.raises(ImportError, match="No module named 'eth_account'"): AsyncClient(eth_key="a") + + +@contextlib.contextmanager +def reset_x402_cache(): + from zyte_api import _x402 + + try: + yield _x402.CACHE + finally: + _x402.CACHE = {} + + +@pytest.mark.skipif(not HAS_X402, reason="x402 not installed") +@pytest.mark.asyncio +@pytest.mark.parametrize( + "scenario", + ( + # Identical + { + "i1": {"url": "https://a.example", "httpResponseBody": True}, + "o1": {"url": "https://a.example", "httpResponseBody": BODY}, + "i2": {"url": "https://a.example", "httpResponseBody": True}, + "o2": {"url": "https://a.example", "httpResponseBody": BODY}, + "cache": "hit", + }, + # Extra headers + { + "i1": {"url": "https://a.example", "httpResponseBody": True}, + "o1": {"url": "https://a.example", "httpResponseBody": BODY}, + "i2": { + "url": "https://a.example", + "httpResponseBody": True, + "customHttpRequestHeaders": [{"name": "foo", "value": "bar"}], + }, + "o2": {"url": "https://a.example", "httpResponseBody": BODY}, + "cache": "hit", + }, + # Different domain + { + "i1": {"url": "https://a.example", "httpResponseBody": True}, + "o1": {"url": "https://a.example", "httpResponseBody": BODY}, + "i2": {"url": "https://b.example", "httpResponseBody": True}, + "o2": {"url": "https://b.example", "httpResponseBody": BODY}, + "cache": "miss", + }, + # Different request type (HTTP vs browser) + { + "i1": {"url": "https://a.example", "httpResponseBody": True}, + "o1": {"url": "https://a.example", "httpResponseBody": BODY}, + "i2": {"url": "https://a.example", "browserHtml": True}, + "o2": {"url": "https://a.example", "browserHtml": HTML}, + "cache": "miss", + }, + # Screenshot + { + "i1": {"url": "https://a.example", "browserHtml": True}, + "o1": {"url": "https://a.example", "browserHtml": HTML}, + "i2": {"url": "https://a.example", "screenshot": True}, + "o2": {"url": "https://a.example", "screenshot": SCREENSHOT}, + "cache": "miss", + }, + # Actions: Empty actions count as no actions + { + "i1": {"url": "https://a.example", "browserHtml": True}, + "o1": {"url": "https://a.example", "browserHtml": HTML}, + "i2": {"url": "https://a.example", "browserHtml": True, "actions": []}, + "o2": {"url": "https://a.example", "browserHtml": HTML}, + "cache": "hit", + }, + # Actions: Actions vs no actions + { + "i1": {"url": "https://a.example", "browserHtml": True}, + "o1": {"url": "https://a.example", "browserHtml": HTML}, + "i2": { + "url": "https://a.example", + "browserHtml": True, + "actions": [{"action": "click", "selector": "button#submit"}], + }, + "o2": {"url": "https://a.example", "browserHtml": HTML}, + "cache": "miss", + }, + # Actions: Different action count does not prevent cache hit + { + "i1": { + "url": "https://a.example", + "browserHtml": True, + "actions": [{"action": "click", "selector": "button#submit"}], + }, + "o1": {"url": "https://a.example", "browserHtml": HTML}, + "i2": { + "url": "https://a.example", + "browserHtml": True, + "actions": [ + {"action": "click", "selector": "button#submit"}, + {"action": "scrollBottom"}, + ], + }, + "o2": {"url": "https://a.example", "browserHtml": HTML}, + "cache": "hit", + }, + # Network capture: Empty network capture count as no network capture + { + "i1": {"url": "https://a.example", "browserHtml": True}, + "o1": {"url": "https://a.example", "browserHtml": HTML}, + "i2": { + "url": "https://a.example", + "browserHtml": True, + "networkCapture": [], + }, + "o2": {"url": "https://a.example", "browserHtml": HTML}, + "cache": "hit", + }, + # Network capture: Network capture vs no network capture + { + "i1": {"url": "https://a.example", "browserHtml": True}, + "o1": {"url": "https://a.example", "browserHtml": HTML}, + "i2": { + "url": "https://a.example", + "browserHtml": True, + "networkCapture": [ + { + "filterType": "url", + "httpResponseBody": True, + "value": "/api/", + "matchType": "contains", + } + ], + }, + "o2": {"url": "https://a.example", "browserHtml": HTML}, + "cache": "miss", + }, + # Network capture: Different network capture count does not prevent + # cache hit + { + "i1": { + "url": "https://a.example", + "browserHtml": True, + "networkCapture": [ + { + "filterType": "url", + "httpResponseBody": True, + "value": "/api/", + "matchType": "contains", + } + ], + }, + "o1": {"url": "https://a.example", "browserHtml": HTML}, + "i2": { + "url": "https://a.example", + "browserHtml": True, + "networkCapture": [ + { + "filterType": "url", + "httpResponseBody": True, + "value": "/api/", + "matchType": "contains", + }, + { + "filterType": "url", + "httpResponseBody": True, + "value": "/other/", + "matchType": "contains", + }, + ], + }, + "o2": {"url": "https://a.example", "browserHtml": HTML}, + "cache": "hit", + }, + # Extraction: serp does not affect cost + { + "i1": {"url": "https://a.example", "httpResponseBody": True}, + "o1": {"url": "https://a.example", "httpResponseBody": BODY}, + "i2": {"url": "https://a.example", "serp": True}, + "o2": {"url": "https://a.example"}, + "cache": "hit", + }, + # Extraction: all other extraction types do affect cost + { + "i1": {"url": "https://a.example", "httpResponseBody": True}, + "o1": {"url": "https://a.example", "httpResponseBody": BODY}, + "i2": {"url": "https://a.example", "product": True}, + "o2": {"url": "https://a.example"}, + "cache": "miss", + }, + # Extraction: customAttributes, same method + { + "i1": { + "url": "https://a.example", + "customAttributes": { + "summary": { + "type": "string", + "description": "A two sentence article summary", + }, + }, + }, + "o1": {"url": "https://a.example"}, + "i2": { + "url": "https://a.example", + "customAttributes": { + "summary": { + "type": "string", + "description": "A two sentence article summary", + }, + }, + "customAttributesOptions": { + "method": "generate", + }, + }, + "o2": {"url": "https://a.example"}, + "cache": "hit", + }, + # Extraction: customAttributes, same method + { + "i1": { + "url": "https://a.example", + "customAttributes": { + "summary": { + "type": "string", + "description": "A two sentence article summary", + }, + }, + }, + "o1": {"url": "https://a.example"}, + "i2": { + "url": "https://a.example", + "customAttributes": { + "summary": { + "type": "string", + "description": "A two sentence article summary", + }, + }, + "customAttributesOptions": { + "method": "extract", + }, + }, + "o2": {"url": "https://a.example"}, + "cache": "miss", + }, + # Extraction: the number of custom attributes does not affect cost + { + "i1": { + "url": "https://a.example", + "customAttributes": { + "summary": { + "type": "string", + "description": "A two sentence article summary", + }, + }, + }, + "o1": {"url": "https://a.example"}, + "i2": { + "url": "https://a.example", + "customAttributes": { + "summary": { + "type": "string", + "description": "A two sentence article summary", + }, + "article_sentiment": { + "type": "string", + "enum": ["positive", "negative", "neutral"], + }, + }, + }, + "o2": {"url": "https://a.example"}, + "cache": "hit", + }, + # Extraction: extractFrom is assumed to be browser by default + { + "i1": { + "url": "https://a.example", + "product": True, + }, + "o1": {"url": "https://a.example"}, + "i2": { + "url": "https://a.example", + "product": True, + "productOptions": { + "extractFrom": "browserHtml", + }, + }, + "o2": {"url": "https://a.example"}, + "cache": "hit", + }, + # Extraction: extractFrom affects cost + { + "i1": { + "url": "https://a.example", + "product": True, + "productOptions": { + "extractFrom": "httpResponseBody", + }, + }, + "o1": {"url": "https://a.example"}, + "i2": { + "url": "https://a.example", + "product": True, + "productOptions": { + "extractFrom": "browserHtml", + }, + }, + "o2": {"url": "https://a.example"}, + "cache": "miss", + }, + ), +) +async def test_cache(scenario, mockserver): + """Requests that are expected to have the same cost (or cost modifiers) as + a preceding request should hit the cache. + + https://docs.zyte.com/zyte-api/pricing.html#request-costs + """ + client = AsyncClient(eth_key=KEY, api_url=mockserver.urljoin("/")) + with reset_x402_cache() as cache: + assert len(cache) == 0 + assert client.agg_stats.n_x402_req == 0 + + # Request 1 + actual_result = await client.get(scenario["i1"]) + assert actual_result == scenario["o1"] + assert len(cache) == 1 + assert client.agg_stats.n_x402_req == 1 + + # Request 2 + actual_result = await client.get(scenario["i2"]) + assert actual_result == scenario["o2"] + assert len(cache) == 2 if scenario["cache"] == "miss" else 1 + assert client.agg_stats.n_x402_req == len(cache) + + +@pytest.mark.skipif(not HAS_X402, reason="x402 not installed") +@pytest.mark.asyncio +@mock.patch("zyte_api._x402.MINIMIZE_REQUESTS", False) +async def test_no_cache(mockserver): + client = AsyncClient(eth_key=KEY, api_url=mockserver.urljoin("/")) + input = {"url": "https://a.example", "httpResponseBody": True} + output = { + "url": "https://a.example", + "httpResponseBody": BODY, + } + + with reset_x402_cache() as cache: + assert len(cache) == 0 + assert client.agg_stats.n_x402_req == 0 + + # Initial request + actual_result = await client.get(input) + assert actual_result == output + assert len(cache) == 0 + assert client.agg_stats.n_x402_req == 1 + + # Identical request + actual_result = await client.get(input) + assert actual_result == output + assert len(cache) == 0 + assert client.agg_stats.n_x402_req == 2 + + +@pytest.mark.skipif(not HAS_X402, reason="x402 not installed") +@pytest.mark.asyncio +async def test_4xx(mockserver): + """An unexpected status code lower than 500 raises RequestError + immediately.""" + client = AsyncClient(eth_key=KEY, api_url=mockserver.urljoin("/")) + input = {"url": "https://e404.example", "httpResponseBody": True} + + with reset_x402_cache() as cache: + assert len(cache) == 0 + assert client.agg_stats.n_x402_req == 0 + with pytest.raises(RequestError): + await client.get(input) + assert len(cache) == 0 + assert client.agg_stats.n_x402_req == 1 + + +@pytest.mark.skipif(not HAS_X402, reason="x402 not installed") +@pytest.mark.asyncio +async def test_5xx(mockserver): + """An unexpected status code ≥ 500 gets retried once.""" + client = AsyncClient(eth_key=KEY, api_url=mockserver.urljoin("/")) + input = {"url": "https://e500.example", "httpResponseBody": True} + + with reset_x402_cache() as cache: + assert len(cache) == 0 + assert client.agg_stats.n_x402_req == 0 + with pytest.raises(RequestError): + await client.get(input) + assert len(cache) == 0 + assert client.agg_stats.n_x402_req == 2 From 27a3b138e05c68a93ba3c5467fd9f48a51a1038b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Wed, 23 Jul 2025 21:21:02 +0200 Subject: [PATCH 09/25] Use a custom retry policy for x402 initial requests --- zyte_api/_x402.py | 47 ++++++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/zyte_api/_x402.py b/zyte_api/_x402.py index 935e420..be8864e 100644 --- a/zyte_api/_x402.py +++ b/zyte_api/_x402.py @@ -6,7 +6,10 @@ from typing import TYPE_CHECKING, Any from urllib.parse import urlparse +from tenacity import stop_after_attempt + from zyte_api._errors import RequestError +from zyte_api._retry import RetryFactory if TYPE_CHECKING: from asyncio import Semaphore @@ -99,6 +102,14 @@ def get_max_cost_hash(query: dict[str, Any]) -> bytes: return md5(json.dumps(data, sort_keys=True).encode()).digest() # noqa: S324 +class X402RetryFactory(RetryFactory): + # Disable ban response retries. + download_error_stop = stop_after_attempt(1) + + +X402_RETRYING = X402RetryFactory().build() + + class _x402Handler: def __init__( self, @@ -151,28 +162,26 @@ async def fetch_requirements( stats: AggStats, ) -> tuple[Any, str]: post_kwargs = {"url": url, "json": query, "headers": headers} - retried = False - while True: + + async def request(): stats.n_x402_req += 1 async with self.semaphore, post_fn(**post_kwargs) as response: - if response.status >= 500 and not retried: - retried = True - continue - if response.status != 402: - content = await response.read() - response.release() - raise RequestError( - request_info=response.request_info, - history=response.history, - status=response.status, - message=response.reason, - headers=response.headers, - response_content=content, - query=query, - ) - data = await response.json() - break + if response.status == 402: + return await response.json() + content = await response.read() + response.release() + raise RequestError( + request_info=response.request_info, + history=response.history, + status=response.status, + message=response.reason, + headers=response.headers, + response_content=content, + query=query, + ) + request = X402_RETRYING.wraps(request) + data = await request() payment_response = self.x402PaymentRequiredResponse(**data) requirements = self.client.select_payment_requirements(payment_response.accepts) version = payment_response.x402_version From e20d1ed7464463661b6129a7f71c87f2ae18e206 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Wed, 23 Jul 2025 21:23:10 +0200 Subject: [PATCH 10/25] Remove unneeded skip --- tests/test_main.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_main.py b/tests/test_main.py index ba21bd5..8e9d371 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -290,7 +290,6 @@ def test_eth_key(mockserver): assert b"must be exactly 32 bytes long" in result.stderr -@pytest.mark.skipif(not HAS_X402, reason="x402 not installed") def test_no_key(mockserver): with NamedTemporaryFile("w") as url_list: url_list.write("https://a.example\n") From 9b42f982c1cb81f8bb0e665110666e8f72ae2500 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Wed, 23 Jul 2025 21:31:09 +0200 Subject: [PATCH 11/25] Test command call with an env key --- tests/test_main.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/test_main.py b/tests/test_main.py index 8e9d371..3b30b62 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -1,6 +1,7 @@ import json import subprocess from json import JSONDecodeError +from os import environ from pathlib import Path from tempfile import NamedTemporaryFile from unittest.mock import AsyncMock, Mock, patch @@ -288,6 +289,7 @@ def test_eth_key(mockserver): check=False, ) assert b"must be exactly 32 bytes long" in result.stderr + assert result.returncode def test_no_key(mockserver): @@ -307,3 +309,24 @@ def test_no_key(mockserver): check=False, ) assert b"NoApiKey" in result.stderr + assert result.returncode + + +def test_env_key(mockserver): + with NamedTemporaryFile("w") as url_list: + url_list.write("https://a.example\n") + url_list.flush() + result = subprocess.run( + [ + "python", + "-m", + "zyte_api", + "--api-url", + mockserver.urljoin("/"), + url_list.name, + ], + capture_output=True, + check=False, + env={**environ, "ZYTE_API_KEY": "a"}, + ) + assert result.returncode == 0 From 12ec3a4b888623f82b62294ad57dcf9c40ae423d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Wed, 23 Jul 2025 21:33:21 +0200 Subject: [PATCH 12/25] Test command when an Ethereum private key is set as an env var --- tests/test_main.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/test_main.py b/tests/test_main.py index 3b30b62..0068678 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -12,6 +12,7 @@ from zyte_api.aio.errors import RequestError from .test_x402 import HAS_X402 +from .test_x402 import KEY as ETH_KEY class MockRequestError(Exception): @@ -330,3 +331,23 @@ def test_env_key(mockserver): env={**environ, "ZYTE_API_KEY": "a"}, ) assert result.returncode == 0 + + +def test_env_eth_key(mockserver): + with NamedTemporaryFile("w") as url_list: + url_list.write("https://a.example\n") + url_list.flush() + result = subprocess.run( + [ + "python", + "-m", + "zyte_api", + "--api-url", + mockserver.urljoin("/"), + url_list.name, + ], + capture_output=True, + check=False, + env={**environ, "ZYTE_API_ETH_KEY": ETH_KEY}, + ) + assert (result.returncode == 0) if HAS_X402 else (result.returncode != 0) From ad3a1bbc4c83dd3b5c0550e765706d5bc5b89fa8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Wed, 23 Jul 2025 21:34:29 +0200 Subject: [PATCH 13/25] Keep mypy happy --- zyte_api/_x402.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zyte_api/_x402.py b/zyte_api/_x402.py index be8864e..a3cf339 100644 --- a/zyte_api/_x402.py +++ b/zyte_api/_x402.py @@ -104,7 +104,7 @@ def get_max_cost_hash(query: dict[str, Any]) -> bytes: class X402RetryFactory(RetryFactory): # Disable ban response retries. - download_error_stop = stop_after_attempt(1) + download_error_stop = stop_after_attempt(1) # type: ignore[assignment] X402_RETRYING = X402RetryFactory().build() From cae7b641f98bcf4814b1d303ce80163f6cc92022 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Wed, 23 Jul 2025 21:36:15 +0200 Subject: [PATCH 14/25] Fix min-x402 in CI --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1d7ee9e..1ace900 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -20,8 +20,8 @@ jobs: - python-version: "3.9" tox: min - python-version: "3.9" + - python-version: "3.10" tox: min-x402 - - python-version: "3.9" - python-version: "3.10" - python-version: "3.11" - python-version: "3.12" From 3720c9be72bbc5700d7bda241ea30c3bee4a4917 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Wed, 23 Jul 2025 21:55:54 +0200 Subject: [PATCH 15/25] Clarify required Python version --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index ba99831..e93ded4 100644 --- a/README.rst +++ b/README.rst @@ -43,7 +43,7 @@ Or, to use x402_: pip install zyte-api[x402] -.. note:: Python 3.9+ is required. +.. note:: Python 3.9+ is required; Python 3.10+ if using x402. .. install-end From 1f80f57ce21b507db0e9c698736ac39eb157d684 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Fri, 25 Jul 2025 11:38:36 +0200 Subject: [PATCH 16/25] Let --eth-key take precedence over ZYTE_API_KEY --- tests/test_apikey.py | 15 ++++++ tests/test_async.py | 40 +++++++------- tests/test_auth.py | 121 +++++++++++++++++++++++++++++++++++++++++++ tests/test_main.py | 87 ------------------------------- tests/test_retry.py | 16 +++--- tests/test_utils.py | 8 +-- tox.ini | 4 +- zyte_api/_async.py | 34 +++++++----- zyte_api/_x402.py | 11 +--- 9 files changed, 192 insertions(+), 144 deletions(-) create mode 100644 tests/test_apikey.py create mode 100644 tests/test_auth.py diff --git a/tests/test_apikey.py b/tests/test_apikey.py new file mode 100644 index 0000000..46cbbc9 --- /dev/null +++ b/tests/test_apikey.py @@ -0,0 +1,15 @@ +import pytest + +from zyte_api.apikey import NoApiKey, get_apikey + + +def test_get_apikey(monkeypatch): + assert get_apikey("a") == "a" + with pytest.raises(NoApiKey): + get_apikey() + with pytest.raises(NoApiKey): + get_apikey(None) + monkeypatch.setenv("ZYTE_API_KEY", "b") + assert get_apikey("a") == "a" + assert get_apikey() == "b" + assert get_apikey(None) == "b" diff --git a/tests/test_async.py b/tests/test_async.py index c9f4c59..f54eff9 100644 --- a/tests/test_async.py +++ b/tests/test_async.py @@ -12,14 +12,14 @@ @pytest.mark.parametrize( "client_cls", - [ + ( AsyncZyteAPI, AsyncClient, - ], + ), ) @pytest.mark.parametrize( ("user_agent", "expected"), - [ + ( ( None, USER_AGENT, @@ -28,7 +28,7 @@ f"scrapy-zyte-api/0.11.1 {USER_AGENT}", f"scrapy-zyte-api/0.11.1 {USER_AGENT}", ), - ], + ), ) def test_user_agent(client_cls, user_agent, expected): client = client_cls(api_key="123", api_url="http:\\test", user_agent=user_agent) @@ -37,10 +37,10 @@ def test_user_agent(client_cls, user_agent, expected): @pytest.mark.parametrize( "client_cls", - [ + ( AsyncZyteAPI, AsyncClient, - ], + ), ) def test_api_key(client_cls): client_cls(api_key="a") @@ -50,10 +50,10 @@ def test_api_key(client_cls): @pytest.mark.parametrize( ("client_cls", "get_method"), - [ + ( (AsyncZyteAPI, "get"), (AsyncClient, "request_raw"), - ], + ), ) @pytest.mark.asyncio async def test_get(client_cls, get_method, mockserver): @@ -70,10 +70,10 @@ async def test_get(client_cls, get_method, mockserver): @pytest.mark.parametrize( ("client_cls", "get_method"), - [ + ( (AsyncZyteAPI, "get"), (AsyncClient, "request_raw"), - ], + ), ) @pytest.mark.asyncio async def test_get_request_error(client_cls, get_method, mockserver): @@ -94,10 +94,10 @@ async def test_get_request_error(client_cls, get_method, mockserver): @pytest.mark.parametrize( ("client_cls", "get_method"), - [ + ( (AsyncZyteAPI, "get"), (AsyncClient, "request_raw"), - ], + ), ) @pytest.mark.asyncio async def test_get_request_error_empty_body(client_cls, get_method, mockserver): @@ -113,10 +113,10 @@ async def test_get_request_error_empty_body(client_cls, get_method, mockserver): @pytest.mark.parametrize( ("client_cls", "get_method"), - [ + ( (AsyncZyteAPI, "get"), (AsyncClient, "request_raw"), - ], + ), ) @pytest.mark.asyncio async def test_get_request_error_non_json(client_cls, get_method, mockserver): @@ -132,10 +132,10 @@ async def test_get_request_error_non_json(client_cls, get_method, mockserver): @pytest.mark.parametrize( ("client_cls", "get_method"), - [ + ( (AsyncZyteAPI, "get"), (AsyncClient, "request_raw"), - ], + ), ) @pytest.mark.asyncio async def test_get_request_error_unexpected_json(client_cls, get_method, mockserver): @@ -151,10 +151,10 @@ async def test_get_request_error_unexpected_json(client_cls, get_method, mockser @pytest.mark.parametrize( ("client_cls", "iter_method"), - [ + ( (AsyncZyteAPI, "iter"), (AsyncClient, "request_parallel_as_completed"), - ], + ), ) @pytest.mark.asyncio async def test_iter(client_cls, iter_method, mockserver): @@ -192,10 +192,10 @@ async def test_iter(client_cls, iter_method, mockserver): @pytest.mark.parametrize( ("client_cls", "get_method", "iter_method"), - [ + ( (AsyncZyteAPI, "get", "iter"), (AsyncClient, "request_raw", "request_parallel_as_completed"), - ], + ), ) @pytest.mark.asyncio async def test_semaphore(client_cls, get_method, iter_method, mockserver): diff --git a/tests/test_auth.py b/tests/test_auth.py new file mode 100644 index 0000000..90ccbd1 --- /dev/null +++ b/tests/test_auth.py @@ -0,0 +1,121 @@ +from os import environ +from subprocess import run +from tempfile import NamedTemporaryFile + +import pytest + +from zyte_api._x402 import _x402Handler +from zyte_api.aio.client import AsyncClient + +from .test_x402 import HAS_X402 +from .test_x402 import KEY as ETH_KEY + +ETH_KEY_2 = ETH_KEY[-1] + ETH_KEY[:-1] +assert ETH_KEY_2 != ETH_KEY + + +def run_zyte_api(args, env, mockserver): + with NamedTemporaryFile("w") as url_list: + url_list.write("https://a.example\n") + url_list.flush() + return run( + [ + "python", + "-m", + "zyte_api", + "--api-url", + mockserver.urljoin("/"), + url_list.name, + *args, + ], + capture_output=True, + check=False, + env={**environ, **env}, + ) + + +@pytest.mark.parametrize( + ("scenario", "expected"), + ( + ({}, {"stderr": "NoApiKey"}), + ({"args": ["--api-key", "a"]}, {}), + ({"env": {"ZYTE_API_KEY": "a"}}, {}), + ( + {"args": ["--eth-key", ETH_KEY]}, + {} if HAS_X402 else {"stderr": "ModuleNotFoundError"}, + ), + ( + {"env": {"ZYTE_API_ETH_KEY": ETH_KEY}}, + {} if HAS_X402 else {"stderr": "ModuleNotFoundError"}, + ), + ), +) +def test(scenario, expected, mockserver): + result = run_zyte_api( + scenario.get("args", []), + scenario.get("env", {}), + mockserver, + ) + if "stderr" in expected: + assert expected["stderr"].encode() in result.stderr + assert result.returncode == 1 + else: + assert result.returncode == 0 + + +@pytest.mark.parametrize( + ("scenario", "expected"), + ( + ( + { + "kwargs": {"api_key": "a", "eth_key": ETH_KEY}, + "env": { + "ZYTE_API_KEY": "b", + "ZYTE_API_ETH_KEY": ETH_KEY_2, + }, + }, + {"key_type": "zyte", "key": "a"}, + ), + ( + { + "kwargs": {"eth_key": ETH_KEY}, + "env": { + "ZYTE_API_KEY": "b", + "ZYTE_API_ETH_KEY": ETH_KEY_2, + }, + }, + {"key_type": "eth", "key": ETH_KEY}, + ), + ( + { + "env": { + "ZYTE_API_KEY": "b", + "ZYTE_API_ETH_KEY": ETH_KEY_2, + }, + }, + {"key_type": "zyte", "key": "b"}, + ), + ( + { + "env": { + "ZYTE_API_ETH_KEY": ETH_KEY_2, + }, + }, + {"key_type": "eth", "key": ETH_KEY_2}, + ), + ), +) +def test_precedence(scenario, expected, monkeypatch): + for key, value in scenario.get("env", {}).items(): + monkeypatch.setenv(key, value) + if expected["key_type"] == "eth" and not HAS_X402: + with pytest.raises(ImportError): + AsyncClient(**scenario.get("kwargs", {})) + return + client = AsyncClient(**scenario.get("kwargs", {})) + if expected["key_type"] == "zyte": + assert client.auth == expected["key"] + else: + assert expected["key_type"] == "eth" + assert isinstance(client.auth, _x402Handler) + assert client.auth.client.account.key.hex() == expected["key"] diff --git a/tests/test_main.py b/tests/test_main.py index 0068678..12747b9 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -1,7 +1,6 @@ import json import subprocess from json import JSONDecodeError -from os import environ from pathlib import Path from tempfile import NamedTemporaryFile from unittest.mock import AsyncMock, Mock, patch @@ -11,9 +10,6 @@ from zyte_api.__main__ import run from zyte_api.aio.errors import RequestError -from .test_x402 import HAS_X402 -from .test_x402 import KEY as ETH_KEY - class MockRequestError(Exception): @property @@ -268,86 +264,3 @@ def test_run_non_json_response(mockserver): assert not result.returncode assert result.stdout == b"" assert b"json.decoder.JSONDecodeError" in result.stderr - - -@pytest.mark.skipif(not HAS_X402, reason="x402 not installed") -def test_eth_key(mockserver): - with NamedTemporaryFile("w") as url_list: - url_list.write("https://a.example\n") - url_list.flush() - result = subprocess.run( - [ - "python", - "-m", - "zyte_api", - "--eth-key", - "a", - "--api-url", - mockserver.urljoin("/"), - url_list.name, - ], - capture_output=True, - check=False, - ) - assert b"must be exactly 32 bytes long" in result.stderr - assert result.returncode - - -def test_no_key(mockserver): - with NamedTemporaryFile("w") as url_list: - url_list.write("https://a.example\n") - url_list.flush() - result = subprocess.run( - [ - "python", - "-m", - "zyte_api", - "--api-url", - mockserver.urljoin("/"), - url_list.name, - ], - capture_output=True, - check=False, - ) - assert b"NoApiKey" in result.stderr - assert result.returncode - - -def test_env_key(mockserver): - with NamedTemporaryFile("w") as url_list: - url_list.write("https://a.example\n") - url_list.flush() - result = subprocess.run( - [ - "python", - "-m", - "zyte_api", - "--api-url", - mockserver.urljoin("/"), - url_list.name, - ], - capture_output=True, - check=False, - env={**environ, "ZYTE_API_KEY": "a"}, - ) - assert result.returncode == 0 - - -def test_env_eth_key(mockserver): - with NamedTemporaryFile("w") as url_list: - url_list.write("https://a.example\n") - url_list.flush() - result = subprocess.run( - [ - "python", - "-m", - "zyte_api", - "--api-url", - mockserver.urljoin("/"), - url_list.name, - ], - capture_output=True, - check=False, - env={**environ, "ZYTE_API_ETH_KEY": ETH_KEY}, - ) - assert (result.returncode == 0) if HAS_X402 else (result.returncode != 0) diff --git a/tests/test_retry.py b/tests/test_retry.py index a41e15c..8bf8f7f 100644 --- a/tests/test_retry.py +++ b/tests/test_retry.py @@ -36,11 +36,11 @@ class OutlierException(RuntimeError): @pytest.mark.parametrize( ("value", "exception"), - [ + ( (UNSET, OutlierException), (True, OutlierException), (False, RequestError), - ], + ), ) @pytest.mark.asyncio async def test_get_handle_retries(value, exception, mockserver): @@ -64,13 +64,13 @@ def broken_stop(_): @pytest.mark.parametrize( ("retry_factory", "status", "waiter"), - [ + ( (RetryFactory, 429, "throttling"), (RetryFactory, 520, "download_error"), (AggressiveRetryFactory, 429, "throttling"), (AggressiveRetryFactory, 500, "undocumented_error"), (AggressiveRetryFactory, 520, "download_error"), - ], + ), ) @pytest.mark.asyncio async def test_retry_wait(retry_factory, status, waiter, mockserver): @@ -93,10 +93,10 @@ class CustomRetryFactory(retry_factory): @pytest.mark.parametrize( "retry_factory", - [ + ( RetryFactory, AggressiveRetryFactory, - ], + ), ) @pytest.mark.asyncio async def test_retry_wait_network_error(retry_factory): @@ -150,7 +150,7 @@ def __call__(self, number, add=0): @pytest.mark.parametrize( ("retrying", "outcomes", "exhausted"), - [ + ( # Shared behaviors of all retry policies *( (retrying, outcomes, exhausted) @@ -395,7 +395,7 @@ def __call__(self, number, add=0): ), ) ), - ], + ), ) @pytest.mark.asyncio @patch("time.monotonic") diff --git a/tests/test_utils.py b/tests/test_utils.py index 76eab08..01e59ed 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -16,7 +16,7 @@ async def test_create_session_custom_connector(): @pytest.mark.parametrize( ("file_name", "first_line", "expected"), - [ + ( ( "", "https://toscrape.com", @@ -62,7 +62,7 @@ async def test_create_session_custom_connector(): '{"url": "https://toscrape.com"}', "jl", ), - ], + ), ) def test_guess_intype(file_name, first_line, expected): assert _guess_intype(file_name, [first_line]) == expected @@ -70,7 +70,7 @@ def test_guess_intype(file_name, first_line, expected): @pytest.mark.parametrize( ("input", "output"), - [ + ( # Unsafe URLs in the url field are modified, while left untouched on # other fields. ( @@ -102,7 +102,7 @@ def test_guess_intype(file_name, first_line, expected): ), # NOTE: We use w3lib.url.safe_url_string for escaping. Tests covering # the URL escaping logic exist upstream. - ], + ), ) def test_process_query(input, output): assert _process_query(input) == output diff --git a/tox.ini b/tox.ini index a669d2b..7011cd0 100644 --- a/tox.ini +++ b/tox.ini @@ -66,8 +66,8 @@ commands = pre-commit run --all-files --show-diff-on-failure [testenv:twine] deps = - twine==5.1.1 - build==1.2.2 + twine==6.1.0 + build==1.2.2.post1 commands = python -m build --sdist twine check dist/* diff --git a/zyte_api/_async.py b/zyte_api/_async.py index 91cf7e1..962d2a2 100644 --- a/zyte_api/_async.py +++ b/zyte_api/_async.py @@ -4,18 +4,20 @@ import time from asyncio import Future from functools import partial +from os import environ from typing import TYPE_CHECKING, Any import aiohttp from tenacity import AsyncRetrying from zyte_api._x402 import _x402Handler +from zyte_api.apikey import NoApiKey from ._errors import RequestError from ._retry import zyte_api_retrying from ._utils import _AIO_API_TIMEOUT, create_session -from .apikey import NoApiKey, get_apikey from .constants import API_URL +from .constants import ENV_VARIABLE as API_KEY_ENV_VAR from .stats import AggStats, ResponseStats from .utils import USER_AGENT, _process_query @@ -107,18 +109,24 @@ def __init__( self.retrying = retrying or zyte_api_retrying self.user_agent = user_agent or USER_AGENT self._semaphore = asyncio.Semaphore(n_conn) - - try: - self.auth: str | _x402Handler = get_apikey(api_key) - except NoApiKey: - try: - self.auth = _x402Handler(eth_key, self._semaphore, self.agg_stats) - except KeyError: - raise NoApiKey( - "You must provide either a Zyte API key or an Ethereum " - "private key. For the latter, you must also install " - "zyte-api as zyte-api[x402]." - ) from None + self.auth: str | _x402Handler + self._load_auth(api_key, eth_key) + + def _load_auth(self, api_key: str | None, eth_key: str | None): + if api_key: + self.auth = api_key + elif eth_key: + self.auth = _x402Handler(eth_key, self._semaphore, self.agg_stats) + elif api_key := environ.get(API_KEY_ENV_VAR): + self.auth = api_key + elif eth_key := environ.get("ZYTE_API_ETH_KEY"): + self.auth = _x402Handler(eth_key, self._semaphore, self.agg_stats) + else: + raise NoApiKey( + "You must provide either a Zyte API key or an Ethereum " + "private key. For the latter, you must also install " + "zyte-api as zyte-api[x402]." + ) async def get( self, diff --git a/zyte_api/_x402.py b/zyte_api/_x402.py index a3cf339..4946e81 100644 --- a/zyte_api/_x402.py +++ b/zyte_api/_x402.py @@ -21,7 +21,6 @@ from zyte_api.stats import AggStats CACHE: dict[bytes, tuple[Any, str]] = {} -ENV_VARIABLE = "ZYTE_API_ETH_KEY" EXTRACT_KEYS = { "article", "articleList", @@ -66,12 +65,6 @@ def may_use_browser(query: dict[str, Any]) -> bool: return not query.get("httpResponseBody") -def _get_eth_key(key: str | None = None) -> str: - if key is not None: - return key - return environ[ENV_VARIABLE] - - def get_max_cost_hash(query: dict[str, Any]) -> bytes: """Returns a hash based on *query* that should be the same for queries whose estimate costs are the same. @@ -113,12 +106,10 @@ class X402RetryFactory(RetryFactory): class _x402Handler: def __init__( self, - eth_key: str | None, + eth_key: str, semaphore: Semaphore, stats: AggStats, ): - eth_key = _get_eth_key(eth_key) - from eth_account import Account from x402.clients import x402Client from x402.types import x402PaymentRequiredResponse From dec667f2194570d602684fa0fd4623ee1955d487 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Fri, 25 Jul 2025 20:32:20 +0200 Subject: [PATCH 17/25] Handle race conditions and unexpected server responses --- docs/use/x402.rst | 21 +++++-- tests/mockserver.py | 50 +++++++++++++++ tests/test_auth.py | 6 +- tests/test_x402.py | 150 +++++++++++++++++++++++++++++++++++++------- zyte_api/_async.py | 20 +++--- zyte_api/_retry.py | 13 ++++ zyte_api/_x402.py | 53 +++++++++++----- zyte_api/errors.py | 17 ++++- zyte_api/stats.py | 2 +- 9 files changed, 280 insertions(+), 52 deletions(-) diff --git a/docs/use/x402.rst b/docs/use/x402.rst index 0859e9d..e87952c 100644 --- a/docs/use/x402.rst +++ b/docs/use/x402.rst @@ -5,14 +5,25 @@ x402 ==== It is possible to use :ref:`Zyte API ` without a Zyte API account by -using the x402_ protocol to handle payments. +using the x402_ protocol to handle payments: -During :ref:`installation `, make sure to install the ``x402`` extra. +#. Read the `Zyte Terms of Service`_. By using Zyte API, you are accepting + them. -Then, configure the *private* key of your Ethereum_ account as follows so that -it can be used to authorize payments. + .. _Zyte Terms of Service: https://www.zyte.com/terms-policies/terms-of-service/ -.. _Ethereum: https://ethereum.org/ +#. During :ref:`installation `, make sure to install the ``x402`` extra. + +#. :ref:`Configure ` the *private* key of your Ethereum_ account to + authorize payments. + + .. _Ethereum: https://ethereum.org/ + + +.. _eth-key: + +Configuring your Ethereum private key +===================================== It is recommended to configure your Ethereum private key through an environment variable, so that it can be picked by both the :ref:`command-line client diff --git a/tests/mockserver.py b/tests/mockserver.py index 773a241..8b1aed0 100644 --- a/tests/mockserver.py +++ b/tests/mockserver.py @@ -4,6 +4,7 @@ import sys import time from base64 import b64encode +from collections import defaultdict from importlib import import_module from subprocess import PIPE, Popen from typing import Any @@ -86,6 +87,8 @@ def _delayedRender(self, request): "error": "Use basic auth or x402", } +WORKFLOWS: defaultdict[str, dict[str, Any]] = defaultdict(dict) + class DefaultResource(Resource): request_count = 0 @@ -153,6 +156,53 @@ def render_POST(self, request): request.setResponseCode(402) return json.dumps(RESPONSE_402).encode() + echo_data = request_data.get("echoData") + if echo_data: + session_data = WORKFLOWS.setdefault(echo_data, {}) + if echo_data == "402-payment-retry": + assert request.getHeader("X-Payment") + # Return 402 on the first request, then 200 on the second + if not session_data: + session_data["payment_attempts"] = 1 + request.setResponseCode(402) + return json.dumps(RESPONSE_402).encode() + elif echo_data == "402-payment-retry-exceeded": + assert request.getHeader("X-Payment") + # Return 402 on the first 2 requests, then 200 on the third + # (the client will give up after 2 attempts, so the will be no + # third in practice) + if not session_data: + session_data["payment_attempts"] = 1 + session_data["payment"] = request.getHeader("X-Payment") + request.setResponseCode(402) + return json.dumps(RESPONSE_402).encode() + if session_data["payment_attempts"] == 1: + session_data["payment_attempts"] = 2 + # Make sure the client refreshed the payment header + assert session_data["payment"] != request.getHeader("X-Payment") + request.setResponseCode(402) + return json.dumps(RESPONSE_402).encode() + elif echo_data == "402-no-payment-retry": + assert not request.getHeader("X-Payment") + # Return 402 on the first request, then 200 on the second + if not session_data: + session_data["payment_attempts"] = 1 + request.setResponseCode(402) + return json.dumps(RESPONSE_402).encode() + elif echo_data == "402-no-payment-retry-exceeded": + assert not request.getHeader("X-Payment") + # Return 402 on the first 2 requests, then 200 on the third + # (the client will give up after 2 attempts, so the will be no + # third in practice) + if not session_data: + session_data["payment_attempts"] = 1 + request.setResponseCode(402) + return json.dumps(RESPONSE_402).encode() + if session_data["payment_attempts"] == 1: + session_data["payment_attempts"] = 2 + request.setResponseCode(402) + return json.dumps(RESPONSE_402).encode() + response_data: dict[str, Any] = { "url": url, } diff --git a/tests/test_auth.py b/tests/test_auth.py index 90ccbd1..7bcf6f8 100644 --- a/tests/test_auth.py +++ b/tests/test_auth.py @@ -4,8 +4,8 @@ import pytest +from zyte_api import AsyncZyteAPI from zyte_api._x402 import _x402Handler -from zyte_api.aio.client import AsyncClient from .test_x402 import HAS_X402 from .test_x402 import KEY as ETH_KEY @@ -110,9 +110,9 @@ def test_precedence(scenario, expected, monkeypatch): monkeypatch.setenv(key, value) if expected["key_type"] == "eth" and not HAS_X402: with pytest.raises(ImportError): - AsyncClient(**scenario.get("kwargs", {})) + AsyncZyteAPI(**scenario.get("kwargs", {})) return - client = AsyncClient(**scenario.get("kwargs", {})) + client = AsyncZyteAPI(**scenario.get("kwargs", {})) if expected["key_type"] == "zyte": assert client.auth == expected["key"] else: diff --git a/tests/test_x402.py b/tests/test_x402.py index 8ec775c..08cbd48 100644 --- a/tests/test_x402.py +++ b/tests/test_x402.py @@ -5,8 +5,8 @@ import pytest +from zyte_api import AsyncZyteAPI from zyte_api._errors import RequestError -from zyte_api.aio.client import AsyncClient from .mockserver import SCREENSHOT @@ -18,28 +18,28 @@ def test_eth_key_param(): if HAS_X402: - AsyncClient(eth_key=KEY) + AsyncZyteAPI(eth_key=KEY) else: with pytest.raises(ImportError, match="No module named 'eth_account'"): - AsyncClient(eth_key=KEY) + AsyncZyteAPI(eth_key=KEY) @mock.patch.dict(environ, {"ZYTE_API_ETH_KEY": KEY}) def test_eth_key_env_var(): if HAS_X402: - AsyncClient() + AsyncZyteAPI() else: with pytest.raises(ImportError, match="No module named 'eth_account'"): - AsyncClient() + AsyncZyteAPI() def test_eth_key_short(): if HAS_X402: with pytest.raises(ValueError, match="must be exactly 32 bytes long"): - AsyncClient(eth_key="a") + AsyncZyteAPI(eth_key="a") else: with pytest.raises(ImportError, match="No module named 'eth_account'"): - AsyncClient(eth_key="a") + AsyncZyteAPI(eth_key="a") @contextlib.contextmanager @@ -351,29 +351,29 @@ async def test_cache(scenario, mockserver): https://docs.zyte.com/zyte-api/pricing.html#request-costs """ - client = AsyncClient(eth_key=KEY, api_url=mockserver.urljoin("/")) + client = AsyncZyteAPI(eth_key=KEY, api_url=mockserver.urljoin("/")) with reset_x402_cache() as cache: assert len(cache) == 0 - assert client.agg_stats.n_x402_req == 0 + assert client.agg_stats.n_402_req == 0 # Request 1 actual_result = await client.get(scenario["i1"]) assert actual_result == scenario["o1"] assert len(cache) == 1 - assert client.agg_stats.n_x402_req == 1 + assert client.agg_stats.n_402_req == 1 # Request 2 actual_result = await client.get(scenario["i2"]) assert actual_result == scenario["o2"] assert len(cache) == 2 if scenario["cache"] == "miss" else 1 - assert client.agg_stats.n_x402_req == len(cache) + assert client.agg_stats.n_402_req == len(cache) @pytest.mark.skipif(not HAS_X402, reason="x402 not installed") @pytest.mark.asyncio @mock.patch("zyte_api._x402.MINIMIZE_REQUESTS", False) async def test_no_cache(mockserver): - client = AsyncClient(eth_key=KEY, api_url=mockserver.urljoin("/")) + client = AsyncZyteAPI(eth_key=KEY, api_url=mockserver.urljoin("/")) input = {"url": "https://a.example", "httpResponseBody": True} output = { "url": "https://a.example", @@ -382,19 +382,19 @@ async def test_no_cache(mockserver): with reset_x402_cache() as cache: assert len(cache) == 0 - assert client.agg_stats.n_x402_req == 0 + assert client.agg_stats.n_402_req == 0 # Initial request actual_result = await client.get(input) assert actual_result == output assert len(cache) == 0 - assert client.agg_stats.n_x402_req == 1 + assert client.agg_stats.n_402_req == 1 # Identical request actual_result = await client.get(input) assert actual_result == output assert len(cache) == 0 - assert client.agg_stats.n_x402_req == 2 + assert client.agg_stats.n_402_req == 2 @pytest.mark.skipif(not HAS_X402, reason="x402 not installed") @@ -402,29 +402,137 @@ async def test_no_cache(mockserver): async def test_4xx(mockserver): """An unexpected status code lower than 500 raises RequestError immediately.""" - client = AsyncClient(eth_key=KEY, api_url=mockserver.urljoin("/")) + client = AsyncZyteAPI(eth_key=KEY, api_url=mockserver.urljoin("/")) input = {"url": "https://e404.example", "httpResponseBody": True} with reset_x402_cache() as cache: assert len(cache) == 0 - assert client.agg_stats.n_x402_req == 0 + assert client.agg_stats.n_402_req == 0 with pytest.raises(RequestError): await client.get(input) assert len(cache) == 0 - assert client.agg_stats.n_x402_req == 1 + assert client.agg_stats.n_402_req == 1 @pytest.mark.skipif(not HAS_X402, reason="x402 not installed") @pytest.mark.asyncio async def test_5xx(mockserver): """An unexpected status code ≥ 500 gets retried once.""" - client = AsyncClient(eth_key=KEY, api_url=mockserver.urljoin("/")) + client = AsyncZyteAPI(eth_key=KEY, api_url=mockserver.urljoin("/")) input = {"url": "https://e500.example", "httpResponseBody": True} with reset_x402_cache() as cache: assert len(cache) == 0 - assert client.agg_stats.n_x402_req == 0 + assert client.agg_stats.n_402_req == 0 with pytest.raises(RequestError): await client.get(input) assert len(cache) == 0 - assert client.agg_stats.n_x402_req == 2 + assert client.agg_stats.n_402_req == 2 + + +@pytest.mark.skipif(not HAS_X402, reason="x402 not installed") +@pytest.mark.asyncio +async def test_payment_retry(mockserver): + client = AsyncZyteAPI(eth_key=KEY, api_url=mockserver.urljoin("/")) + input = { + "url": "https://a.example", + "httpResponseBody": True, + "echoData": "402-payment-retry", + } + + with reset_x402_cache() as cache: + assert len(cache) == 0 + assert client.agg_stats.n_402_req == 0 + data = await client.get(input) + assert len(cache) == 1 + + assert data == {"httpResponseBody": BODY, "url": "https://a.example"} + assert client.agg_stats.n_success == 1 + assert client.agg_stats.n_fatal_errors == 0 + assert client.agg_stats.n_attempts == 2 + assert client.agg_stats.n_errors == 1 + assert client.agg_stats.n_402_req == 1 + assert client.agg_stats.status_codes == {402: 1, 200: 1} + assert client.agg_stats.exception_types == {} + assert client.agg_stats.api_error_types == {"use-basic-auth-or-x402": 1} + + +@pytest.mark.skipif(not HAS_X402, reason="x402 not installed") +@pytest.mark.asyncio +async def test_payment_retry_exceeded(mockserver): + client = AsyncZyteAPI(eth_key=KEY, api_url=mockserver.urljoin("/")) + input = { + "url": "https://a.example", + "httpResponseBody": True, + "echoData": "402-payment-retry-exceeded", + } + + with reset_x402_cache() as cache: + assert len(cache) == 0 + assert client.agg_stats.n_402_req == 0 + with pytest.raises(RequestError): + await client.get(input) + assert len(cache) == 1 + + assert client.agg_stats.n_success == 0 + assert client.agg_stats.n_fatal_errors == 1 + assert client.agg_stats.n_attempts == 2 + assert client.agg_stats.n_errors == 2 + assert client.agg_stats.n_402_req == 1 + assert client.agg_stats.status_codes == {402: 2} + assert client.agg_stats.exception_types == {} + assert client.agg_stats.api_error_types == {"use-basic-auth-or-x402": 2} + + +@pytest.mark.asyncio +async def test_no_payment_retry(mockserver): + """An HTTP 402 response received out of the context of the x402 protocol, + as a response to a regular request using basic auth.""" + client = AsyncZyteAPI(api_key="a", api_url=mockserver.urljoin("/")) + input = { + "url": "https://a.example", + "httpResponseBody": True, + "echoData": "402-no-payment-retry", + } + + with reset_x402_cache() as cache: + assert len(cache) == 0 + assert client.agg_stats.n_402_req == 0 + data = await client.get(input) + assert len(cache) == 0 + + assert data == {"httpResponseBody": BODY, "url": "https://a.example"} + assert client.agg_stats.n_success == 1 + assert client.agg_stats.n_fatal_errors == 0 + assert client.agg_stats.n_attempts == 2 + assert client.agg_stats.n_errors == 1 + assert client.agg_stats.n_402_req == 0 + assert client.agg_stats.status_codes == {402: 1, 200: 1} + assert client.agg_stats.exception_types == {} + assert client.agg_stats.api_error_types == {"use-basic-auth-or-x402": 1} + + +@pytest.mark.asyncio +async def test_no_payment_retry_exceeded(mockserver): + client = AsyncZyteAPI(api_key="a", api_url=mockserver.urljoin("/")) + input = { + "url": "https://a.example", + "httpResponseBody": True, + "echoData": "402-no-payment-retry-exceeded", + } + + with reset_x402_cache() as cache: + assert len(cache) == 0 + assert client.agg_stats.n_402_req == 0 + with pytest.raises(RequestError): + await client.get(input) + assert len(cache) == 0 + + assert client.agg_stats.n_success == 0 + assert client.agg_stats.n_fatal_errors == 1 + assert client.agg_stats.n_attempts == 2 + assert client.agg_stats.n_errors == 2 + assert client.agg_stats.n_402_req == 0 + assert client.agg_stats.status_codes == {402: 2} + assert client.agg_stats.exception_types == {} + assert client.agg_stats.api_error_types == {"use-basic-auth-or-x402": 2} diff --git a/zyte_api/_async.py b/zyte_api/_async.py index 962d2a2..c1f7fc4 100644 --- a/zyte_api/_async.py +++ b/zyte_api/_async.py @@ -152,6 +152,13 @@ async def get( x402_headers = await self.auth.get_headers(url, query, headers, post) headers.update(x402_headers) + post_kwargs = { + "url": url, + "json": query, + "headers": headers, + **auth_kwargs, + } + response_stats = [] start_global = time.perf_counter() @@ -159,16 +166,15 @@ async def request(): stats = ResponseStats.create(start_global) self.agg_stats.n_attempts += 1 - post_kwargs = { - "url": url, - "json": query, - "headers": headers, - **auth_kwargs, - } - try: async with self._semaphore, post(**post_kwargs) as resp: stats.record_connected(resp.status, self.agg_stats) + if ( + resp.status == 402 + and isinstance(self.auth, _x402Handler) + and "X-Payment" in post_kwargs["headers"] + ): + self.auth.refresh_post_kwargs(post_kwargs, await resp.json()) if resp.status >= 400: content = await resp.read() resp.release() diff --git a/zyte_api/_retry.py b/zyte_api/_retry.py index a6a82e1..430d380 100644 --- a/zyte_api/_retry.py +++ b/zyte_api/_retry.py @@ -19,6 +19,7 @@ retry_if_exception, wait_chain, wait_fixed, + wait_none, wait_random, wait_random_exponential, ) @@ -159,6 +160,10 @@ def _undocumented_error(exc: BaseException) -> bool: ) +def _402_error(exc: BaseException) -> bool: + return isinstance(exc, RequestError) and exc.status == 402 + + def _deprecated(message: str, callable: Callable) -> Callable: def wrapper(factory, retry_state: RetryCallState): warn(message, DeprecationWarning, stacklevel=3) @@ -200,6 +205,7 @@ class CustomRetryFactory(RetryFactory): | retry_if_exception(_is_network_error) | retry_if_exception(_download_error) | retry_if_exception(_undocumented_error) + | retry_if_exception(_402_error) ) # throttling throttling_wait = wait_chain( @@ -244,6 +250,9 @@ class CustomRetryFactory(RetryFactory): undocumented_error_stop = stop_on_count(2) undocumented_error_wait = network_error_wait + x402_error_stop = stop_on_count(2) + x402_error_wait = wait_none() + def wait(self, retry_state: RetryCallState) -> float: assert retry_state.outcome, "Unexpected empty outcome" exc = retry_state.outcome.exception() @@ -254,6 +263,8 @@ def wait(self, retry_state: RetryCallState) -> float: return self.network_error_wait(retry_state=retry_state) if _undocumented_error(exc): return self.undocumented_error_wait(retry_state=retry_state) + if _402_error(exc): + return self.x402_error_wait(retry_state=retry_state) assert _download_error(exc) # See retry_condition return self.download_error_wait(retry_state=retry_state) @@ -267,6 +278,8 @@ def stop(self, retry_state: RetryCallState) -> bool: return self.network_error_stop(retry_state) if _undocumented_error(exc): return self.undocumented_error_stop(retry_state) + if _402_error(exc): + return self.x402_error_stop(retry_state) assert _download_error(exc) # See retry_condition return self.download_error_stop(retry_state) diff --git a/zyte_api/_x402.py b/zyte_api/_x402.py index 4946e81..b7d506c 100644 --- a/zyte_api/_x402.py +++ b/zyte_api/_x402.py @@ -127,35 +127,45 @@ async def get_headers( headers: dict[str, str], post_fn: Callable[..., AbstractAsyncContextManager[ClientResponse]], ) -> dict[str, str]: - if MINIMIZE_REQUESTS: - max_cost_hash = get_max_cost_hash(query) - if max_cost_hash not in CACHE: - CACHE[max_cost_hash] = await self.fetch_requirements( - url, query, headers, post_fn, self.stats - ) - requirements, version = CACHE[max_cost_hash] - else: - requirements, version = await self.fetch_requirements( - url, query, headers, post_fn, self.stats - ) - payment_header = self.client.create_payment_header(requirements, version) + requirement_data = await self.get_requirement_data(url, query, headers, post_fn) + return self.get_headers_from_requirement_data(requirement_data) + + def get_headers_from_requirement_data( + self, requirement_data: tuple[Any, str] + ) -> dict[str, str]: + payment_header = self.client.create_payment_header(*requirement_data) return { "Access-Control-Expose-Headers": "X-Payment-Response", "X-Payment": payment_header, } + async def get_requirement_data( + self, + url: str, + query: dict[str, Any], + headers: dict[str, str], + post_fn: Callable[..., AbstractAsyncContextManager[ClientResponse]], + ) -> tuple[Any, str]: + if not MINIMIZE_REQUESTS: + return await self.fetch_requirements(url, query, headers, post_fn) + max_cost_hash = get_max_cost_hash(query) + if max_cost_hash not in CACHE: + CACHE[max_cost_hash] = await self.fetch_requirements( + url, query, headers, post_fn + ) + return CACHE[max_cost_hash] + async def fetch_requirements( self, url: str, query: dict[str, Any], headers: dict[str, str], post_fn: Callable[..., AbstractAsyncContextManager[ClientResponse]], - stats: AggStats, ) -> tuple[Any, str]: post_kwargs = {"url": url, "json": query, "headers": headers} async def request(): - stats.n_x402_req += 1 + self.stats.n_402_req += 1 async with self.semaphore, post_fn(**post_kwargs) as response: if response.status == 402: return await response.json() @@ -173,7 +183,22 @@ async def request(): request = X402_RETRYING.wraps(request) data = await request() + return self.parse_requirements(data) + + def parse_requirements(self, data: dict[str, Any]) -> tuple[Any, str]: payment_response = self.x402PaymentRequiredResponse(**data) requirements = self.client.select_payment_requirements(payment_response.accepts) version = payment_response.x402_version return requirements, version + + def refresh_post_kwargs( + self, + post_kwargs: dict[str, Any], + response_data: dict[str, Any], + ) -> None: + requirement_data = self.parse_requirements(response_data) + if MINIMIZE_REQUESTS: + max_cost_hash = get_max_cost_hash(post_kwargs["json"]) + CACHE[max_cost_hash] = requirement_data + headers = self.get_headers_from_requirement_data(requirement_data) + post_kwargs["headers"] = {**post_kwargs["headers"], **headers} diff --git a/zyte_api/errors.py b/zyte_api/errors.py index 0248e2c..8f3373c 100644 --- a/zyte_api/errors.py +++ b/zyte_api/errors.py @@ -6,6 +6,10 @@ import attr +def _to_kebab_case(s: str) -> str: + return s.strip().replace(" ", "-").replace("_", "-").lower() + + @attr.s(auto_attribs=True) class ParsedError: """Parsed error response body from Zyte API.""" @@ -45,4 +49,15 @@ def from_body(cls, response_body: bytes) -> ParsedError: def type(self) -> Optional[str]: """ID of the error type, e.g. ``"/limits/over-user-limit"`` or ``"/download/temporary-error"``.""" - return (self.data or {}).get("type", None) + data = self.data or {} + if "type" in data: + return data["type"] + if "error" in data and isinstance(data["error"], str): # HTTP 402 + try: + prefix, _ = data["error"].split(":", 1) + except ValueError: + prefix = data["error"] + if len(prefix) > 32: + return None + return _to_kebab_case(prefix) + return None diff --git a/zyte_api/stats.py b/zyte_api/stats.py index c4acf51..e52edfb 100644 --- a/zyte_api/stats.py +++ b/zyte_api/stats.py @@ -37,7 +37,7 @@ def __init__(self): ) self.n_429 = 0 # number of 429 (throttling) responses self.n_errors = 0 # number of errors, including errors which were retried - self.n_x402_req = 0 # x402 requests for payment requirements + self.n_402_req = 0 # requests for a 402 (payment required) response self.status_codes = Counter() self.exception_types = Counter() From a5b5e406b10a9aa0de6f6e0e7f9e0bf8a6447733 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Fri, 25 Jul 2025 20:49:13 +0200 Subject: [PATCH 18/25] Improve error mapping --- tests/test_x402.py | 8 ++++---- zyte_api/errors.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_x402.py b/tests/test_x402.py index 08cbd48..8a53855 100644 --- a/tests/test_x402.py +++ b/tests/test_x402.py @@ -454,7 +454,7 @@ async def test_payment_retry(mockserver): assert client.agg_stats.n_402_req == 1 assert client.agg_stats.status_codes == {402: 1, 200: 1} assert client.agg_stats.exception_types == {} - assert client.agg_stats.api_error_types == {"use-basic-auth-or-x402": 1} + assert client.agg_stats.api_error_types == {"/x402/use-basic-auth-or-x402": 1} @pytest.mark.skipif(not HAS_X402, reason="x402 not installed") @@ -481,7 +481,7 @@ async def test_payment_retry_exceeded(mockserver): assert client.agg_stats.n_402_req == 1 assert client.agg_stats.status_codes == {402: 2} assert client.agg_stats.exception_types == {} - assert client.agg_stats.api_error_types == {"use-basic-auth-or-x402": 2} + assert client.agg_stats.api_error_types == {"/x402/use-basic-auth-or-x402": 2} @pytest.mark.asyncio @@ -509,7 +509,7 @@ async def test_no_payment_retry(mockserver): assert client.agg_stats.n_402_req == 0 assert client.agg_stats.status_codes == {402: 1, 200: 1} assert client.agg_stats.exception_types == {} - assert client.agg_stats.api_error_types == {"use-basic-auth-or-x402": 1} + assert client.agg_stats.api_error_types == {"/x402/use-basic-auth-or-x402": 1} @pytest.mark.asyncio @@ -535,4 +535,4 @@ async def test_no_payment_retry_exceeded(mockserver): assert client.agg_stats.n_402_req == 0 assert client.agg_stats.status_codes == {402: 2} assert client.agg_stats.exception_types == {} - assert client.agg_stats.api_error_types == {"use-basic-auth-or-x402": 2} + assert client.agg_stats.api_error_types == {"/x402/use-basic-auth-or-x402": 2} diff --git a/zyte_api/errors.py b/zyte_api/errors.py index 8f3373c..ff7d19d 100644 --- a/zyte_api/errors.py +++ b/zyte_api/errors.py @@ -59,5 +59,5 @@ def type(self) -> Optional[str]: prefix = data["error"] if len(prefix) > 32: return None - return _to_kebab_case(prefix) + return f"/x402/{_to_kebab_case(prefix)}" return None From 433dbc8b3b842c547f25020145d8cc0ade809ef8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Mon, 28 Jul 2025 22:01:15 +0200 Subject: [PATCH 19/25] Complete test coverage, fix backward compatibility, and add client.auth: AuthInfo --- tests/mockserver.py | 13 ++++++++++- tests/test_auth.py | 14 +++++++----- tests/test_x402.py | 37 ++++++++++++++++++++++++++++++ zyte_api/_async.py | 55 ++++++++++++++++++++++++++++++++++++--------- 4 files changed, 103 insertions(+), 16 deletions(-) diff --git a/tests/mockserver.py b/tests/mockserver.py index 8b1aed0..8f3ef01 100644 --- a/tests/mockserver.py +++ b/tests/mockserver.py @@ -159,7 +159,7 @@ def render_POST(self, request): echo_data = request_data.get("echoData") if echo_data: session_data = WORKFLOWS.setdefault(echo_data, {}) - if echo_data == "402-payment-retry": + if echo_data in {"402-payment-retry", "402-payment-retry-2"}: assert request.getHeader("X-Payment") # Return 402 on the first request, then 200 on the second if not session_data: @@ -202,6 +202,17 @@ def render_POST(self, request): session_data["payment_attempts"] = 2 request.setResponseCode(402) return json.dumps(RESPONSE_402).encode() + elif echo_data == "402-long-error": + request.setResponseCode(402) + response_data = { + **RESPONSE_402, + "error": ( + "This is a long error message that exceeds the 32 " + "character limit for the error type prefix. It should " + "not be parsed as an error type." + ), + } + return json.dumps(response_data).encode() response_data: dict[str, Any] = { "url": url, diff --git a/tests/test_auth.py b/tests/test_auth.py index 7bcf6f8..5807c44 100644 --- a/tests/test_auth.py +++ b/tests/test_auth.py @@ -5,7 +5,6 @@ import pytest from zyte_api import AsyncZyteAPI -from zyte_api._x402 import _x402Handler from .test_x402 import HAS_X402 from .test_x402 import KEY as ETH_KEY @@ -113,9 +112,14 @@ def test_precedence(scenario, expected, monkeypatch): AsyncZyteAPI(**scenario.get("kwargs", {})) return client = AsyncZyteAPI(**scenario.get("kwargs", {})) + assert client.auth.key_type == expected["key_type"] + assert client.auth.key == expected["key"] if expected["key_type"] == "zyte": - assert client.auth == expected["key"] + with pytest.warns(DeprecationWarning, match="api_key property is deprecated"): + assert client.api_key == expected["key"] else: - assert expected["key_type"] == "eth" - assert isinstance(client.auth, _x402Handler) - assert client.auth.client.account.key.hex() == expected["key"] + with pytest.raises( + NotImplementedError, + match="api_key is not available when using an Ethereum private key", + ): + client.api_key # noqa: B018 diff --git a/tests/test_x402.py b/tests/test_x402.py index 8a53855..83be18c 100644 --- a/tests/test_x402.py +++ b/tests/test_x402.py @@ -396,6 +396,17 @@ async def test_no_cache(mockserver): assert len(cache) == 0 assert client.agg_stats.n_402_req == 2 + # Request refresh + input = { + "url": "https://a.example", + "httpResponseBody": True, + "echoData": "402-payment-retry-2", + } + actual_result = await client.get(input) + assert actual_result == output + assert len(cache) == 0 + assert client.agg_stats.n_402_req == 3 + @pytest.mark.skipif(not HAS_X402, reason="x402 not installed") @pytest.mark.asyncio @@ -536,3 +547,29 @@ async def test_no_payment_retry_exceeded(mockserver): assert client.agg_stats.status_codes == {402: 2} assert client.agg_stats.exception_types == {} assert client.agg_stats.api_error_types == {"/x402/use-basic-auth-or-x402": 2} + + +@pytest.mark.asyncio +async def test_long_error(mockserver): + client = AsyncZyteAPI(api_key="a", api_url=mockserver.urljoin("/")) + input = { + "url": "https://a.example", + "httpResponseBody": True, + "echoData": "402-long-error", + } + + with reset_x402_cache() as cache: + assert len(cache) == 0 + assert client.agg_stats.n_402_req == 0 + with pytest.raises(RequestError): + await client.get(input) + assert len(cache) == 0 + + assert client.agg_stats.n_success == 0 + assert client.agg_stats.n_fatal_errors == 1 + assert client.agg_stats.n_attempts == 2 + assert client.agg_stats.n_errors == 2 + assert client.agg_stats.n_402_req == 0 + assert client.agg_stats.status_codes == {402: 2} + assert client.agg_stats.exception_types == {} + assert client.agg_stats.api_error_types == {None: 2} diff --git a/zyte_api/_async.py b/zyte_api/_async.py index c1f7fc4..9c3b2d6 100644 --- a/zyte_api/_async.py +++ b/zyte_api/_async.py @@ -6,6 +6,7 @@ from functools import partial from os import environ from typing import TYPE_CHECKING, Any +from warnings import warn import aiohttp from tenacity import AsyncRetrying @@ -81,6 +82,25 @@ def iter( ) +class AuthInfo: + def __init__(self, *, _auth): + self._auth = _auth + + @property + def key(self) -> str: + if isinstance(self._auth, str): + return self._auth + assert isinstance(self._auth, _x402Handler) + return self._auth.client.account.key.hex() + + @property + def key_type(self): + if isinstance(self._auth, str): + return "zyte" + assert isinstance(self._auth, _x402Handler) + return "eth" + + class AsyncZyteAPI: """:ref:`Asynchronous Zyte API client `. @@ -109,24 +129,39 @@ def __init__( self.retrying = retrying or zyte_api_retrying self.user_agent = user_agent or USER_AGENT self._semaphore = asyncio.Semaphore(n_conn) - self.auth: str | _x402Handler + self._auth: str | _x402Handler + self.auth: AuthInfo self._load_auth(api_key, eth_key) def _load_auth(self, api_key: str | None, eth_key: str | None): if api_key: - self.auth = api_key + self._auth = api_key elif eth_key: - self.auth = _x402Handler(eth_key, self._semaphore, self.agg_stats) + self._auth = _x402Handler(eth_key, self._semaphore, self.agg_stats) elif api_key := environ.get(API_KEY_ENV_VAR): - self.auth = api_key + self._auth = api_key elif eth_key := environ.get("ZYTE_API_ETH_KEY"): - self.auth = _x402Handler(eth_key, self._semaphore, self.agg_stats) + self._auth = _x402Handler(eth_key, self._semaphore, self.agg_stats) else: raise NoApiKey( "You must provide either a Zyte API key or an Ethereum " "private key. For the latter, you must also install " "zyte-api as zyte-api[x402]." ) + self.auth = AuthInfo(_auth=self._auth) + + @property + def api_key(self) -> str: + if isinstance(self._auth, str): + warn( + "The api_key property is deprecated, use auth.key instead.", + DeprecationWarning, + stacklevel=2, + ) + return self._auth + raise NotImplementedError( + "api_key is not available when using an Ethereum private key, use auth.key instead." + ) async def get( self, @@ -146,10 +181,10 @@ async def get( headers = {"User-Agent": self.user_agent, "Accept-Encoding": "br"} auth_kwargs = {} - if isinstance(self.auth, str): - auth_kwargs["auth"] = aiohttp.BasicAuth(self.auth) + if isinstance(self._auth, str): + auth_kwargs["auth"] = aiohttp.BasicAuth(self._auth) else: - x402_headers = await self.auth.get_headers(url, query, headers, post) + x402_headers = await self._auth.get_headers(url, query, headers, post) headers.update(x402_headers) post_kwargs = { @@ -171,10 +206,10 @@ async def request(): stats.record_connected(resp.status, self.agg_stats) if ( resp.status == 402 - and isinstance(self.auth, _x402Handler) + and isinstance(self._auth, _x402Handler) and "X-Payment" in post_kwargs["headers"] ): - self.auth.refresh_post_kwargs(post_kwargs, await resp.json()) + self._auth.refresh_post_kwargs(post_kwargs, await resp.json()) if resp.status >= 400: content = await resp.read() resp.release() From 9d5115be02c8aeeb542cd90f405a6ba8441b0ec9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Mon, 28 Jul 2025 22:12:12 +0200 Subject: [PATCH 20/25] Expose AuthInfo --- zyte_api/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zyte_api/__init__.py b/zyte_api/__init__.py index eda97e6..fab06e3 100644 --- a/zyte_api/__init__.py +++ b/zyte_api/__init__.py @@ -2,7 +2,7 @@ Python client libraries and command line utilities for Zyte API """ -from ._async import AsyncZyteAPI +from ._async import AsyncZyteAPI, AuthInfo from ._errors import RequestError from ._retry import ( AggressiveRetryFactory, From 5b0072505627cededd5bded6d4dd64fe4ed13565 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Mon, 28 Jul 2025 22:16:21 +0200 Subject: [PATCH 21/25] =?UTF-8?q?AuthInfo.key=5Ftype=20=E2=86=92=20type?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_auth.py | 2 +- zyte_api/_async.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_auth.py b/tests/test_auth.py index 5807c44..f8dc41c 100644 --- a/tests/test_auth.py +++ b/tests/test_auth.py @@ -112,7 +112,7 @@ def test_precedence(scenario, expected, monkeypatch): AsyncZyteAPI(**scenario.get("kwargs", {})) return client = AsyncZyteAPI(**scenario.get("kwargs", {})) - assert client.auth.key_type == expected["key_type"] + assert client.auth.type == expected["key_type"] assert client.auth.key == expected["key"] if expected["key_type"] == "zyte": with pytest.warns(DeprecationWarning, match="api_key property is deprecated"): diff --git a/zyte_api/_async.py b/zyte_api/_async.py index 9c3b2d6..826c378 100644 --- a/zyte_api/_async.py +++ b/zyte_api/_async.py @@ -94,7 +94,7 @@ def key(self) -> str: return self._auth.client.account.key.hex() @property - def key_type(self): + def type(self) -> str: if isinstance(self._auth, str): return "zyte" assert isinstance(self._auth, _x402Handler) From e3931c779a1f91bb61256d06f9a9811ac601bf25 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Tue, 29 Jul 2025 11:40:39 +0500 Subject: [PATCH 22/25] Update tests/mockserver.py --- tests/mockserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/mockserver.py b/tests/mockserver.py index 8f3ef01..ea6a8d7 100644 --- a/tests/mockserver.py +++ b/tests/mockserver.py @@ -169,7 +169,7 @@ def render_POST(self, request): elif echo_data == "402-payment-retry-exceeded": assert request.getHeader("X-Payment") # Return 402 on the first 2 requests, then 200 on the third - # (the client will give up after 2 attempts, so the will be no + # (the client will give up after 2 attempts, so there will be no # third in practice) if not session_data: session_data["payment_attempts"] = 1 From 3557ff11a8abb771a9d7d1fcd440d0623644015e Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Tue, 29 Jul 2025 17:13:46 +0500 Subject: [PATCH 23/25] Update tests/mockserver.py --- tests/mockserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/mockserver.py b/tests/mockserver.py index ea6a8d7..cf3fb11 100644 --- a/tests/mockserver.py +++ b/tests/mockserver.py @@ -192,7 +192,7 @@ def render_POST(self, request): elif echo_data == "402-no-payment-retry-exceeded": assert not request.getHeader("X-Payment") # Return 402 on the first 2 requests, then 200 on the third - # (the client will give up after 2 attempts, so the will be no + # (the client will give up after 2 attempts, so there will be no # third in practice) if not session_data: session_data["payment_attempts"] = 1 From 852fb1c7e360bb40810c04b5970f9e08f918f27e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Thu, 7 Aug 2025 22:25:10 +0200 Subject: [PATCH 24/25] Add release notes --- CHANGES.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 65df607..61611c7 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,15 @@ Changes ======= +0.8.0 (unreleased) +------------------ + +* Added :ref:`x402 support `. + + .. note:: This is an upcoming feature of Zyte API. If you want to use it, + keep an eye on the `Zyte blog `_ for a release + announcement. + 0.7.1 (2025-06-05) ------------------ From 46a3bddd2fe447f22dcf320bfb4f79a1ad84c761 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Thu, 7 Aug 2025 23:32:45 +0200 Subject: [PATCH 25/25] Prepare release notes, set the right endpoint for x402 --- CHANGES.rst | 4 ---- docs/conf.py | 5 +++++ tests/test_auth.py | 5 +++++ tests/test_main.py | 12 +++++++++++- tests/test_x402.py | 5 ++++- tox.ini | 2 +- zyte_api/__main__.py | 16 +++++++++++++--- zyte_api/_async.py | 15 +++++++++++---- zyte_api/_sync.py | 8 +++++--- 9 files changed, 55 insertions(+), 17 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 61611c7..57f3cd2 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -6,10 +6,6 @@ Changes * Added :ref:`x402 support `. - .. note:: This is an upcoming feature of Zyte API. If you want to use it, - keep an eye on the `Zyte blog `_ for a release - announcement. - 0.7.1 (2025-06-05) ------------------ diff --git a/docs/conf.py b/docs/conf.py index 0c02232..d0223a4 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -183,6 +183,11 @@ # A list of files that should not be packed into the epub file. epub_exclude_files = ["search.html"] +# -- Smart quotes ------------------------------------------------------------ +# Disabled so that, in the CLI reference, ``--api-key` and similar options are +# not turned into –api-key in descriptions. +smartquotes = False + # -- Extension configuration ------------------------------------------------- diff --git a/tests/test_auth.py b/tests/test_auth.py index f8dc41c..86838ff 100644 --- a/tests/test_auth.py +++ b/tests/test_auth.py @@ -114,6 +114,11 @@ def test_precedence(scenario, expected, monkeypatch): client = AsyncZyteAPI(**scenario.get("kwargs", {})) assert client.auth.type == expected["key_type"] assert client.auth.key == expected["key"] + assert ( + client.api_url == "https://api-x402.zyte.com/v1/" + if expected["key_type"] == "eth" + else "https://api.zyte.com/v1/" + ) if expected["key_type"] == "zyte": with pytest.warns(DeprecationWarning, match="api_key property is deprecated"): assert client.api_key == expected["key"] diff --git a/tests/test_main.py b/tests/test_main.py index 12747b9..a30cbdc 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -11,7 +11,17 @@ from zyte_api.aio.errors import RequestError -class MockRequestError(Exception): +class MockRequestError(RequestError): + def __init__(self, *args, **kwargs): + super().__init__( + *args, + query={}, + response_content=b"", + request_info=None, + history=None, + **kwargs, + ) + @property def parsed(self): return Mock( diff --git a/tests/test_x402.py b/tests/test_x402.py index 83be18c..bc89a72 100644 --- a/tests/test_x402.py +++ b/tests/test_x402.py @@ -18,7 +18,10 @@ def test_eth_key_param(): if HAS_X402: - AsyncZyteAPI(eth_key=KEY) + client = AsyncZyteAPI(eth_key=KEY) + assert client.auth.key == KEY + assert client.auth.type == "eth" + assert client.api_url == "https://api-x402.zyte.com/v1/" else: with pytest.raises(ImportError, match="No module named 'eth_account'"): AsyncZyteAPI(eth_key=KEY) diff --git a/tox.ini b/tox.ini index 7011cd0..dce2440 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = min,min-x402,py39,py310,py311,py312,py313,x402,mypy,docs,twine +envlist = pre-commit,mypy,min,min-x402,py39,py310,py311,py312,py313,x402,docs,twine [testenv] deps = diff --git a/zyte_api/__main__.py b/zyte_api/__main__.py index db55f46..060115a 100644 --- a/zyte_api/__main__.py +++ b/zyte_api/__main__.py @@ -1,5 +1,7 @@ """Basic command-line interface for Zyte API.""" +from __future__ import annotations + import argparse import asyncio import json @@ -12,6 +14,7 @@ from tenacity import retry_if_exception from zyte_api._async import AsyncZyteAPI +from zyte_api._errors import RequestError from zyte_api._retry import RetryFactory, _is_throttling_error from zyte_api._utils import create_session from zyte_api.constants import API_URL @@ -33,7 +36,7 @@ async def run( *, n_conn, stop_on_errors=_UNSET, - api_url, + api_url: str | None, api_key=None, retry_errors=True, store_errors=None, @@ -77,7 +80,7 @@ def write_output(content): try: result = await fut except Exception as e: - if store_errors: + if store_errors and isinstance(e, RequestError): write_output(e.parsed.response_body.decode()) if stop_on_errors: @@ -184,7 +187,14 @@ def _get_argument_parser(program_name="zyte-api"): ), ) p.add_argument( - "--api-url", help="Zyte API endpoint (default: %(default)s).", default=API_URL + "--api-url", + help=( + f"Zyte API endpoint (default: {API_URL}).\n" + f"\n" + f"Using an Ethereum private key, e.g. through --eth-key or " + f"through the ZYTE_API_ETH_KEY environment variable, changes the " + f"default API URL to https://api-x402.zyte.com/v1/.\n" + ), ) p.add_argument( "--loglevel", diff --git a/zyte_api/_async.py b/zyte_api/_async.py index 826c378..b19df17 100644 --- a/zyte_api/_async.py +++ b/zyte_api/_async.py @@ -111,7 +111,7 @@ def __init__( self, *, api_key: str | None = None, - api_url: str = API_URL, + api_url: str | None = None, n_conn: int = 15, retrying: AsyncRetrying | None = None, user_agent: str | None = None, @@ -123,7 +123,6 @@ def __init__( "AsyncRetrying." ) - self.api_url = api_url self.n_conn = n_conn self.agg_stats = AggStats() self.retrying = retrying or zyte_api_retrying @@ -131,9 +130,10 @@ def __init__( self._semaphore = asyncio.Semaphore(n_conn) self._auth: str | _x402Handler self.auth: AuthInfo - self._load_auth(api_key, eth_key) + self.api_url: str + self._load_auth(api_key, eth_key, api_url) - def _load_auth(self, api_key: str | None, eth_key: str | None): + def _load_auth(self, api_key: str | None, eth_key: str | None, api_url: str | None): if api_key: self._auth = api_key elif eth_key: @@ -149,6 +149,13 @@ def _load_auth(self, api_key: str | None, eth_key: str | None): "zyte-api as zyte-api[x402]." ) self.auth = AuthInfo(_auth=self._auth) + self.api_url = ( + api_url + if api_url is not None + else "https://api-x402.zyte.com/v1/" + if self.auth.type == "eth" + else API_URL + ) @property def api_key(self) -> str: diff --git a/zyte_api/_sync.py b/zyte_api/_sync.py index 66f24b8..5a318e4 100644 --- a/zyte_api/_sync.py +++ b/zyte_api/_sync.py @@ -4,7 +4,6 @@ from typing import TYPE_CHECKING from ._async import AsyncZyteAPI -from .constants import API_URL if TYPE_CHECKING: from collections.abc import Generator @@ -87,7 +86,10 @@ class ZyteAPI: Ethereum for payments. If not specified, it is read from the ``ZYTE_API_ETH_KEY`` environment variable. See :ref:`x402`. - *api_url* is the Zyte API base URL. + *api_url* is the Zyte API base URL. If set to ``None``, it defaults to + ``"https://api.zyte.com/v1/"``. If using an Ethereum private key, e.g. + through *eth_key* or through the ``ZYTE_API_ETH_KEY`` environment + variable, ``None`` results in ``"https://api-x402.zyte.com/v1/"`` instead. *n_conn* is the maximum number of concurrent requests to use. See :ref:`api-optimize`. @@ -106,7 +108,7 @@ def __init__( self, *, api_key: str | None = None, - api_url: str = API_URL, + api_url: str | None = None, n_conn: int = 15, retrying: AsyncRetrying | None = None, user_agent: str | None = None,