diff --git a/.changes/next-release/enhancement-Retries-30198.json b/.changes/next-release/enhancement-Retries-30198.json new file mode 100644 index 000000000000..5d68935c8e80 --- /dev/null +++ b/.changes/next-release/enhancement-Retries-30198.json @@ -0,0 +1,5 @@ +{ + "type": "enhancement", + "category": "Retries", + "description": "Introduced ``AWS_NEW_RETRIES_2026``, an opt-in environment variable (defaults to ``false``) that activates updated standard retry mode behavior. When set to ``true``, the standard retry mode uses lower initial backoff delays (50ms base instead of 1s for non-throttling errors), applies service-specific max attempt overrides, and honors the ``x-amz-retry-after`` response header for server-guided retry timing." +} diff --git a/awscli/botocore/configprovider.py b/awscli/botocore/configprovider.py index 9e40dd58695a..d7ae89b80828 100644 --- a/awscli/botocore/configprovider.py +++ b/awscli/botocore/configprovider.py @@ -21,9 +21,26 @@ from botocore import utils from botocore.exceptions import InvalidConfigError -logger = logging.getLogger(__name__) +try: + # This is not a public interface and is subject to abrupt breaking changes. + # Currently it's only available to internal users for testing and validation. + # Any usage is not advised or supported in external code bases. + from botocore.customizations.retries import DEFAULT_NEW_RETRIES +except ImportError: + DEFAULT_NEW_RETRIES = False + + +def _resolve_new_retries(): + _env_new_retries = os.environ.get('AWS_NEW_RETRIES_2026') + if _env_new_retries is not None: + return _env_new_retries.lower() == 'true' + return DEFAULT_NEW_RETRIES +NEW_RETRIES_ENABLED = _resolve_new_retries() + +logger = logging.getLogger(__name__) + #: A default dictionary that maps the logical names for session variables #: to the specific environment variables and configuration file names #: that contain the values for these variables. diff --git a/awscli/botocore/endpoint.py b/awscli/botocore/endpoint.py index 6094cc0d4385..1a28fc6152ec 100644 --- a/awscli/botocore/endpoint.py +++ b/awscli/botocore/endpoint.py @@ -318,7 +318,7 @@ def _needs_retry( request_dict=request_dict, ) handler_response = first_non_none_response(responses) - if handler_response is None: + if handler_response is None or handler_response is False: return False else: # Request needs to be retried, and we need to sleep diff --git a/awscli/botocore/retries/standard.py b/awscli/botocore/retries/standard.py index 0f82a6e3a6de..e89cfc25815a 100644 --- a/awscli/botocore/retries/standard.py +++ b/awscli/botocore/retries/standard.py @@ -26,7 +26,12 @@ import logging import random +import time +# This is not a public interface and is subject to abrupt breaking changes. +# Currently, it's only available to internal users for testing and validation. +# Any usage is not advised or supported in external code bases. +from botocore.configprovider import NEW_RETRIES_ENABLED from botocore.exceptions import ( ConnectionError, ConnectTimeoutError, @@ -37,27 +42,61 @@ from botocore.retries.base import BaseRetryableChecker, BaseRetryBackoff DEFAULT_MAX_ATTEMPTS = 3 +_SERVICE_MAX_ATTEMPTS = { + 'dynamodb': 4, + 'dynamodb-streams': 4, +} logger = logging.getLogger(__name__) -def register_retry_handler(client, max_attempts=DEFAULT_MAX_ATTEMPTS): - retry_quota = RetryQuotaChecker(quota.RetryQuota()) - +def register_retry_handler(client, max_attempts=None): service_id = client.meta.service_model.service_id service_event_name = service_id.hyphenize() + retry_event_adapter = RetryEventAdapter() + + if NEW_RETRIES_ENABLED: + if ( + max_attempts is None + and service_event_name in _SERVICE_MAX_ATTEMPTS + ): + max_attempts = _SERVICE_MAX_ATTEMPTS[service_event_name] + elif max_attempts is None: + max_attempts = DEFAULT_MAX_ATTEMPTS + throttling_detector = ThrottlingErrorDetector(retry_event_adapter) + retry_quota = RetryQuotaChecker( + quota.RetryQuota(), throttling_detector + ) + handler = RetryHandler( + retry_policy=RetryPolicy( + retry_checker=StandardRetryConditions( + max_attempts=max_attempts + ), + retry_backoff=ExponentialBackoff( + service_name=service_event_name, + throttling_detector=throttling_detector, + ), + ), + retry_event_adapter=retry_event_adapter, + retry_quota=retry_quota, + service_name=service_event_name, + ) + else: + retry_quota = RetryQuotaChecker(quota.RetryQuota()) + handler = RetryHandler( + retry_policy=RetryPolicy( + retry_checker=StandardRetryConditions( + max_attempts=max_attempts or DEFAULT_MAX_ATTEMPTS + ), + retry_backoff=ExponentialBackoff(), + ), + retry_event_adapter=retry_event_adapter, + retry_quota=retry_quota, + ) + client.meta.events.register( f'after-call.{service_event_name}', retry_quota.release_retry_quota ) - handler = RetryHandler( - retry_policy=RetryPolicy( - retry_checker=StandardRetryConditions(max_attempts=max_attempts), - retry_backoff=ExponentialBackoff(), - ), - retry_event_adapter=RetryEventAdapter(), - retry_quota=retry_quota, - ) - unique_id = f'retry-config-{service_event_name}' client.meta.events.register( f'needs-retry.{service_event_name}', @@ -74,10 +113,28 @@ class RetryHandler: as an event handler. """ - def __init__(self, retry_policy, retry_event_adapter, retry_quota): + # Temporary hard-coded list of long-polling operations. This will be + # replaced by the aws.api#longPoll modeled trait once it is available + # in service models. + _LONG_POLLING_OPERATIONS = { + 'sqs': {'ReceiveMessage'}, + 'sfn': {'GetActivityTask'}, + 'swf': {'PollForActivityTask', 'PollForDecisionTask'}, + } + + def __init__( + self, + retry_policy, + retry_event_adapter, + retry_quota, + service_name=None, + sleep=time.sleep, + ): self._retry_policy = retry_policy self._retry_event_adapter = retry_event_adapter self._retry_quota = retry_quota + self._service_name = service_name + self._sleep = sleep def needs_retry(self, **kwargs): """Connect as a handler to the needs-retry event.""" @@ -93,6 +150,23 @@ def needs_retry(self, **kwargs): retry_delay, ) else: + if NEW_RETRIES_ENABLED: + if self._is_long_polling_operation(context): + polling_delay = self._retry_policy.compute_retry_delay( + context + ) + self._sleep(polling_delay) + logger.debug( + "Retry needed but retry quota reached, " + "not retrying request." + ) + self._retry_event_adapter.adapt_retry_response_from_context( + context + ) + # Return False (non-None) to prevent any later needs-retry + # handler from returning a delay that would cause + # _needs_retry in endpoint.py to sleep again. + return False logger.debug( "Retry needed but retry quota reached, " "not retrying request." @@ -102,6 +176,17 @@ def needs_retry(self, **kwargs): self._retry_event_adapter.adapt_retry_response_from_context(context) return retry_delay + def _is_long_polling_operation(self, context): + # TODO: Replace this hard-coded list with a model check once + # aws.api#longPoll trait is available in service models. + if self._service_name is None or context.operation_model is None: + return False + operations = self._LONG_POLLING_OPERATIONS.get(self._service_name) + return ( + operations is not None + and context.operation_model.name in operations + ) + class RetryEventAdapter: """Adapter to existing retry interface used in the endpoints layer. @@ -252,11 +337,31 @@ def compute_retry_delay(self, context): class ExponentialBackoff(BaseRetryBackoff): _BASE = 2 _MAX_BACKOFF = 20 + _RETRY_AFTER_HEADER = 'x-amz-retry-after' + _RETRY_AFTER_MAX_ADDITIONAL = 5 # seconds + + _DEFAULT_BACKOFF_CONFIG = { + 'throttling_base_scale': 1, + 'non_throttling_base_scale': 0.05, + } - def __init__(self, max_backoff=20, random=random.random): + _SERVICE_BACKOFF_CONFIG = { + 'dynamodb': {'non_throttling_base_scale': 0.025}, + 'dynamodb-streams': {'non_throttling_base_scale': 0.025}, + } + + def __init__( + self, + max_backoff=20, + random=random.random, + service_name=None, + throttling_detector=None, + ): self._base = self._BASE self._max_backoff = max_backoff self._random = random + self._service_name = service_name + self._throttling_detector = throttling_detector def delay_amount(self, context): """Calculates delay based on exponential backoff. @@ -272,10 +377,64 @@ def delay_amount(self, context): # The context.attempt_number is a 1-based value, but we have # to calculate the delay based on i based a 0-based value. We # want the first delay to just be ``rand(0, 1)``. - return min( - self._random() * (self._base ** (context.attempt_number - 1)), - self._max_backoff, + if NEW_RETRIES_ENABLED: + t_i = self._random() * min( + self._get_base_scale(context) + * (self._base ** (context.attempt_number - 1)), + self._max_backoff, + ) + + # Check for x-amz-retry-after header + retry_after = self._get_retry_after_delay(context) + if retry_after is not None: + # min is 't_i', max is 't_i + 5' + return max( + t_i, + min(retry_after, self._RETRY_AFTER_MAX_ADDITIONAL + t_i), + ) + + return t_i + else: + return self._random() * min( + (self._base ** (context.attempt_number - 1)), + self._max_backoff, + ) + + def _get_base_scale(self, context): + if ( + self._throttling_detector + and self._throttling_detector.is_throttling_error_from_context( + context + ) + ): + return self._DEFAULT_BACKOFF_CONFIG['throttling_base_scale'] + if self._service_name in self._SERVICE_BACKOFF_CONFIG: + return self._SERVICE_BACKOFF_CONFIG[self._service_name][ + 'non_throttling_base_scale' + ] + return self._DEFAULT_BACKOFF_CONFIG['non_throttling_base_scale'] + + def _get_retry_after_delay(self, context): + if context.http_response is None: + return None + retry_after_ms = context.http_response.headers.get( + self._RETRY_AFTER_HEADER ) + if retry_after_ms is None: + return None + try: + value = int(retry_after_ms) / 1000.0 + if value < 0: + raise ValueError("Negative retry-after value") + return value + except (ValueError, OverflowError) as e: + logger.debug( + "Invalid %s header value: %s, ignoring. Error: %s", + self._RETRY_AFTER_HEADER, + retry_after_ms, + e, + ) + return None class MaxAttemptsChecker(BaseRetryableChecker): @@ -424,6 +583,9 @@ def __init__(self, retry_event_adapter): # This expects the kwargs from needs-retry to be passed through. def is_throttling_error(self, **kwargs): context = self._retry_event_adapter.create_retry_context(**kwargs) + return self.is_throttling_error_from_context(context) + + def is_throttling_error_from_context(self, context): if self._fixed_error_code_detector.is_retryable(context): return True error_type = self._modeled_error_detector.detect_error_type(context) @@ -473,7 +635,9 @@ def is_retryable(self, context): class RetryQuotaChecker: _RETRY_COST = 5 + _RETRY_COST_V2 = 14 _NO_RETRY_INCREMENT = 1 + _THROTTLING_RETRY_COST = 5 _TIMEOUT_RETRY_REQUEST = 10 _TIMEOUT_EXCEPTIONS = (ConnectTimeoutError, ReadTimeoutError) @@ -483,16 +647,23 @@ class RetryQuotaChecker: # a BaseRetryableChecker implies you can call .is_retryable(context) # as many times as you want and not affect anything. - def __init__(self, quota): + def __init__(self, quota, throttling_detector=None): self._quota = quota + self._throttling_detector = throttling_detector # This tracks the last amount self._last_amount_acquired = None def acquire_retry_quota(self, context): - if self._is_timeout_error(context): - capacity_amount = self._TIMEOUT_RETRY_REQUEST + if NEW_RETRIES_ENABLED: + if self._is_throttling_error(context): + capacity_amount = self._THROTTLING_RETRY_COST + else: + capacity_amount = self._RETRY_COST_V2 else: - capacity_amount = self._RETRY_COST + if self._is_timeout_error(context): + capacity_amount = self._TIMEOUT_RETRY_REQUEST + else: + capacity_amount = self._RETRY_COST success = self._quota.acquire(capacity_amount) if success: # We add the capacity amount to the request context so we know @@ -503,6 +674,13 @@ def acquire_retry_quota(self, context): context.add_retry_metadata(RetryQuotaReached=True) return False + def _is_throttling_error(self, context): + if self._throttling_detector is None: + return False + return self._throttling_detector.is_throttling_error_from_context( + context + ) + def _is_timeout_error(self, context): return isinstance(context.caught_exception, self._TIMEOUT_EXCEPTIONS) diff --git a/tests/unit/botocore/retries/test_standard_retry_v2_1.py b/tests/unit/botocore/retries/test_standard_retry_v2_1.py new file mode 100644 index 000000000000..d1d506dd7eb3 --- /dev/null +++ b/tests/unit/botocore/retries/test_standard_retry_v2_1.py @@ -0,0 +1,439 @@ +"""Temporary test suite for new updated retry behavior. + +These tests validate the updated retry behavior for standard retries, +which is currently gated behind an internal flag and not yet available +to external users. Once the changes are validated internally and released +publicly, these tests will replace the corresponding tests in +test_standard.py and this file will be removed. +""" + +from collections import Counter + +import pytest +from botocore import configprovider +from botocore.awsrequest import AWSResponse +from botocore.exceptions import ReadTimeoutError +from botocore.retries import quota, standard + +from tests import BaseEnvVar, mock, unittest + + +@mock.patch('botocore.retries.standard.NEW_RETRIES_ENABLED', True) +class TestExponentialBackoff(unittest.TestCase): + def setUp(self): + self.random = lambda: 1 + self.backoff = standard.ExponentialBackoff( + max_backoff=20, random=self.random + ) + + def test_range_of_exponential_backoff(self): + backoffs = [ + self.backoff.delay_amount(standard.RetryContext(attempt_number=i)) + for i in range(1, 12) + ] + # Note that we're capped at 20 which is our max backoff. + # Cap kicks in at attempt 10 + self.assertEqual( + backoffs, [0.05, 0.1, 0.2, 0.4, 0.8, 1.6, 3.2, 6.4, 12.8, 20, 20] + ) + + def test_exponential_backoff_with_jitter(self): + backoff = standard.ExponentialBackoff() + backoffs = [ + backoff.delay_amount(standard.RetryContext(attempt_number=3)) + for i in range(10) + ] + # For attempt number 3, we should have a max value of 0.2 (0.05 * 2 ^ 2), + # so we can assert all the backoff values are within that range. + # 0.05 is the default non-throttling scale + for x in backoffs: + self.assertTrue(0 <= x <= 0.2) + + def test_uniform_rand_dist_on_max_attempts(self): + backoff = standard.ExponentialBackoff() + num_datapoints = 10_000 + backoffs = [ + backoff.delay_amount(standard.RetryContext(attempt_number=10)) + for i in range(num_datapoints) + ] + self._assert_looks_like_uniform_distribution(backoffs) + + def _assert_looks_like_uniform_distribution(self, backoffs): + histogram = Counter(int(el) for el in backoffs) + expected_value = len(backoffs) / len(histogram) + # This is an arbitrarily chosen tolerance, but we're being fairly + # lenient here and giving a 20% tolerance. We're only interested + # in cases where it's obviously broken and not a uniform distribution. + tolerance = 0.20 + low = expected_value - (expected_value * tolerance) + high = expected_value + (expected_value * tolerance) + out_of_range = [ + str(i) for i in histogram.values() if not low <= i <= high + ] + if out_of_range: + raise AssertionError( + "Backoff values outside of uniform distribution range " + f"({low} - {high}): {', '.join(out_of_range)}" + ) + + +@mock.patch('botocore.retries.standard.NEW_RETRIES_ENABLED', True) +class TestRetryQuotaChecker(unittest.TestCase): + def setUp(self): + self.quota = quota.RetryQuota(500) + self.throttling_detector = standard.ThrottlingErrorDetector( + standard.RetryEventAdapter() + ) + self.quota_checker = standard.RetryQuotaChecker( + self.quota, self.throttling_detector + ) + self.request_context = {} + + def create_context( + self, + is_timeout_error=False, + status_code=200, + is_throttling_error=False, + ): + caught_exception = None + parsed_response = {} + if is_timeout_error: + caught_exception = ReadTimeoutError(endpoint_url='https://foo') + if is_throttling_error: + status_code = 400 + parsed_response = {'Error': {'Code': 'Throttling'}} + http_response = AWSResponse( + status_code=status_code, raw=None, headers={}, url='https://foo/' + ) + context = standard.RetryContext( + attempt_number=1, + request_context=self.request_context, + caught_exception=caught_exception, + http_response=http_response, + parsed_response=parsed_response, + ) + return context + + def test_can_acquire_quota_for_throttling_error(self): + self.assertTrue( + self.quota_checker.acquire_retry_quota( + self.create_context(is_throttling_error=True) + ) + ) + self.assertEqual(self.request_context['retry_quota_capacity'], 5) + + def test_can_acquire_quota_non_timeout_error(self): + self.assertTrue( + self.quota_checker.acquire_retry_quota(self.create_context()) + ) + self.assertEqual(self.request_context['retry_quota_capacity'], 14) + + def test_can_acquire_quota_for_timeout_error(self): + self.assertTrue( + self.quota_checker.acquire_retry_quota( + self.create_context(is_timeout_error=True) + ) + ) + self.assertEqual(self.request_context['retry_quota_capacity'], 14) + + def test_can_release_quota_based_on_context_value_on_success(self): + context = self.create_context() + # This is where we had to retry the request but eventually + # succeeded. + http_response = self.create_context(status_code=200).http_response + self.assertTrue(self.quota_checker.acquire_retry_quota(context)) + self.assertEqual(self.quota.available_capacity, 486) + self.quota_checker.release_retry_quota( + context.request_context, http_response=http_response + ) + self.assertEqual(self.quota.available_capacity, 500) + + def test_can_release_quota_when_succeed_after_throttling_error(self): + context = self.create_context(is_throttling_error=True) + http_response = self.create_context(status_code=200).http_response + self.assertTrue(self.quota_checker.acquire_retry_quota(context)) + self.assertEqual(self.quota.available_capacity, 495) + self.quota_checker.release_retry_quota( + context.request_context, http_response=http_response + ) + self.assertEqual(self.quota.available_capacity, 500) + + def test_dont_release_quota_if_all_retries_failed(self): + context = self.create_context() + # If max_attempts_reached is True, then it means we used up all + # our retry attempts and still failed. In this case we shouldn't + # give any retry quota back. + http_response = self.create_context(status_code=500).http_response + self.assertTrue(self.quota_checker.acquire_retry_quota(context)) + self.assertEqual(self.quota.available_capacity, 486) + self.quota_checker.release_retry_quota( + context.request_context, http_response=http_response + ) + self.assertEqual(self.quota.available_capacity, 486) + + def test_can_release_default_quota_if_not_in_context(self): + context = self.create_context() + self.assertTrue(self.quota_checker.acquire_retry_quota(context)) + self.assertEqual(self.quota.available_capacity, 486) + # We're going to remove the quota amount from the request context. + # This represents a successful request with no retries. + self.request_context.pop('retry_quota_capacity') + self.quota_checker.release_retry_quota( + context.request_context, context.http_response + ) + # We expect only 1 unit was released. + self.assertEqual(self.quota.available_capacity, 487) + + def test_acquire_quota_fails(self): + quota_checker = standard.RetryQuotaChecker( + quota.RetryQuota(initial_capacity=14) + ) + # The first one succeeds. + self.assertTrue( + quota_checker.acquire_retry_quota(self.create_context()) + ) + # But we should fail now because we're out of quota. + self.request_context.pop('retry_quota_capacity') + self.assertFalse( + quota_checker.acquire_retry_quota(self.create_context()) + ) + self.assertNotIn('retry_quota_capacity', self.request_context) + + def test_quota_reached_adds_retry_metadata(self): + quota_checker = standard.RetryQuotaChecker( + quota.RetryQuota(initial_capacity=0) + ) + context = self.create_context() + self.assertFalse(quota_checker.acquire_retry_quota(context)) + self.assertEqual( + context.get_retry_metadata(), {'RetryQuotaReached': True} + ) + + def test_single_failed_request_does_not_give_back_quota(self): + context = self.create_context() + http_response = self.create_context(status_code=400).http_response + # First deduct some amount of the retry quota so we're not hitting + # the upper bound. + self.quota.acquire(50) + self.assertEqual(self.quota.available_capacity, 450) + self.quota_checker.release_retry_quota( + context.request_context, http_response=http_response + ) + self.assertEqual(self.quota.available_capacity, 450) + + +@mock.patch('botocore.retries.standard.NEW_RETRIES_ENABLED', True) +class TestServiceSpecificRetries(unittest.TestCase): + def _make_retry_context(self, attempt, status_code, error_code=None): + http_response = AWSResponse( + status_code=status_code, raw=None, headers={}, url='https://foo/' + ) + parsed_response = {} + if error_code: + parsed_response = {'Error': {'Code': error_code}} + return standard.RetryContext( + attempt_number=attempt, + operation_model=mock.Mock(error_shapes=[]), + http_response=http_response, + parsed_response=parsed_response, + request_context={}, + ) + + def test_dynamodb_base_backoff_and_increased_retries(self): + retry_quota_bucket = quota.RetryQuota() + + throttling_detector = standard.ThrottlingErrorDetector( + standard.RetryEventAdapter() + ) + retry_quota = standard.RetryQuotaChecker( + retry_quota_bucket, throttling_detector + ) + backoff = standard.ExponentialBackoff( + random=lambda: 1, + service_name='dynamodb', + throttling_detector=throttling_detector, + ) + + retry_conditions = standard.StandardRetryConditions(max_attempts=4) + + # Attempts 1-3: retryable + for attempt, expected_quota, expected_delay in [ + (1, 486, 0.025), + (2, 472, 0.05), + (3, 458, 0.1), + ]: + context = self._make_retry_context( + attempt=attempt, status_code=500 + ) + self.assertTrue(retry_conditions.is_retryable(context)) + self.assertTrue(retry_quota.acquire_retry_quota(context)) + self.assertEqual( + retry_quota_bucket.available_capacity, expected_quota + ) + self.assertEqual(backoff.delay_amount(context), expected_delay) + + # Attempt 4: NOT retryable because max_attempts=4 + context4 = self._make_retry_context(attempt=4, status_code=500) + self.assertFalse(retry_conditions.is_retryable(context4)) + self.assertEqual(retry_quota_bucket.available_capacity, 458) + self.assertEqual( + context4.get_retry_metadata(), {'MaxAttemptsReached': True} + ) + + def test_sqs_triggers_long_polling_backoff_when_token_empty(self): + mock_sleep = mock.Mock() + retry_policy = mock.Mock(spec=standard.RetryPolicy) + retry_policy.should_retry.return_value = True + retry_policy.compute_retry_delay.return_value = 0.05 + + retry_quota = mock.Mock(spec=standard.RetryQuotaChecker) + retry_quota.acquire_retry_quota.return_value = False + + handler = standard.RetryHandler( + retry_policy=retry_policy, + retry_event_adapter=standard.RetryEventAdapter(), + retry_quota=retry_quota, + service_name='sqs', + sleep=mock_sleep, + ) + + context = self._make_retry_context(attempt=1, status_code=500) + context.operation_model = mock.Mock() + context.operation_model.name = 'ReceiveMessage' + + result = handler.needs_retry( + response=(context.http_response, {}), + attempts=1, + caught_exception=None, + request_dict={'context': {}}, + operation=context.operation_model, + ) + + assert result is False + retry_quota.acquire_retry_quota.assert_called_once() + mock_sleep.assert_called_once_with(0.05) + + def test_non_long_polling_operation_does_not_sleep_when_quota_exhausted( + self, + ): + mock_sleep = mock.Mock() + retry_policy = mock.Mock(spec=standard.RetryPolicy) + retry_policy.should_retry.return_value = True + retry_policy.compute_retry_delay.return_value = 0.05 + + retry_quota = mock.Mock(spec=standard.RetryQuotaChecker) + retry_quota.acquire_retry_quota.return_value = False + + handler = standard.RetryHandler( + retry_policy=retry_policy, + retry_event_adapter=standard.RetryEventAdapter(), + retry_quota=retry_quota, + service_name='sqs', + sleep=mock_sleep, + ) + + context = self._make_retry_context(attempt=1, status_code=500) + context.operation_model = mock.Mock() + context.operation_model.name = 'SendMessage' + + result = handler.needs_retry( + response=(context.http_response, {}), + attempts=1, + caught_exception=None, + request_dict={'context': {}}, + operation=context.operation_model, + ) + + self.assertIsNone(result) + mock_sleep.assert_not_called() + + +@mock.patch('botocore.retries.standard.NEW_RETRIES_ENABLED', True) +class TestRetryAfterHeaderInRetries: + def setup_method(self): + throttling_detector = standard.ThrottlingErrorDetector( + standard.RetryEventAdapter() + ) + self.retry_quota_bucket = quota.RetryQuota() + self.retry_quota = standard.RetryQuotaChecker( + self.retry_quota_bucket, throttling_detector + ) + self.backoff = standard.ExponentialBackoff( + random=lambda: 1, + throttling_detector=throttling_detector, + ) + + def _make_retry_context( + self, attempt, status_code, error_code=None, retry_after='0' + ): + http_response = AWSResponse( + status_code=status_code, + raw=None, + headers={'x-amz-retry-after': retry_after}, + url='https://foo/', + ) + parsed_response = {} + if error_code: + parsed_response = {'Error': {'Code': error_code}} + return standard.RetryContext( + attempt_number=attempt, + operation_model=mock.Mock(error_shapes=[]), + http_response=http_response, + parsed_response=parsed_response, + request_context={}, + ) + + @pytest.mark.parametrize( + 'retry_after_header,' + 'expected_capacity_after_failure,' + 'expected_delay_amount,' + 'expected_capacity_after_success', + [ + ('1500', 486, 1.5, 500), + ('0', 486, 0.05, 500), + ('10000', 486, 5.05, 500), + ('invalid', 486, 0.05, 500), + ('-100', 486, 0.05, 500), + ], + ) + def test_x_amz_retry_after_header_is_honored( + self, + retry_after_header, + expected_capacity_after_failure, + expected_delay_amount, + expected_capacity_after_success, + ): + context = self._make_retry_context( + attempt=1, status_code=500, retry_after=retry_after_header + ) + + assert self.retry_quota.acquire_retry_quota(context) + assert ( + self.retry_quota_bucket.available_capacity + == expected_capacity_after_failure + ) + assert self.backoff.delay_amount(context) == expected_delay_amount + + http_success = AWSResponse( + status_code=200, raw=None, headers={}, url='https://foo/' + ) + self.retry_quota.release_retry_quota( + context.request_context, http_response=http_success + ) + assert ( + self.retry_quota_bucket.available_capacity + == expected_capacity_after_success + ) + + +class TestNewRetriesEnvironmentVariable(BaseEnvVar): + def test_env_var_true_enables_new_retries(self): + self.environ['AWS_NEW_RETRIES_2026'] = 'true' + self.assertTrue(configprovider._resolve_new_retries()) + + def test_env_var_false_disables_new_retries(self): + self.environ['AWS_NEW_RETRIES_2026'] = 'false' + self.assertFalse(configprovider._resolve_new_retries()) + + def test_no_env_var_uses_default(self): + self.assertFalse(configprovider._resolve_new_retries())