diff --git a/.changes/next-release/enhancement-Retries-30198.json b/.changes/next-release/enhancement-Retries-30198.json
new file mode 100644
index 000000000000..5d68935c8e80
--- /dev/null
+++ b/.changes/next-release/enhancement-Retries-30198.json
@@ -0,0 +1,5 @@
+{
+  "type": "enhancement",
+  "category": "Retries",
+  "description": "Introduced ``AWS_NEW_RETRIES_2026``, an opt-in environment variable (defaults to ``false``) that activates updated standard retry mode behavior. When set to ``true``, the standard retry mode uses lower initial backoff delays (50ms base instead of 1s for non-throttling errors), applies service-specific max attempt overrides, and honors the ``x-amz-retry-after`` response header for server-guided retry timing."
+}
diff --git a/awscli/botocore/configprovider.py b/awscli/botocore/configprovider.py
index 9e40dd58695a..d7ae89b80828 100644
--- a/awscli/botocore/configprovider.py
+++ b/awscli/botocore/configprovider.py
@@ -21,9 +21,26 @@
 from botocore import utils
 from botocore.exceptions import InvalidConfigError
 
-logger = logging.getLogger(__name__)
+try:
+    # This is not a public interface and is subject to abrupt breaking changes.
+    # Currently it's only available to internal users for testing and validation.
+    # Any usage is not advised or supported in external code bases.
+    from botocore.customizations.retries import DEFAULT_NEW_RETRIES
+except ImportError:
+    DEFAULT_NEW_RETRIES = False
+
+
+def _resolve_new_retries():
+    _env_new_retries = os.environ.get('AWS_NEW_RETRIES_2026')
+    if _env_new_retries is not None:
+        return _env_new_retries.lower() == 'true'
+    return DEFAULT_NEW_RETRIES
 
 
+NEW_RETRIES_ENABLED = _resolve_new_retries()
+
+logger = logging.getLogger(__name__)
+
 #: A default dictionary that maps the logical names for session variables
 #: to the specific environment variables and configuration file names
 #: that contain the values for these variables.
diff --git a/awscli/botocore/endpoint.py b/awscli/botocore/endpoint.py
index 6094cc0d4385..1a28fc6152ec 100644
--- a/awscli/botocore/endpoint.py
+++ b/awscli/botocore/endpoint.py
@@ -318,7 +318,7 @@ def _needs_retry(
             request_dict=request_dict,
         )
         handler_response = first_non_none_response(responses)
-        if handler_response is None:
+        if handler_response is None or handler_response is False:
             return False
         else:
             # Request needs to be retried, and we need to sleep
diff --git a/awscli/botocore/retries/standard.py b/awscli/botocore/retries/standard.py
index 0f82a6e3a6de..e89cfc25815a 100644
--- a/awscli/botocore/retries/standard.py
+++ b/awscli/botocore/retries/standard.py
@@ -26,7 +26,12 @@
 
 import logging
 import random
+import time
 
+# This is not a public interface and is subject to abrupt breaking changes.
+# Currently, it's only available to internal users for testing and validation.
+# Any usage is not advised or supported in external code bases.
+from botocore.configprovider import NEW_RETRIES_ENABLED
 from botocore.exceptions import (
     ConnectionError,
     ConnectTimeoutError,
@@ -37,27 +42,61 @@
 from botocore.retries.base import BaseRetryableChecker, BaseRetryBackoff
 
 DEFAULT_MAX_ATTEMPTS = 3
+_SERVICE_MAX_ATTEMPTS = {
+    'dynamodb': 4,
+    'dynamodb-streams': 4,
+}
 logger = logging.getLogger(__name__)
 
 
-def register_retry_handler(client, max_attempts=DEFAULT_MAX_ATTEMPTS):
-    retry_quota = RetryQuotaChecker(quota.RetryQuota())
-
+def register_retry_handler(client, max_attempts=None):
     service_id = client.meta.service_model.service_id
     service_event_name = service_id.hyphenize()
+    retry_event_adapter = RetryEventAdapter()
+
+    if NEW_RETRIES_ENABLED:
+        if (
+            max_attempts is None
+            and service_event_name in _SERVICE_MAX_ATTEMPTS
+        ):
+            max_attempts = _SERVICE_MAX_ATTEMPTS[service_event_name]
+        elif max_attempts is None:
+            max_attempts = DEFAULT_MAX_ATTEMPTS
+        throttling_detector = ThrottlingErrorDetector(retry_event_adapter)
+        retry_quota = RetryQuotaChecker(
+            quota.RetryQuota(), throttling_detector
+        )
+        handler = RetryHandler(
+            retry_policy=RetryPolicy(
+                retry_checker=StandardRetryConditions(
+                    max_attempts=max_attempts
+                ),
+                retry_backoff=ExponentialBackoff(
+                    service_name=service_event_name,
+                    throttling_detector=throttling_detector,
+                ),
+            ),
+            retry_event_adapter=retry_event_adapter,
+            retry_quota=retry_quota,
+            service_name=service_event_name,
+        )
+    else:
+        retry_quota = RetryQuotaChecker(quota.RetryQuota())
+        handler = RetryHandler(
+            retry_policy=RetryPolicy(
+                retry_checker=StandardRetryConditions(
+                    max_attempts=max_attempts or DEFAULT_MAX_ATTEMPTS
+                ),
+                retry_backoff=ExponentialBackoff(),
+            ),
+            retry_event_adapter=retry_event_adapter,
+            retry_quota=retry_quota,
+        )
+
     client.meta.events.register(
         f'after-call.{service_event_name}', retry_quota.release_retry_quota
     )
 
-    handler = RetryHandler(
-        retry_policy=RetryPolicy(
-            retry_checker=StandardRetryConditions(max_attempts=max_attempts),
-            retry_backoff=ExponentialBackoff(),
-        ),
-        retry_event_adapter=RetryEventAdapter(),
-        retry_quota=retry_quota,
-    )
-
     unique_id = f'retry-config-{service_event_name}'
     client.meta.events.register(
         f'needs-retry.{service_event_name}',
@@ -74,10 +113,28 @@ class RetryHandler:
     as an event handler.
     """
 
-    def __init__(self, retry_policy, retry_event_adapter, retry_quota):
+    # Temporary hard-coded list of long-polling operations. This will be
+    # replaced by the aws.api#longPoll modeled trait once it is available
+    # in service models.
+    _LONG_POLLING_OPERATIONS = {
+        'sqs': {'ReceiveMessage'},
+        'sfn': {'GetActivityTask'},
+        'swf': {'PollForActivityTask', 'PollForDecisionTask'},
+    }
+
+    def __init__(
+        self,
+        retry_policy,
+        retry_event_adapter,
+        retry_quota,
+        service_name=None,
+        sleep=time.sleep,
+    ):
         self._retry_policy = retry_policy
         self._retry_event_adapter = retry_event_adapter
         self._retry_quota = retry_quota
+        self._service_name = service_name
+        self._sleep = sleep
 
     def needs_retry(self, **kwargs):
         """Connect as a handler to the needs-retry event."""
@@ -93,6 +150,23 @@ def needs_retry(self, **kwargs):
                     retry_delay,
                 )
             else:
+                if NEW_RETRIES_ENABLED:
+                    if self._is_long_polling_operation(context):
+                        polling_delay = self._retry_policy.compute_retry_delay(
+                            context
+                        )
+                        self._sleep(polling_delay)
+                        logger.debug(
+                            "Retry needed but retry quota reached, "
+                            "not retrying request."
+                        )
+                        self._retry_event_adapter.adapt_retry_response_from_context(
+                            context
+                        )
+                        # Return False (non-None) to prevent any later needs-retry
+                        # handler from returning a delay that would cause
+                        # _needs_retry in endpoint.py to sleep again.
+                        return False
                 logger.debug(
                     "Retry needed but retry quota reached, "
                     "not retrying request."
@@ -102,6 +176,17 @@ def needs_retry(self, **kwargs):
         self._retry_event_adapter.adapt_retry_response_from_context(context)
         return retry_delay
 
+    def _is_long_polling_operation(self, context):
+        # TODO: Replace this hard-coded list with a model check once
+        # aws.api#longPoll trait is available in service models.
+        if self._service_name is None or context.operation_model is None:
+            return False
+        operations = self._LONG_POLLING_OPERATIONS.get(self._service_name)
+        return (
+            operations is not None
+            and context.operation_model.name in operations
+        )
+
 
 class RetryEventAdapter:
     """Adapter to existing retry interface used in the endpoints layer.
@@ -252,11 +337,31 @@ def compute_retry_delay(self, context):
 class ExponentialBackoff(BaseRetryBackoff):
     _BASE = 2
     _MAX_BACKOFF = 20
+    _RETRY_AFTER_HEADER = 'x-amz-retry-after'
+    _RETRY_AFTER_MAX_ADDITIONAL = 5  # seconds
+
+    _DEFAULT_BACKOFF_CONFIG = {
+        'throttling_base_scale': 1,
+        'non_throttling_base_scale': 0.05,
+    }
 
-    def __init__(self, max_backoff=20, random=random.random):
+    _SERVICE_BACKOFF_CONFIG = {
+        'dynamodb': {'non_throttling_base_scale': 0.025},
+        'dynamodb-streams': {'non_throttling_base_scale': 0.025},
+    }
+
+    def __init__(
+        self,
+        max_backoff=20,
+        random=random.random,
+        service_name=None,
+        throttling_detector=None,
+    ):
         self._base = self._BASE
         self._max_backoff = max_backoff
         self._random = random
+        self._service_name = service_name
+        self._throttling_detector = throttling_detector
 
     def delay_amount(self, context):
         """Calculates delay based on exponential backoff.
@@ -272,10 +377,64 @@ def delay_amount(self, context):
         # The context.attempt_number is a 1-based value, but we have
         # to calculate the delay based on i based a 0-based value.  We
         # want the first delay to just be ``rand(0, 1)``.
-        return min(
-            self._random() * (self._base ** (context.attempt_number - 1)),
-            self._max_backoff,
+        if NEW_RETRIES_ENABLED:
+            t_i = self._random() * min(
+                self._get_base_scale(context)
+                * (self._base ** (context.attempt_number - 1)),
+                self._max_backoff,
+            )
+
+            # Check for x-amz-retry-after header
+            retry_after = self._get_retry_after_delay(context)
+            if retry_after is not None:
+                # min is 't_i', max is 't_i + 5'
+                return max(
+                    t_i,
+                    min(retry_after, self._RETRY_AFTER_MAX_ADDITIONAL + t_i),
+                )
+
+            return t_i
+        else:
+            return self._random() * min(
+                (self._base ** (context.attempt_number - 1)),
+                self._max_backoff,
+            )
+
+    def _get_base_scale(self, context):
+        if (
+            self._throttling_detector
+            and self._throttling_detector.is_throttling_error_from_context(
+                context
+            )
+        ):
+            return self._DEFAULT_BACKOFF_CONFIG['throttling_base_scale']
+        if self._service_name in self._SERVICE_BACKOFF_CONFIG:
+            return self._SERVICE_BACKOFF_CONFIG[self._service_name][
+                'non_throttling_base_scale'
+            ]
+        return self._DEFAULT_BACKOFF_CONFIG['non_throttling_base_scale']
+
+    def _get_retry_after_delay(self, context):
+        if context.http_response is None:
+            return None
+        retry_after_ms = context.http_response.headers.get(
+            self._RETRY_AFTER_HEADER
         )
+        if retry_after_ms is None:
+            return None
+        try:
+            value = int(retry_after_ms) / 1000.0
+            if value < 0:
+                raise ValueError("Negative retry-after value")
+            return value
+        except (ValueError, OverflowError) as e:
+            logger.debug(
+                "Invalid %s header value: %s, ignoring. Error: %s",
+                self._RETRY_AFTER_HEADER,
+                retry_after_ms,
+                e,
+            )
+            return None
 
 
 class MaxAttemptsChecker(BaseRetryableChecker):
@@ -424,6 +583,9 @@ def __init__(self, retry_event_adapter):
     # This expects the kwargs from needs-retry to be passed through.
     def is_throttling_error(self, **kwargs):
         context = self._retry_event_adapter.create_retry_context(**kwargs)
+        return self.is_throttling_error_from_context(context)
+
+    def is_throttling_error_from_context(self, context):
         if self._fixed_error_code_detector.is_retryable(context):
             return True
         error_type = self._modeled_error_detector.detect_error_type(context)
@@ -473,7 +635,9 @@ def is_retryable(self, context):
 
 class RetryQuotaChecker:
     _RETRY_COST = 5
+    _RETRY_COST_V2 = 14
     _NO_RETRY_INCREMENT = 1
+    _THROTTLING_RETRY_COST = 5
     _TIMEOUT_RETRY_REQUEST = 10
     _TIMEOUT_EXCEPTIONS = (ConnectTimeoutError, ReadTimeoutError)
 
@@ -483,16 +647,23 @@ class RetryQuotaChecker:
     # a BaseRetryableChecker implies you can call .is_retryable(context)
     # as many times as you want and not affect anything.
 
-    def __init__(self, quota):
+    def __init__(self, quota, throttling_detector=None):
         self._quota = quota
+        self._throttling_detector = throttling_detector
         # This tracks the last amount
         self._last_amount_acquired = None
 
     def acquire_retry_quota(self, context):
-        if self._is_timeout_error(context):
-            capacity_amount = self._TIMEOUT_RETRY_REQUEST
+        if NEW_RETRIES_ENABLED:
+            if self._is_throttling_error(context):
+                capacity_amount = self._THROTTLING_RETRY_COST
+            else:
+                capacity_amount = self._RETRY_COST_V2
         else:
-            capacity_amount = self._RETRY_COST
+            if self._is_timeout_error(context):
+                capacity_amount = self._TIMEOUT_RETRY_REQUEST
+            else:
+                capacity_amount = self._RETRY_COST
         success = self._quota.acquire(capacity_amount)
         if success:
             # We add the capacity amount to the request context so we know
@@ -503,6 +674,13 @@ def acquire_retry_quota(self, context):
         context.add_retry_metadata(RetryQuotaReached=True)
         return False
 
+    def _is_throttling_error(self, context):
+        if self._throttling_detector is None:
+            return False
+        return self._throttling_detector.is_throttling_error_from_context(
+            context
+        )
+
     def _is_timeout_error(self, context):
         return isinstance(context.caught_exception, self._TIMEOUT_EXCEPTIONS)
 
diff --git a/tests/unit/botocore/retries/test_standard_retry_v2_1.py b/tests/unit/botocore/retries/test_standard_retry_v2_1.py
new file mode 100644
index 000000000000..d1d506dd7eb3
--- /dev/null
+++ b/tests/unit/botocore/retries/test_standard_retry_v2_1.py
@@ -0,0 +1,439 @@
+"""Temporary test suite for new updated retry behavior.
+
+These tests validate the updated retry behavior for standard retries,
+which is currently gated behind an internal flag and not yet available
+to external users. Once the changes are validated internally and released
+publicly, these tests will replace the corresponding tests in
+test_standard.py and this file will be removed.
+"""
+
+from collections import Counter
+
+import pytest
+from botocore import configprovider
+from botocore.awsrequest import AWSResponse
+from botocore.exceptions import ReadTimeoutError
+from botocore.retries import quota, standard
+
+from tests import BaseEnvVar, mock, unittest
+
+
+@mock.patch('botocore.retries.standard.NEW_RETRIES_ENABLED', True)
+class TestExponentialBackoff(unittest.TestCase):
+    def setUp(self):
+        self.random = lambda: 1
+        self.backoff = standard.ExponentialBackoff(
+            max_backoff=20, random=self.random
+        )
+
+    def test_range_of_exponential_backoff(self):
+        backoffs = [
+            self.backoff.delay_amount(standard.RetryContext(attempt_number=i))
+            for i in range(1, 12)
+        ]
+        # Note that we're capped at 20 which is our max backoff.
+        # Cap kicks in at attempt 10
+        self.assertEqual(
+            backoffs, [0.05, 0.1, 0.2, 0.4, 0.8, 1.6, 3.2, 6.4, 12.8, 20, 20]
+        )
+
+    def test_exponential_backoff_with_jitter(self):
+        backoff = standard.ExponentialBackoff()
+        backoffs = [
+            backoff.delay_amount(standard.RetryContext(attempt_number=3))
+            for i in range(10)
+        ]
+        # For attempt number 3, we should have a max value of 0.2 (0.05 * 2 ^ 2),
+        # so we can assert all the backoff values are within that range.
+        # 0.05 is the default non-throttling scale
+        for x in backoffs:
+            self.assertTrue(0 <= x <= 0.2)
+
+    def test_uniform_rand_dist_on_max_attempts(self):
+        backoff = standard.ExponentialBackoff()
+        num_datapoints = 10_000
+        backoffs = [
+            backoff.delay_amount(standard.RetryContext(attempt_number=10))
+            for i in range(num_datapoints)
+        ]
+        self._assert_looks_like_uniform_distribution(backoffs)
+
+    def _assert_looks_like_uniform_distribution(self, backoffs):
+        histogram = Counter(int(el) for el in backoffs)
+        expected_value = len(backoffs) / len(histogram)
+        # This is an arbitrarily chosen tolerance, but we're being fairly
+        # lenient here and giving a 20% tolerance.  We're only interested
+        # in cases where it's obviously broken and not a uniform distribution.
+        tolerance = 0.20
+        low = expected_value - (expected_value * tolerance)
+        high = expected_value + (expected_value * tolerance)
+        out_of_range = [
+            str(i) for i in histogram.values() if not low <= i <= high
+        ]
+        if out_of_range:
+            raise AssertionError(
+                "Backoff values outside of uniform distribution range "
+                f"({low} - {high}): {', '.join(out_of_range)}"
+            )
+
+
+@mock.patch('botocore.retries.standard.NEW_RETRIES_ENABLED', True)
+class TestRetryQuotaChecker(unittest.TestCase):
+    def setUp(self):
+        self.quota = quota.RetryQuota(500)
+        self.throttling_detector = standard.ThrottlingErrorDetector(
+            standard.RetryEventAdapter()
+        )
+        self.quota_checker = standard.RetryQuotaChecker(
+            self.quota, self.throttling_detector
+        )
+        self.request_context = {}
+
+    def create_context(
+        self,
+        is_timeout_error=False,
+        status_code=200,
+        is_throttling_error=False,
+    ):
+        caught_exception = None
+        parsed_response = {}
+        if is_timeout_error:
+            caught_exception = ReadTimeoutError(endpoint_url='https://foo')
+        if is_throttling_error:
+            status_code = 400
+            parsed_response = {'Error': {'Code': 'Throttling'}}
+        http_response = AWSResponse(
+            status_code=status_code, raw=None, headers={}, url='https://foo/'
+        )
+        context = standard.RetryContext(
+            attempt_number=1,
+            request_context=self.request_context,
+            caught_exception=caught_exception,
+            http_response=http_response,
+            parsed_response=parsed_response,
+        )
+        return context
+
+    def test_can_acquire_quota_for_throttling_error(self):
+        self.assertTrue(
+            self.quota_checker.acquire_retry_quota(
+                self.create_context(is_throttling_error=True)
+            )
+        )
+        self.assertEqual(self.request_context['retry_quota_capacity'], 5)
+
+    def test_can_acquire_quota_non_timeout_error(self):
+        self.assertTrue(
+            self.quota_checker.acquire_retry_quota(self.create_context())
+        )
+        self.assertEqual(self.request_context['retry_quota_capacity'], 14)
+
+    def test_can_acquire_quota_for_timeout_error(self):
+        self.assertTrue(
+            self.quota_checker.acquire_retry_quota(
+                self.create_context(is_timeout_error=True)
+            )
+        )
+        self.assertEqual(self.request_context['retry_quota_capacity'], 14)
+
+    def test_can_release_quota_based_on_context_value_on_success(self):
+        context = self.create_context()
+        # This is where we had to retry the request but eventually
+        # succeeded.
+        http_response = self.create_context(status_code=200).http_response
+        self.assertTrue(self.quota_checker.acquire_retry_quota(context))
+        self.assertEqual(self.quota.available_capacity, 486)
+        self.quota_checker.release_retry_quota(
+            context.request_context, http_response=http_response
+        )
+        self.assertEqual(self.quota.available_capacity, 500)
+
+    def test_can_release_quota_when_succeed_after_throttling_error(self):
+        context = self.create_context(is_throttling_error=True)
+        http_response = self.create_context(status_code=200).http_response
+        self.assertTrue(self.quota_checker.acquire_retry_quota(context))
+        self.assertEqual(self.quota.available_capacity, 495)
+        self.quota_checker.release_retry_quota(
+            context.request_context, http_response=http_response
+        )
+        self.assertEqual(self.quota.available_capacity, 500)
+
+    def test_dont_release_quota_if_all_retries_failed(self):
+        context = self.create_context()
+        # If max_attempts_reached is True, then it means we used up all
+        # our retry attempts and still failed.  In this case we shouldn't
+        # give any retry quota back.
+        http_response = self.create_context(status_code=500).http_response
+        self.assertTrue(self.quota_checker.acquire_retry_quota(context))
+        self.assertEqual(self.quota.available_capacity, 486)
+        self.quota_checker.release_retry_quota(
+            context.request_context, http_response=http_response
+        )
+        self.assertEqual(self.quota.available_capacity, 486)
+
+    def test_can_release_default_quota_if_not_in_context(self):
+        context = self.create_context()
+        self.assertTrue(self.quota_checker.acquire_retry_quota(context))
+        self.assertEqual(self.quota.available_capacity, 486)
+        # We're going to remove the quota amount from the request context.
+        # This represents a successful request with no retries.
+        self.request_context.pop('retry_quota_capacity')
+        self.quota_checker.release_retry_quota(
+            context.request_context, context.http_response
+        )
+        # We expect only 1 unit was released.
+        self.assertEqual(self.quota.available_capacity, 487)
+
+    def test_acquire_quota_fails(self):
+        quota_checker = standard.RetryQuotaChecker(
+            quota.RetryQuota(initial_capacity=14)
+        )
+        # The first one succeeds.
+        self.assertTrue(
+            quota_checker.acquire_retry_quota(self.create_context())
+        )
+        # But we should fail now because we're out of quota.
+        self.request_context.pop('retry_quota_capacity')
+        self.assertFalse(
+            quota_checker.acquire_retry_quota(self.create_context())
+        )
+        self.assertNotIn('retry_quota_capacity', self.request_context)
+
+    def test_quota_reached_adds_retry_metadata(self):
+        quota_checker = standard.RetryQuotaChecker(
+            quota.RetryQuota(initial_capacity=0)
+        )
+        context = self.create_context()
+        self.assertFalse(quota_checker.acquire_retry_quota(context))
+        self.assertEqual(
+            context.get_retry_metadata(), {'RetryQuotaReached': True}
+        )
+
+    def test_single_failed_request_does_not_give_back_quota(self):
+        context = self.create_context()
+        http_response = self.create_context(status_code=400).http_response
+        # First deduct some amount of the retry quota so we're not hitting
+        # the upper bound.
+        self.quota.acquire(50)
+        self.assertEqual(self.quota.available_capacity, 450)
+        self.quota_checker.release_retry_quota(
+            context.request_context, http_response=http_response
+        )
+        self.assertEqual(self.quota.available_capacity, 450)
+
+
+@mock.patch('botocore.retries.standard.NEW_RETRIES_ENABLED', True)
+class TestServiceSpecificRetries(unittest.TestCase):
+    def _make_retry_context(self, attempt, status_code, error_code=None):
+        http_response = AWSResponse(
+            status_code=status_code, raw=None, headers={}, url='https://foo/'
+        )
+        parsed_response = {}
+        if error_code:
+            parsed_response = {'Error': {'Code': error_code}}
+        return standard.RetryContext(
+            attempt_number=attempt,
+            operation_model=mock.Mock(error_shapes=[]),
+            http_response=http_response,
+            parsed_response=parsed_response,
+            request_context={},
+        )
+
+    def test_dynamodb_base_backoff_and_increased_retries(self):
+        retry_quota_bucket = quota.RetryQuota()
+
+        throttling_detector = standard.ThrottlingErrorDetector(
+            standard.RetryEventAdapter()
+        )
+        retry_quota = standard.RetryQuotaChecker(
+            retry_quota_bucket, throttling_detector
+        )
+        backoff = standard.ExponentialBackoff(
+            random=lambda: 1,
+            service_name='dynamodb',
+            throttling_detector=throttling_detector,
+        )
+
+        retry_conditions = standard.StandardRetryConditions(max_attempts=4)
+
+        # Attempts 1-3: retryable
+        for attempt, expected_quota, expected_delay in [
+            (1, 486, 0.025),
+            (2, 472, 0.05),
+            (3, 458, 0.1),
+        ]:
+            context = self._make_retry_context(
+                attempt=attempt, status_code=500
+            )
+            self.assertTrue(retry_conditions.is_retryable(context))
+            self.assertTrue(retry_quota.acquire_retry_quota(context))
+            self.assertEqual(
+                retry_quota_bucket.available_capacity, expected_quota
+            )
+            self.assertEqual(backoff.delay_amount(context), expected_delay)
+
+        # Attempt 4: NOT retryable because max_attempts=4
+        context4 = self._make_retry_context(attempt=4, status_code=500)
+        self.assertFalse(retry_conditions.is_retryable(context4))
+        self.assertEqual(retry_quota_bucket.available_capacity, 458)
+        self.assertEqual(
+            context4.get_retry_metadata(), {'MaxAttemptsReached': True}
+        )
+
+    def test_sqs_triggers_long_polling_backoff_when_token_empty(self):
+        mock_sleep = mock.Mock()
+        retry_policy = mock.Mock(spec=standard.RetryPolicy)
+        retry_policy.should_retry.return_value = True
+        retry_policy.compute_retry_delay.return_value = 0.05
+
+        retry_quota = mock.Mock(spec=standard.RetryQuotaChecker)
+        retry_quota.acquire_retry_quota.return_value = False
+
+        handler = standard.RetryHandler(
+            retry_policy=retry_policy,
+            retry_event_adapter=standard.RetryEventAdapter(),
+            retry_quota=retry_quota,
+            service_name='sqs',
+            sleep=mock_sleep,
+        )
+
+        context = self._make_retry_context(attempt=1, status_code=500)
+        context.operation_model = mock.Mock()
+        context.operation_model.name = 'ReceiveMessage'
+
+        result = handler.needs_retry(
+            response=(context.http_response, {}),
+            attempts=1,
+            caught_exception=None,
+            request_dict={'context': {}},
+            operation=context.operation_model,
+        )
+
+        assert result is False
+        retry_quota.acquire_retry_quota.assert_called_once()
+        mock_sleep.assert_called_once_with(0.05)
+
+    def test_non_long_polling_operation_does_not_sleep_when_quota_exhausted(
+        self,
+    ):
+        mock_sleep = mock.Mock()
+        retry_policy = mock.Mock(spec=standard.RetryPolicy)
+        retry_policy.should_retry.return_value = True
+        retry_policy.compute_retry_delay.return_value = 0.05
+
+        retry_quota = mock.Mock(spec=standard.RetryQuotaChecker)
+        retry_quota.acquire_retry_quota.return_value = False
+
+        handler = standard.RetryHandler(
+            retry_policy=retry_policy,
+            retry_event_adapter=standard.RetryEventAdapter(),
+            retry_quota=retry_quota,
+            service_name='sqs',
+            sleep=mock_sleep,
+        )
+
+        context = self._make_retry_context(attempt=1, status_code=500)
+        context.operation_model = mock.Mock()
+        context.operation_model.name = 'SendMessage'
+
+        result = handler.needs_retry(
+            response=(context.http_response, {}),
+            attempts=1,
+            caught_exception=None,
+            request_dict={'context': {}},
+            operation=context.operation_model,
+        )
+
+        self.assertIsNone(result)
+        mock_sleep.assert_not_called()
+
+
+@mock.patch('botocore.retries.standard.NEW_RETRIES_ENABLED', True)
+class TestRetryAfterHeaderInRetries:
+    def setup_method(self):
+        throttling_detector = standard.ThrottlingErrorDetector(
+            standard.RetryEventAdapter()
+        )
+        self.retry_quota_bucket = quota.RetryQuota()
+        self.retry_quota = standard.RetryQuotaChecker(
+            self.retry_quota_bucket, throttling_detector
+        )
+        self.backoff = standard.ExponentialBackoff(
+            random=lambda: 1,
+            throttling_detector=throttling_detector,
+        )
+
+    def _make_retry_context(
+        self, attempt, status_code, error_code=None, retry_after='0'
+    ):
+        http_response = AWSResponse(
+            status_code=status_code,
+            raw=None,
+            headers={'x-amz-retry-after': retry_after},
+            url='https://foo/',
+        )
+        parsed_response = {}
+        if error_code:
+            parsed_response = {'Error': {'Code': error_code}}
+        return standard.RetryContext(
+            attempt_number=attempt,
+            operation_model=mock.Mock(error_shapes=[]),
+            http_response=http_response,
+            parsed_response=parsed_response,
+            request_context={},
+        )
+
+    @pytest.mark.parametrize(
+        'retry_after_header,'
+        'expected_capacity_after_failure,'
+        'expected_delay_amount,'
+        'expected_capacity_after_success',
+        [
+            ('1500', 486, 1.5, 500),
+            ('0', 486, 0.05, 500),
+            ('10000', 486, 5.05, 500),
+            ('invalid', 486, 0.05, 500),
+            ('-100', 486, 0.05, 500),
+        ],
+    )
+    def test_x_amz_retry_after_header_is_honored(
+        self,
+        retry_after_header,
+        expected_capacity_after_failure,
+        expected_delay_amount,
+        expected_capacity_after_success,
+    ):
+        context = self._make_retry_context(
+            attempt=1, status_code=500, retry_after=retry_after_header
+        )
+
+        assert self.retry_quota.acquire_retry_quota(context)
+        assert (
+            self.retry_quota_bucket.available_capacity
+            == expected_capacity_after_failure
+        )
+        assert self.backoff.delay_amount(context) == expected_delay_amount
+
+        http_success = AWSResponse(
+            status_code=200, raw=None, headers={}, url='https://foo/'
+        )
+        self.retry_quota.release_retry_quota(
+            context.request_context, http_response=http_success
+        )
+        assert (
+            self.retry_quota_bucket.available_capacity
+            == expected_capacity_after_success
+        )
+
+
+class TestNewRetriesEnvironmentVariable(BaseEnvVar):
+    def test_env_var_true_enables_new_retries(self):
+        self.environ['AWS_NEW_RETRIES_2026'] = 'true'
+        self.assertTrue(configprovider._resolve_new_retries())
+
+    def test_env_var_false_disables_new_retries(self):
+        self.environ['AWS_NEW_RETRIES_2026'] = 'false'
+        self.assertFalse(configprovider._resolve_new_retries())
+
+    def test_no_env_var_uses_default(self):
+        self.assertFalse(configprovider._resolve_new_retries())