diff --git a/src/cloudevents/core/bindings/http.py b/src/cloudevents/core/bindings/http.py index 86e013b..fbdcc3b 100644 --- a/src/cloudevents/core/bindings/http.py +++ b/src/cloudevents/core/bindings/http.py @@ -13,20 +13,64 @@ # under the License. from dataclasses import dataclass +from datetime import datetime from typing import Any, Final +from urllib.parse import quote, unquote + +from dateutil.parser import isoparse from cloudevents.core.base import BaseCloudEvent, EventFactory from cloudevents.core.bindings.common import ( CONTENT_TYPE_HEADER, DATACONTENTTYPE_ATTR, - decode_header_value, - encode_header_value, + TIME_ATTR, get_event_factory_for_version, ) from cloudevents.core.formats.base import Format from cloudevents.core.formats.json import JSONFormat from cloudevents.core.spec import SPECVERSION_V1_0 +# Per CloudEvents HTTP binding spec (section 3.1.3.2), printable ASCII characters +# (U+0021-U+007E) are safe EXCEPT double-quote and percent; space is encoded too. +_CE_SAFE_CHARS: Final[str] = "".join( + c for c in map(chr, range(0x21, 0x7F)) if c not in (" ", '"', "%") +) + + +def _encode_header_value(value: Any) -> str: + """ + Encode a CloudEvent attribute value for use in an HTTP header. + + Handles datetime objects (ISO 8601 with 'Z' suffix for UTC) and applies + percent-encoding per the CloudEvents HTTP binding spec (section 3.1.3.2). + + :param value: The attribute value to encode + :return: Percent-encoded string suitable for HTTP headers + """ + if isinstance(value, datetime): + str_value = value.isoformat() + if str_value.endswith("+00:00"): + str_value = str_value[:-6] + "Z" + return quote(str_value, safe=_CE_SAFE_CHARS) + return quote(str(value), safe=_CE_SAFE_CHARS) + + +def _decode_header_value(attr_name: str, value: str) -> Any: + """ + Decode a CloudEvent attribute value from an HTTP header. + + Applies percent-decoding and parses the 'time' attribute as datetime. 
+ + :param attr_name: The name of the CloudEvent attribute + :param value: The percent-encoded header value + :return: Decoded value (datetime for 'time' attribute, string otherwise) + """ + decoded = unquote(value) + if attr_name == TIME_ATTR: + return isoparse(decoded) + return decoded + + CE_PREFIX: Final[str] = "ce-" @@ -83,7 +127,7 @@ def to_binary(event: BaseCloudEvent, event_format: Format) -> HTTPMessage: headers[CONTENT_TYPE_HEADER] = str(attr_value) else: header_name = f"{CE_PREFIX}{attr_name}" - headers[header_name] = encode_header_value(attr_value) + headers[header_name] = _encode_header_value(attr_value) data = event.get_data() datacontenttype = attributes.get(DATACONTENTTYPE_ATTR) @@ -130,7 +174,7 @@ def from_binary( if normalized_name.startswith(CE_PREFIX): attr_name = normalized_name[len(CE_PREFIX) :] - attributes[attr_name] = decode_header_value(attr_name, header_value) + attributes[attr_name] = _decode_header_value(attr_name, header_value) elif normalized_name == CONTENT_TYPE_HEADER: attributes[DATACONTENTTYPE_ATTR] = header_value diff --git a/tests/test_core/test_bindings/test_http.py b/tests/test_core/test_bindings/test_http.py index cb5b560..cf46ba4 100644 --- a/tests/test_core/test_bindings/test_http.py +++ b/tests/test_core/test_bindings/test_http.py @@ -118,7 +118,7 @@ def test_to_binary_required_attributes() -> None: assert "ce-type" in message.headers assert message.headers["ce-type"] == "com.example.test" assert "ce-source" in message.headers - assert message.headers["ce-source"] == "%2Ftest" # Forward slash is percent-encoded + assert message.headers["ce-source"] == "/test" # Printable ASCII is not encoded assert "ce-id" in message.headers assert message.headers["ce-id"] == "test-id-123" assert "ce-specversion" in message.headers @@ -134,8 +134,8 @@ def test_to_binary_with_optional_attributes() -> None: message = to_binary(event, JSONFormat()) assert message.headers["ce-subject"] == "test-subject" - # All special characters including : 
and / are percent-encoded - assert message.headers["ce-dataschema"] == "https%3A%2F%2Fexample.com%2Fschema" + # Printable ASCII (including : and /) is not encoded per CE spec 3.1.3.2 + assert message.headers["ce-dataschema"] == "https://example.com/schema" def test_to_binary_with_extensions() -> None: @@ -203,9 +203,9 @@ def test_to_binary_datetime_encoding() -> None: ) message = to_binary(event, JSONFormat()) - # Should encode with 'Z' suffix for UTC + # Should encode with 'Z' suffix for UTC, colons not encoded per CE spec assert "ce-time" in message.headers - assert "2023-01-15T10%3A30%3A45Z" in message.headers["ce-time"] + assert "2023-01-15T10:30:45Z" == message.headers["ce-time"] def test_to_binary_special_characters() -> None: @@ -216,10 +216,9 @@ def test_to_binary_special_characters() -> None: ) message = to_binary(event, JSONFormat()) - # Should be percent-encoded + # Only space is encoded; ! is printable ASCII and left as-is per CE spec assert "ce-subject" in message.headers - # Space becomes %20, ! becomes %21 - assert "Hello%20World%21" == message.headers["ce-subject"] + assert "Hello%20World!" 
== message.headers["ce-subject"] def test_to_binary_datacontenttype_mapping() -> None: @@ -254,11 +253,11 @@ def test_to_binary_header_encoding() -> None: ) message = to_binary(event, JSONFormat()) - # Should be percent-encoded + # Per CE spec 3.1.3.2: only space, double-quote, percent, and characters outside printable ASCII are encoded encoded_subject = message.headers["ce-subject"] assert " " not in encoded_subject # Spaces should be encoded assert "%20" in encoded_subject # Encoded space - assert "%3A" in encoded_subject # Encoded colon + assert ":" in encoded_subject # Colon is printable ASCII, not encoded def test_from_binary_accepts_http_message() -> None: @@ -738,11 +737,28 @@ def test_percent_encoding_special_chars() -> None: ) message = to_binary(event, JSONFormat()) - # All special chars should be encoded + # Per CE spec: space and double-quote are encoded, but & is printable ASCII encoded = message.headers["ce-subject"] assert " " not in encoded assert '"' not in encoded - assert "&" not in encoded + assert "&" in encoded # & is printable ASCII (U+0026), not encoded + + +def test_percent_encoding_spec_example() -> None: + """Test the example from CE HTTP binding spec section 3.1.3.2: + 'Euro € 😀' SHOULD be encoded as 'Euro%20%E2%82%AC%20%F0%9F%98%80' + """ + event = create_event( + {"subject": "Euro € 😀"}, + data=None, + ) + message = to_binary(event, JSONFormat()) + + assert message.headers["ce-subject"] == "Euro%20%E2%82%AC%20%F0%9F%98%80" + + # Round-trip: decode back to original + parsed = from_binary(message, JSONFormat(), CloudEvent) + assert parsed.get_subject() == "Euro € 😀" def test_percent_encoding_unicode() -> None: