From 23b35f1a7b0c46bd573ffc3c02234b25623d3271 Mon Sep 17 00:00:00 2001 From: Berend Klein Haneveld Date: Tue, 26 May 2026 11:15:27 +0200 Subject: [PATCH] perf: use str.replace in Pointer escape/unescape Replaces the regex-based escape helpers in patchdiff/pointer.py with str.replace. For single-character substitutions this is ~3x faster than a compiled regex, and Pointer.__str__ calls escape() per token on every serialization. Co-Authored-By: Claude Opus 4.7 --- patchdiff/pointer.py | 10 ++-------- tests/test_pointer.py | 30 +++++++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/patchdiff/pointer.py b/patchdiff/pointer.py index cf4dbb1..13b0863 100644 --- a/patchdiff/pointer.py +++ b/patchdiff/pointer.py @@ -1,22 +1,16 @@ from __future__ import annotations -import re from typing import Any, Hashable, Iterable from .types import Diffable -tilde0_re = re.compile("~0") -tilde1_re = re.compile("~1") -tilde_re = re.compile("~") -slash_re = re.compile("/") - def unescape(token: str) -> str: - return tilde0_re.sub("~", tilde1_re.sub("/", token)) + return token.replace("~1", "/").replace("~0", "~") def escape(token: str) -> str: - return slash_re.sub("~1", tilde_re.sub("~0", token)) + return token.replace("~", "~0").replace("/", "~1") class Pointer: diff --git a/tests/test_pointer.py b/tests/test_pointer.py index 045d7a9..6982557 100644 --- a/tests/test_pointer.py +++ b/tests/test_pointer.py @@ -1,4 +1,4 @@ -from patchdiff.pointer import Pointer +from patchdiff.pointer import Pointer, escape, unescape def test_pointer_get(): @@ -44,3 +44,31 @@ def test_pointer_eq(): def test_pointer_append(): assert Pointer([1]).append("foo") == Pointer([1, "foo"]) + + +def test_escape_unescape_roundtrip(): + # ~01 is the tricky case: escape must produce ~001, not be confused with + # the RFC 6901 escape sequence ~0 followed by '1'. + tokens = [ + "", + "plain", + "has/slash", + "has~tilde", + "~/mix", + "///", + "~~~", + "~01", + ] + expected = { + "": "", + "plain": "plain", + "has/slash": "has~1slash", + "has~tilde": "has~0tilde", + "~/mix": "~0~1mix", + "///": "~1~1~1", + "~~~": "~0~0~0", + "~01": "~001", + } + for token in tokens: + assert escape(token) == expected[token] + assert unescape(escape(token)) == token