Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions benchmarks/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,15 @@ def test_dict_diff_nested(benchmark):
benchmark(diff, a, b)


@pytest.mark.benchmark(group="dict-diff")
@pytest.mark.parametrize("n", [500, 1000, 2000])
def test_dict_diff_large_common(benchmark, n):
    """Benchmark diffing two n-key dicts that share every key but no value.

    Both dicts use the keys ``k0`` .. ``k{n-1}``; each value is a one-entry
    dict whose ``"v"`` differs by one between the two sides, so every common
    key yields a nested change.
    """

    def build(offset):
        # Keys are rebuilt per dict so the two sides share no key objects.
        mapping = {}
        for i in range(n):
            mapping[f"k{i}"] = {"v": i + offset}
        return mapping

    before = build(0)
    after = build(1)
    benchmark(diff, before, after)


# ========================================
# Set Diff Benchmarks
# ========================================
Expand Down
79 changes: 47 additions & 32 deletions patchdiff/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,44 +135,59 @@ def diff_lists(input: List, output: List, ptr: Pointer) -> Tuple[List, List]:


def diff_dicts(input: Dict, output: Dict, ptr: Pointer) -> Tuple[List, List]:
    """Compute forward and reverse patch operations between two dicts.

    Args:
        input: The starting dict. A falsy value (``None`` or ``{}``) is
            treated as having no keys.
        output: The target dict. A falsy value is treated as having no keys.
        ptr: Pointer to the dict being diffed; ``ptr.append(key)`` is used to
            build the path of each per-key operation.

    Returns:
        A tuple ``(ops, rops)``. ``ops`` holds a ``remove`` for every key only
        in ``input``, an ``add`` for every key only in ``output``, followed by
        whatever ``diff`` yields for each key present in both. ``rops`` holds
        the corresponding reverse operations.
    """
    ops: List = []
    input_only_rops: List = []
    output_only_rops: List = []
    common_rops_chunks: List[List] = []

    input_keys = set(input.keys()) if input else set()
    output_keys = set(output.keys()) if output else set()

    # Keys that disappear: remove going forward, re-add going backward.
    for key in input_keys - output_keys:
        key_ptr = ptr.append(key)
        ops.append({"op": "remove", "path": key_ptr})
        input_only_rops.append({"op": "add", "path": key_ptr, "value": input[key]})
    input_only_rops.reverse()

    # Keys that appear: add going forward, remove going backward.
    for key in output_keys - input_keys:
        key_ptr = ptr.append(key)
        ops.append({"op": "add", "path": key_ptr, "value": output[key]})
        output_only_rops.append({"op": "remove", "path": key_ptr})
    output_only_rops.reverse()

    # Keys on both sides: delegate to the generic diff for the two values.
    for key in input_keys & output_keys:
        key_ops, key_rops = diff(input[key], output[key], ptr.append(key))
        ops.extend(key_ops)
        if key_rops:
            common_rops_chunks.append(key_rops)

    # Match the historical insert(0, ...) + key_rops.extend(rops) layering:
    # later common chunks went in front of earlier ones, and the input/output
    # singletons sat behind them in reverse iteration order. Appending and
    # reversing once avoids shifting or copying the accumulated list on every
    # step.
    rops: List = []
    for chunk in reversed(common_rops_chunks):
        rops.extend(chunk)
    rops.extend(output_only_rops)
    rops.extend(input_only_rops)
    return ops, rops


def diff_sets(input: Set, output: Set, ptr: Pointer) -> Tuple[List, List]:
    """Compute forward and reverse patch operations between two sets.

    Args:
        input: The starting set.
        output: The target set.
        ptr: Pointer to the set being diffed. Removals are addressed with
            ``ptr.append(value)`` and additions with ``ptr.append("-")``.

    Returns:
        A tuple ``(ops, rops)``. ``ops`` removes every value only in ``input``
        and then adds every value only in ``output``. ``rops`` holds the
        reverse operations: first the removals of the added values, then the
        re-additions of the removed values, each group in reverse iteration
        order.
    """
    ops: List = []
    input_only_rops: List = []
    output_only_rops: List = []

    # Values that disappear: remove going forward, re-add going backward.
    for value in input - output:
        ops.append({"op": "remove", "path": ptr.append(value)})
        input_only_rops.append({"op": "add", "path": ptr.append("-"), "value": value})
    input_only_rops.reverse()

    # Values that appear: add going forward, remove going backward.
    for value in output - input:
        ops.append({"op": "add", "path": ptr.append("-"), "value": value})
        output_only_rops.append({"op": "remove", "path": ptr.append(value)})
    output_only_rops.reverse()

    # Same order the earlier insert(0, ...) version produced, built with
    # appends and a single reverse per group instead of front insertions.
    rops = output_only_rops + input_only_rops
    return ops, rops


Expand Down
59 changes: 58 additions & 1 deletion tests/test_diff.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from patchdiff import diff
import random

from patchdiff import apply, diff
from patchdiff.pointer import Pointer


Expand Down Expand Up @@ -168,3 +170,58 @@ def test_mixed():
{"op": "add", "path": Pointer(["a", 3, "-"]), "value": "a"},
{"op": "remove", "path": Pointer(["c"])},
]


def _random_dict(rng, n_keys, value_pool):
    """Build a dict with keys ``k0`` .. ``k{n_keys-1}`` and random values.

    Each value is drawn independently from ``value_pool`` with ``rng.choice``,
    one draw per key in ascending key index order.
    """
    generated = {}
    for index in range(n_keys):
        generated[f"k{index}"] = rng.choice(value_pool)
    return generated


def _mutate_dict(rng, base, value_pool):
    """Return a mutated copy of ``base`` with replaced, removed and added keys.

    ``base`` itself is not modified. Roughly a third of the original keys get
    a fresh value from ``value_pool``, roughly a quarter are removed (a key
    may be both replaced and then removed), and roughly a quarter as many new
    ``new_<i>_<random int>`` keys are added. At least one key is always added.

    An empty ``base`` is handled: nothing can be replaced or removed, so the
    result contains only the single added key.
    """
    result = dict(base)
    keys = list(result.keys())
    n_keys = len(keys)

    # rng.sample raises ValueError when asked for more items than the
    # population holds, so clamp the "at least one" floor to the key count.
    replace_count = min(n_keys, max(1, n_keys // 3))
    remove_count = min(n_keys, max(1, n_keys // 4))

    # Replace
    for key in rng.sample(keys, k=replace_count):
        result[key] = rng.choice(value_pool)
    # Remove
    for key in rng.sample(keys, k=remove_count):
        result.pop(key, None)
    # Add
    for i in range(max(1, n_keys // 4)):
        result[f"new_{i}_{rng.randint(0, 10_000)}"] = rng.choice(value_pool)
    return result


def test_dict_diff_roundtrip_property():
    """Check that dict diffs round-trip on seeded random inputs.

    For 25 generated pairs, applying ``ops`` to the source must give the
    target and applying ``rops`` to the target must give the source. The
    value pool mixes scalars, a tuple, a dict, a list and a set.
    """
    rng = random.Random(20260526)
    value_pool = [0, 1, "x", "y", (1, 2), {"nested": 1}, [1, 2, 3], {"a", "b"}]

    for _ in range(25):
        n_keys = rng.randint(0, 30)
        source = _random_dict(rng, n_keys, value_pool)
        if source:
            target = _mutate_dict(rng, source, value_pool)
        else:
            # Nothing to mutate in an empty dict, so diff against a fresh one.
            target = _random_dict(rng, 5, value_pool)

        forward, backward = diff(source, target)
        assert apply(source, forward) == target
        assert apply(target, backward) == source


def test_set_diff_roundtrip_property():
    """Check that set diffs round-trip on seeded random inputs.

    For 25 pairs of sets sampled from a universe of 50 ints and 50 strings,
    applying ``ops`` to the source must give the target and applying ``rops``
    to the target must give the source.
    """
    rng = random.Random(20260527)
    universe = [*range(50), *(f"s{i}" for i in range(50))]

    for _ in range(25):
        # Draw both sizes before sampling so the random stream order is fixed.
        source_size = rng.randint(0, 30)
        target_size = rng.randint(0, 30)
        source = set(rng.sample(universe, k=source_size))
        target = set(rng.sample(universe, k=target_size))

        forward, backward = diff(source, target)
        assert apply(source, forward) == target
        assert apply(target, backward) == source
Loading