Skip to content

Commit d4f7bfa

Browse files
fix neighbors_v2 edge-type filter for Kuzu label IN bug (#121)
- Replace `label(e) IN $list` with an OR of scalar `label(e) = $p` comparisons (Kuzu 0.11.x).
- Avoid typed union rel patterns here: RETURN columns differ across rel types.
- Add regression test for strict-subset edge_types; adjust FakeGraph stub.
- Document Kuzu Cypher pitfalls in AGENTS.md, kuzu_queries.py, tests/README.

Fixes #119.

Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent ed022c6 commit d4f7bfa

5 files changed

Lines changed: 71 additions & 5 deletions

File tree

AGENTS.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,22 @@ Read these directly. Don't rely on rule files to mirror them.
8585
"Re-index required" callout and bump `ontology_version` when
8686
enrichment semantics change.
8787

88+
## Kuzu Cypher pitfalls
89+
90+
When adding or editing Cypher run against Kuzu (for example in
91+
`kuzu_queries.py`, `mcp_v2.py`, or any `KuzuGraph._rows` caller):
92+
93+
- **Do not filter relationship types with** `label(e) IN $list` **or**
94+
`label(e) IN ["A","B"]` **in** `WHERE`. On Kuzu 0.11.x this predicate can
95+
be silently ignored, so edges of other types are returned; prefer an **OR of scalar equalities**
96+
(`label(e) = $p OR label(e) = $q …`) with bound parameters, after
97+
validating labels against an allowlist (see `neighbors_v2` in
98+
`mcp_v2.py`).
99+
- **Typed union patterns** like `-[e:CALLS|HTTP_CALLS]->` are only safe if
100+
every column you `RETURN` from `e` exists on **all** of those
101+
relationship types in the graph schema. Otherwise prefer untyped `[e]`
102+
plus explicit label filtering, or split queries.
103+
88104
## Workflow
89105

90106
- Branch from `master`. Branch names: `cursor/<topic>` (CLI work),

kuzu_queries.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@
55
66
The Kuzu database is opened read-only and cached per-process. This module is
77
intentionally dependency-light: nothing here imports LanceDB or sentence-transformers.
8+
9+
Cypher pitfalls (see also ``AGENTS.md``): avoid ``label(e) IN $list`` in ``WHERE`` for
10+
relationship-type filters; use OR of ``label(e) = $param`` with bound parameters.
11+
Typed unions ``-[e:A|B]-`` require every ``RETURN`` column on ``e`` to exist on all
12+
listed rel types, or the binder may fail.
813
"""
914
from __future__ import annotations
1015

mcp_v2.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -532,6 +532,12 @@ def neighbors_v2(
532532
) -> NeighborsOutput:
533533
try:
534534
_NEIGHBOR_EDGE_TYPES_ADAPTER.validate_python(edge_types)
535+
# Kuzu 0.11.x can drop `label(e) IN $list` in WHERE; use OR of scalar equalities instead.
536+
# Typed unions like `[e:CALLS|HTTP_CALLS]` fail the binder when RETURN references rel
537+
# columns that exist on only some of the union members.
538+
labels = list(dict.fromkeys(edge_types))
539+
label_params = [f"l{i}" for i in range(len(labels))]
540+
label_predicate = "(" + " OR ".join(f"label(e) = ${name}" for name in label_params) + ")"
535541
g = graph or KuzuGraph.get()
536542
raw_filter = _coerce_filter(filter)
537543
nf = (
@@ -543,27 +549,30 @@ def neighbors_v2(
543549
results: list[Edge] = []
544550
for origin_id in origins:
545551
_resolve_node_kind(g, origin_id)
552+
q_params = {"id": origin_id, **dict(zip(label_params, labels, strict=True))}
546553
if direction == "out":
547554
rows = g._rows( # noqa: SLF001
548-
"MATCH (a)-[e]->(b) WHERE a.id = $id AND label(e) IN $edge_types "
555+
"MATCH (a)-[e]->(b) WHERE a.id = $id AND "
556+
f"{label_predicate} "
549557
"RETURN b.id AS other_id, label(e) AS edge_type, e.confidence AS confidence, "
550558
"e.strategy AS strategy, e.match AS match, e.mechanism AS mechanism, "
551559
"e.annotation AS annotation, e.field_or_param AS field_or_param, "
552560
"e.source AS source, e.call_site_line AS call_site_line, "
553561
"e.call_site_byte AS call_site_byte, e.arg_count AS arg_count, "
554562
"e.resolved AS resolved",
555-
{"id": origin_id, "edge_types": edge_types},
563+
q_params,
556564
)
557565
else:
558566
rows = g._rows( # noqa: SLF001
559-
"MATCH (a)<-[e]-(b) WHERE a.id = $id AND label(e) IN $edge_types "
567+
"MATCH (a)<-[e]-(b) WHERE a.id = $id AND "
568+
f"{label_predicate} "
560569
"RETURN b.id AS other_id, label(e) AS edge_type, e.confidence AS confidence, "
561570
"e.strategy AS strategy, e.match AS match, e.mechanism AS mechanism, "
562571
"e.annotation AS annotation, e.field_or_param AS field_or_param, "
563572
"e.source AS source, e.call_site_line AS call_site_line, "
564573
"e.call_site_byte AS call_site_byte, e.arg_count AS arg_count, "
565574
"e.resolved AS resolved",
566-
{"id": origin_id, "edge_types": edge_types},
575+
q_params,
567576
)
568577
for row in rows:
569578
other_id = str(row.get("other_id") or "")

tests/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ cd /path/to/java-codebase-rag
2929
.venv/bin/python -m pytest tests -v
3030
```
3131

32+
**Kuzu Cypher:** When writing queries or asserting on edge filters, follow the pitfalls note in [`AGENTS.md`](../AGENTS.md) (avoid `label(e) IN $list` for type filters; be careful with typed union rel patterns).
33+
3234
## CI merge gate and fixture tiers
3335

3436
**Merge gate (mechanical):** [`.github/workflows/test.yml`](../.github/workflows/test.yml) runs `pytest tests` with `JAVA_CODEBASE_RAG_RUN_HEAVY=0` on every pull request and on every push to `master`. Branch protection on `master` requires the `test` status check to pass before merge and disables force-push. Break-glass policy: `enforce_admins: false` so the sole maintainer can bypass for emergency hotfixes — explain the bypass in the merge commit.

tests/test_mcp_v2.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,18 @@ def _method_id_with_calls(kuzu_graph, direction: str) -> str:
2727
return str(rows[0]["id"])
2828

2929

30+
def _method_id_declares_client_and_other_out_edge(kuzu_graph) -> str | None:
31+
"""A method with DECLARES_CLIENT plus another out-label (Kuzu #119 strict-subset case)."""
32+
for pattern in (
33+
"MATCH (m:Symbol {kind: 'method'})-[:DECLARES_CLIENT]->() MATCH (m)-[:CALLS]->() RETURN m.id AS id LIMIT 1",
34+
"MATCH (m:Symbol {kind: 'method'})-[:DECLARES_CLIENT]->() MATCH (m)-[:HTTP_CALLS]->() RETURN m.id AS id LIMIT 1",
35+
):
36+
rows = kuzu_graph._rows(pattern) # noqa: SLF001
37+
if rows:
38+
return str(rows[0]["id"])
39+
return None
40+
41+
3042
def _first_route_with_handler(kuzu_graph) -> str:
3143
for route in kuzu_graph.list_routes(limit=200):
3244
if kuzu_graph.find_route_handlers(route_id=route["id"]):
@@ -281,6 +293,24 @@ def test_neighbors_out_calls(kuzu_graph) -> None:
281293
assert isinstance(out.results, list)
282294

283295

296+
def test_neighbors_edge_types_strict_subset_respects_label_filter(kuzu_graph) -> None:
297+
"""Regression (#119): Kuzu can drop `label(e) IN $list`; use OR of `label(e) = $p` instead."""
298+
mid = _method_id_declares_client_and_other_out_edge(kuzu_graph)
299+
if mid is None:
300+
pytest.skip("no method with DECLARES_CLIENT and CALLS or HTTP_CALLS out-edges")
301+
dc_rows = kuzu_graph._rows( # noqa: SLF001
302+
"MATCH (m:Symbol)-[e:DECLARES_CLIENT]->() WHERE m.id = $id RETURN count(e) AS n",
303+
{"id": mid},
304+
)
305+
assert dc_rows
306+
want_dc = int(dc_rows[0]["n"])
307+
assert want_dc >= 1
308+
out = neighbors_v2(mid, direction="out", edge_types=["DECLARES_CLIENT"], graph=kuzu_graph)
309+
assert out.success is True
310+
assert all(e.edge_type == "DECLARES_CLIENT" for e in out.results)
311+
assert len(out.results) == want_dc
312+
313+
284314
def test_neighbors_route_in_exposes_returns_handler(kuzu_graph) -> None:
285315
route_id = _first_route_with_handler(kuzu_graph)
286316
out = neighbors_v2(route_id, direction="in", edge_types=["EXPOSES"], graph=kuzu_graph)
@@ -291,7 +321,11 @@ def test_neighbors_route_in_exposes_returns_handler(kuzu_graph) -> None:
291321
def test_neighbors_route_in_http_calls_returns_callers(kuzu_graph) -> None:
292322
class FakeGraph:
293323
def _rows(self, query: str, params: dict[str, Any] | None = None) -> list[dict[str, Any]]:
294-
if "MATCH (a)<-[e]-(b)" in query:
324+
if (
325+
"MATCH (a)<-[e]-(b)" in query
326+
and "WHERE a.id" in query
327+
and "RETURN b.id AS other_id" in query
328+
):
295329
return [{"other_id": "sym:caller", "edge_type": "HTTP_CALLS", "confidence": 0.8, "match": "cross_service"}]
296330
if "MATCH (n:Symbol)" in query:
297331
return [

0 commit comments

Comments
 (0)