From ad274a062100508973811a501f9ad1824854750b Mon Sep 17 00:00:00 2001 From: Dmitry Teryaev Date: Sat, 16 May 2026 19:24:51 +0300 Subject: [PATCH 1/2] =?UTF-8?q?flip=20HTTP=5FCALLS=20to=20Client=E2=86=92R?= =?UTF-8?q?oute=20and=20reshape=20RouteCaller=20APIs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Emit HTTP_CALLS from Client nodes in pass5/6 and DDL; replace CallerInfo with RouteCaller in kuzu_queries (find_route_callers, trace_request_flow, impact expansion); two-hop HTTP in pr_analysis; update docs and PR-B tests. Co-authored-by: Cursor --- README.md | 4 +- build_ast_graph.py | 17 ++-- docs/AGENT-GUIDE.md | 8 +- docs/EDGE-NAVIGATION.md | 11 +-- docs/skills/java-codebase-explore.md | 8 +- java_ontology.py | 17 ++-- kuzu_queries.py | 101 +++++++++++++++----- pr_analysis.py | 8 +- tests/test_brownfield_clients.py | 4 +- tests/test_call_edges_e2e.py | 58 ++++++++++- tests/test_client_hint_recovery.py | 18 ++-- tests/test_cross_service_resolution_flag.py | 3 +- tests/test_feign_not_exposer.py | 5 +- tests/test_kuzu_queries.py | 32 +++++++ tests/test_mcp_hints.py | 2 +- tests/test_mcp_v2.py | 2 +- tests/test_mcp_v2_compose.py | 20 ++++ tests/test_pr_analysis.py | 23 ++++- tests/test_schema_consistency.py | 17 ++-- 19 files changed, 276 insertions(+), 82 deletions(-) diff --git a/README.md b/README.md index 9d32915..1cf6984 100644 --- a/README.md +++ b/README.md @@ -385,7 +385,7 @@ Unresolved targets become **phantom** nodes (`resolved=false`, FQN guessed from | `DECLARES_CLIENT` | type → client | Type declares an outbound call site. | | `CALLS` | method → method | In-process call (confidence-scored, strategy-tagged). | | `EXPOSES` | type → route | Type exposes an HTTP/async route. | -| `HTTP_CALLS` | symbol → route | Cross-service HTTP call (caller-side). | +| `HTTP_CALLS` | client → route | Cross-service HTTP call (caller-side Client to target Route). | | `ASYNC_CALLS` | symbol → route | Cross-service async (Kafka, Rabbit, JMS, …). | JDK / Spring / Lombok callees are represented as **phantom** method symbols at index time. Caller/callee traversals default to `exclude_external=true` so those edges are filtered by FQN prefix without dropping them from the graph. @@ -426,7 +426,7 @@ Resolution order for `microservice`: Current ontology version is **14**. Any index built before this version must be rebuilt via `cocoindex update ... --full-reprocess -f` or a full `java-codebase-rag reprocess` (no selective flags) so vectors and graph stay aligned. Until re-indexed, the server defensively JSON-decodes string-form list columns so nothing explodes, but filters like `array_contains` will not work. -Ontology **14** introduces `EDGE_SCHEMA` in `java_ontology.py` as the canonical edge navigation schema (see `docs/EDGE-NAVIGATION.md`). **This PR-A bump alone does not flip `HTTP_CALLS` / `ASYNC_CALLS` endpoints** — graphs rebuilt at v14 still use `Symbol → Route` for those edges until SCHEMA-V2 PR-B/C land. **PR-B** flips `HTTP_CALLS` to `Client → Route`; **PR-C** adds the `Producer` node, `DECLARES_PRODUCER`, and flips `ASYNC_CALLS` to `Producer → Route`. Run one full reprocess after upgrading through the SCHEMA-V2 sequence (or when you need the v14 ontology gate). +Ontology **14** introduces `EDGE_SCHEMA` in `java_ontology.py` as the canonical edge navigation schema (see `docs/EDGE-NAVIGATION.md`). **`HTTP_CALLS` is `Client → Route`** (SCHEMA-V2 PR-B). **`ASYNC_CALLS` remains `Symbol → Route` until PR-C**, which adds the `Producer` node, `DECLARES_PRODUCER`, and flips `ASYNC_CALLS` to `Producer → Route`. Run one full reprocess after upgrading through the SCHEMA-V2 sequence (or when you need the v14 ontology gate). Ontology **13** materializes stored `OVERRIDES` edges between method Symbols (subtype override → supertype declaration, matching `signature` on a direct `IMPLEMENTS` / `EXTENDS` hop). `neighbors(edge_types=["OVERRIDES"])` traverses this relationship; `OVERRIDDEN_BY*` keys in `edge_summary` remain describe-time rollups only. diff --git a/build_ast_graph.py b/build_ast_graph.py index 1080887..9c62967 100644 --- a/build_ast_graph.py +++ b/build_ast_graph.py @@ -244,7 +244,7 @@ class RouteExtractionStats: @dataclass class HttpCallRow: - symbol_id: str + client_id: str route_id: str confidence: float strategy: str @@ -1690,14 +1690,14 @@ def _phantom_async_route_id(call: OutgoingCallDecl) -> str: source_layer="builtin", ) ) - key = (member.node_id, rid) + key = (cid, rid) if key in http_seen: continue http_seen.add(key) conf = call.confidence_base * 0.3 * micro_factor tables.http_call_rows.append( HttpCallRow( - symbol_id=member.node_id, + client_id=cid, route_id=rid, confidence=conf, strategy=strategy, @@ -1947,7 +1947,8 @@ def _micro_factor(member: MemberEntry | None) -> float: for row in tables.http_call_rows: if row.match != "unresolved": continue - member = member_by_id.get(row.symbol_id) + client = clients_by_id.get(row.client_id) + member = member_by_id.get(client.member_id) if client else None base = row.confidence / max(1e-9, (0.3 * _micro_factor(member))) src_route = route_by_id.get(row.route_id) if src_route is None and member is not None: @@ -2202,7 +2203,7 @@ def _micro_factor(member: MemberEntry | None) -> float: "confidence DOUBLE, strategy STRING)" ) _SCHEMA_HTTP_CALLS = ( - "CREATE REL TABLE HTTP_CALLS(FROM Symbol TO Route, " + "CREATE REL TABLE HTTP_CALLS(FROM Client TO Route, " "confidence DOUBLE, strategy STRING, " "method_call STRING, raw_uri STRING, match STRING)" ) @@ -2402,8 +2403,8 @@ def _write_nodes( "CREATE (s)-[:DECLARES_CLIENT {confidence: $confidence, strategy: $strategy}]->(c)" ) _CREATE_HTTP_CALL = ( - "MATCH (s:Symbol {id: $sid}), (r:Route {id: $rid}) " - "CREATE (s)-[:HTTP_CALLS {confidence: $confidence, strategy: $strategy, " + "MATCH (c:Client {id: $cid}), (r:Route {id: $rid}) " + "CREATE (c)-[:HTTP_CALLS {confidence: $confidence, strategy: $strategy, " "method_call: $method_call, raw_uri: $raw_uri, match: $match}]->(r)" ) _CREATE_ASYNC_CALL = ( @@ -2543,7 +2544,7 @@ def _write_routes_and_exposes(conn: kuzu.Connection, tables: GraphTables) -> Non }) for row in tables.http_call_rows: conn.execute(_CREATE_HTTP_CALL, { - "sid": row.symbol_id, + "cid": row.client_id, "rid": row.route_id, "confidence": row.confidence, "strategy": row.strategy, diff --git a/docs/AGENT-GUIDE.md b/docs/AGENT-GUIDE.md index 01fe134..e330b5c 100644 --- a/docs/AGENT-GUIDE.md +++ b/docs/AGENT-GUIDE.md @@ -14,8 +14,8 @@ > > Calibrated against ontology version **14** (see `ast_java.ONTOLOGY_VERSION` / > `java_ontology.EDGE_SCHEMA` + valid sets): canonical edge navigation schema in -> `docs/EDGE-NAVIGATION.md`. v14 re-index required; PR-B flips `HTTP_CALLS` to -> `Client → Route`; PR-C adds `Producer` + `DECLARES_PRODUCER` and flips `ASYNC_CALLS`. +> `docs/EDGE-NAVIGATION.md`. v14 re-index required; `HTTP_CALLS` is `Client → Route`; +> PR-C adds `Producer` + `DECLARES_PRODUCER` and flips `ASYNC_CALLS`. > Still includes stored `OVERRIDES` Symbol→Symbol edges and v12 HTTP brownfield > (`@CodebaseHttpClient`, shared `CodebaseHttpMethod` enum, inbound layer-C HTTP routes > replace same-method built-in rows). **Design rationale:** navigation surface and tools — @@ -96,7 +96,7 @@ Use these strings **verbatim** in `neighbors(..., edge_types=[...])`: | Method overrides | `OVERRIDES` | Subtype **method** → supertype **declaration** method (same `signature`, one `IMPLEMENTS`/`EXTENDS` hop). `in` = overriders; `out` = overridden declarations | | Method calls | `CALLS` | `in` = callers; `out` = callees | | Service boundary | `EXPOSES` | Symbol → Route (handler exposes route) | -| Cross-service | `HTTP_CALLS`, `ASYNC_CALLS` | Symbol → Route across services | +| Cross-service | `HTTP_CALLS`, `ASYNC_CALLS` | `HTTP_CALLS`: Client → Route; `ASYNC_CALLS`: Symbol → Route until SCHEMA-V2 PR-C (`Producer` node) | Symmetric: cross-service and intra-service questions use the **same** `neighbors` call with different `edge_types`. @@ -263,7 +263,7 @@ Virtual keys (`OVERRIDDEN_BY`, …) and composed dot-keys are **not** valid `Edg ### Ontology glossary (version 14) -Source of truth: `java_ontology.py` (`EDGE_SCHEMA`, valid sets). Strings are case-sensitive. Edge navigation: [`docs/EDGE-NAVIGATION.md`](./EDGE-NAVIGATION.md) — use `*_current` traversal keys for `HTTP_CALLS` / `ASYNC_CALLS` until SCHEMA-V2 PR-B/C flip endpoints. +Source of truth: `java_ontology.py` (`EDGE_SCHEMA`, valid sets). Strings are case-sensitive. Edge navigation: [`docs/EDGE-NAVIGATION.md`](./EDGE-NAVIGATION.md) — for `HTTP_CALLS`, traverse via `DECLARES_CLIENT` from a method Symbol or `neighbors` outbound from a Client id; `ASYNC_CALLS` still uses `*_current` member traversals until SCHEMA-V2 PR-C. **Roles:** `CONTROLLER`, `SERVICE`, `REPOSITORY`, `COMPONENT`, `CONFIG`, `ENTITY`, `CLIENT`, `MAPPER`, `DTO`, `OTHER`. diff --git a/docs/EDGE-NAVIGATION.md b/docs/EDGE-NAVIGATION.md index 0dfabdc..e110d45 100644 --- a/docs/EDGE-NAVIGATION.md +++ b/docs/EDGE-NAVIGATION.md @@ -15,7 +15,7 @@ | CALLS | Symbol | Symbol | many_to_many | yes | yes | | EXPOSES | Symbol | Route | one_to_one | yes | yes | | DECLARES_CLIENT | Symbol | Client | one_to_many | yes | yes | -| HTTP_CALLS | Symbol | Route | many_to_many | yes | no | +| HTTP_CALLS | Client | Route | many_to_many | yes | no | | ASYNC_CALLS | Symbol | Route | many_to_many | yes | no | ## EXTENDS @@ -185,12 +185,12 @@ ## HTTP_CALLS -**Endpoints**: `Symbol → Route` +**Endpoints**: `Client → Route` **Cardinality**: `many_to_many` **Brownfield-resolver-sourced**: yes **Member-only** (hints): no -**Purpose**: resolved HTTP call from declaring method to target route (pre-flip: Symbol→Route; PR-B: Client→Route) +**Purpose**: resolved HTTP call from a declared Client to a target route **Attributes**: @@ -202,11 +202,10 @@ **Typical traversals**: -- `type_subject_current`: neighbors(['{id}'],'out',['DECLARES']) then neighbors(member_ids,'out',['HTTP_CALLS']) - `type_subject`: neighbors(['{id}'],'out',['DECLARES']) then neighbors(member_ids,'out',['DECLARES_CLIENT']) then neighbors(client_ids,'out',['HTTP_CALLS']) -- `member_subject_current`: neighbors(['{id}'],'out',['HTTP_CALLS']) - `member_subject`: neighbors(['{id}'],'out',['DECLARES_CLIENT']) then neighbors(client_ids,'out',['HTTP_CALLS']) -- `alien_subject`: HTTP_CALLS is Symbol→Route until PR-B; use member_subject_current. After PR-B (Client→Route), use member_subject via DECLARES_CLIENT +- `route_subject`: neighbors(['{id}'],'in',['HTTP_CALLS']) then neighbors(client_ids,'in',['DECLARES_CLIENT']) for declaring method +- `alien_subject`: HTTP_CALLS connects Client→Route; use DECLARES_CLIENT from a method Symbol, or neighbors(client_id,'out',['HTTP_CALLS']) from a Client id ## ASYNC_CALLS diff --git a/docs/skills/java-codebase-explore.md b/docs/skills/java-codebase-explore.md index 5df33d5..e27ac49 100644 --- a/docs/skills/java-codebase-explore.md +++ b/docs/skills/java-codebase-explore.md @@ -98,8 +98,8 @@ You cannot reason reliably about cross-service behaviour until these surfaces ex **Sequence:** 1. Cluster routes by path prefix; **`describe`** on representative `route:` ids. -2. For each major route, **`neighbors(direction="in", edge_types=["EXPOSES"])`** (and `HTTP_CALLS` / `ASYNC_CALLS` when tracing callers) to land on handler symbols; then outbound `CALLS` as needed. -3. Use **`find(kind="client", …)`** with the same microservice filter to list outbound integration points; follow **`HTTP_CALLS` / `ASYNC_CALLS`** edges when present. +2. For each major route, **`neighbors(direction="in", edge_types=["EXPOSES"])`** to land on handler symbols; for inbound **`HTTP_CALLS`**, expect **Client** callers (then **`DECLARES_CLIENT` inbound** to the declaring method); **`ASYNC_CALLS`** inbound still lands on Symbol callers until PR-C. +3. Use **`find(kind="client", …)`** with the same microservice filter to list outbound integration points; follow outbound **`HTTP_CALLS`** from each Client (or **`ASYNC_CALLS`** from methods until Producer lands). **Stopping rule:** You can summarize how traffic enters the service, what modules/controllers own key paths, and what external systems it calls—**without** claiming tests, runtime config, or unindexed siblings exist in MCP. @@ -116,7 +116,7 @@ You cannot reason reliably about cross-service behaviour until these surfaces ex **Sequence:** 1. **`neighbors(direction="in", edge_types=["EXPOSES"])`** onto the handling symbol; walk **`CALLS`** outbound method-by-method. -2. When a method shows outbound HTTP/async, use **`neighbors`** with **`HTTP_CALLS` / `ASYNC_CALLS`** (direction per question) and follow to target routes or async targets. +2. When a method makes outbound HTTP, **`neighbors(..., out, ["DECLARES_CLIENT"])`** then outbound **`HTTP_CALLS`** from each Client id; for async (pre-PR-C), **`ASYNC_CALLS`** may still be direct from the method Symbol. 3. Stop at leaves, framework boundaries, or unresolved edges; read **`edge.attrs`** (`attrs.confidence`, `attrs.strategy`, `attrs.match`) and report low-confidence segments as resolver gaps, not as facts. **Stopping rule:** You reach a stable leaf (external IO, message publish, clear terminal layer) **or** you document every unresolved hop with a concrete next non-MCP check. @@ -243,7 +243,7 @@ Ten edge types: | Method overrides | `OVERRIDES` | | Method calls | `CALLS` | | Service boundary | `EXPOSES` | -| Cross-service | `HTTP_CALLS`, `ASYNC_CALLS` | +| Cross-service | `HTTP_CALLS` (Client→Route), `ASYNC_CALLS` (Symbol→Route until Producer) | For exact argument shapes, recovery playbook, and slash aliases see [`docs/AGENT-GUIDE.md`](https://github.com/HumanBean17/java-codebase-rag/blob/master/docs/AGENT-GUIDE.md) in the java-codebase-rag repo. diff --git a/java_ontology.py b/java_ontology.py index 9b55b54..41737a9 100644 --- a/java_ontology.py +++ b/java_ontology.py @@ -303,7 +303,7 @@ class EdgeSpec: ), "HTTP_CALLS": EdgeSpec( name="HTTP_CALLS", - src="Symbol", + src="Client", dst="Route", cardinality="many_to_many", brownfield_resolver_sourced=True, @@ -314,25 +314,24 @@ class EdgeSpec: EdgeAttr("raw_uri", "STRING", "uninterpolated URI template from the call site"), EdgeAttr("match", "STRING", "cross_service|intra_service|ambiguous|phantom|unresolved"), ), - purpose="resolved HTTP call from declaring method to target route (pre-flip: Symbol→Route; PR-B: Client→Route)", + purpose="resolved HTTP call from a declared Client to a target route", typical_traversals={ - "type_subject_current": ( - "neighbors(['{id}'],'out',['DECLARES']) " - "then neighbors(member_ids,'out',['HTTP_CALLS'])" - ), "type_subject": ( "neighbors(['{id}'],'out',['DECLARES']) " "then neighbors(member_ids,'out',['DECLARES_CLIENT']) " "then neighbors(client_ids,'out',['HTTP_CALLS'])" ), - "member_subject_current": "neighbors(['{id}'],'out',['HTTP_CALLS'])", "member_subject": ( "neighbors(['{id}'],'out',['DECLARES_CLIENT']) " "then neighbors(client_ids,'out',['HTTP_CALLS'])" ), + "route_subject": ( + "neighbors(['{id}'],'in',['HTTP_CALLS']) " + "then neighbors(client_ids,'in',['DECLARES_CLIENT']) for declaring method" + ), "alien_subject": ( - "HTTP_CALLS is Symbol→Route until PR-B; use member_subject_current. " - "After PR-B (Client→Route), use member_subject via DECLARES_CLIENT" + "HTTP_CALLS connects Client→Route; use DECLARES_CLIENT from a method Symbol, " + "or neighbors(client_id,'out',['HTTP_CALLS']) from a Client id" ), }, ), diff --git a/kuzu_queries.py b/kuzu_queries.py index b8673ae..8168eb6 100644 --- a/kuzu_queries.py +++ b/kuzu_queries.py @@ -19,7 +19,7 @@ import threading from dataclasses import asdict, dataclass from pathlib import Path -from typing import Any +from typing import Any, Literal import kuzu @@ -46,7 +46,7 @@ def _coerce_id_list(raw: Any) -> list[str]: "CallEdge", "ViaEdge", "StageSymbol", - "CallerInfo", + "RouteCaller", "find_symbols_in_file_range", ] @@ -133,11 +133,15 @@ class StageSymbol: @dataclass -class CallerInfo: - caller_symbol_id: str +class RouteCaller: + caller_node_id: str + caller_node_kind: Literal["client", "producer"] caller_microservice: str + declaring_symbol_id: str confidence: float match: str + target_service: str = "" + raw_uri: str = "" def _symbol_return_for(alias: str) -> str: @@ -1332,19 +1336,35 @@ def _ingest_flow_row( ) scrf = (" AND " + " AND ".join(scope_rf)) if scope_rf else "" qrf = ( - "MATCH (root:Symbol)-[:DECLARES]->(m1:Symbol)-[e:HTTP_CALLS|ASYNC_CALLS]->(rt:Route)" - "<-[:EXPOSES]-(handler:Symbol)<-[:DECLARES]-(n:Symbol) " + "MATCH (root:Symbol)-[:DECLARES]->(m1:Symbol)-[:DECLARES_CLIENT]->(c:Client)" + "-[e:HTTP_CALLS]->(rt:Route)<-[:EXPOSES]-(handler:Symbol)<-[:DECLARES]-(n:Symbol) " "WHERE root.fqn IN $fqns AND n.role IN $roles " "AND n.resolved AND n.kind IN ['class','interface','enum','record','annotation'] " "AND e.confidence >= $mc AND root.microservice <> n.microservice " f"{scrf} " - f"RETURN {_symbol_return_for('n')}, label(e) AS edge_type, root.fqn AS from_fqn " + f"RETURN {_symbol_return_for('n')}, 'HTTP_CALLS' AS edge_type, root.fqn AS from_fqn " f"LIMIT {max(1, remaining * 4)}" ) for row in self._rows(qrf, params_rf): _ingest_flow_row(row, filter_external_fqn=True) if len(stage_results) >= stage_limit: break + if len(stage_results) < stage_limit: + remaining = stage_limit - len(stage_results) + qrf_async = ( + "MATCH (root:Symbol)-[:DECLARES]->(m1:Symbol)-[e:ASYNC_CALLS]->(rt:Route)" + "<-[:EXPOSES]-(handler:Symbol)<-[:DECLARES]-(n:Symbol) " + "WHERE root.fqn IN $fqns AND n.role IN $roles " + "AND n.resolved AND n.kind IN ['class','interface','enum','record','annotation'] " + "AND e.confidence >= $mc AND root.microservice <> n.microservice " + f"{scrf} " + f"RETURN {_symbol_return_for('n')}, 'ASYNC_CALLS' AS edge_type, root.fqn AS from_fqn " + f"LIMIT {max(1, remaining * 4)}" + ) + for row in self._rows(qrf_async, params_rf): + _ingest_flow_row(row, filter_external_fqn=True) + if len(stage_results) >= stage_limit: + break current_frontier = next_frontier if len(stage_results) >= stage_limit: @@ -1467,7 +1487,7 @@ def find_route_callers( microservice: str = "", path_template: str = "", method: str = "", - ) -> list[CallerInfo]: + ) -> list[RouteCaller]: rid = route_id or "" if not rid: params: dict[str, Any] = { @@ -1486,36 +1506,73 @@ def find_route_callers( rid = str(rows[0].get("id") or "") if not rid: return [] - rows = self._rows( - "MATCH (s:Symbol)-[e:HTTP_CALLS|ASYNC_CALLS]->(r:Route {id: $rid}) " - "RETURN s.id AS caller_symbol_id, s.microservice AS caller_microservice, " - "e.confidence AS confidence, e.match AS match " - "ORDER BY e.confidence DESC, s.id", + http_rows = self._rows( + "MATCH (s:Symbol)-[:DECLARES_CLIENT]->(c:Client)-[e:HTTP_CALLS]->(r:Route {id: $rid}) " + "RETURN c.id AS caller_node_id, c.microservice AS caller_microservice, " + "s.id AS declaring_symbol_id, e.confidence AS confidence, e.match AS match, " + "c.target_service AS target_service, e.raw_uri AS raw_uri " + "ORDER BY e.confidence DESC, c.id", {"rid": rid}, ) - out: list[CallerInfo] = [] - for row in rows: + async_rows = self._rows( + "MATCH (s:Symbol)-[e:ASYNC_CALLS]->(r:Route {id: $rid}) " + "RETURN s.id AS caller_node_id, s.microservice AS caller_microservice, " + "s.id AS declaring_symbol_id, e.confidence AS confidence, e.match AS match", + {"rid": rid}, + ) + out: list[RouteCaller] = [] + for row in http_rows: + out.append( + RouteCaller( + caller_node_id=str(row.get("caller_node_id") or ""), + caller_node_kind="client", + caller_microservice=str(row.get("caller_microservice") or ""), + declaring_symbol_id=str(row.get("declaring_symbol_id") or ""), + confidence=float(row.get("confidence") or 0.0), + match=str(row.get("match") or ""), + target_service=str(row.get("target_service") or ""), + raw_uri=str(row.get("raw_uri") or ""), + ), + ) + for row in async_rows: + sym_id = str(row.get("caller_node_id") or "") out.append( - CallerInfo( - caller_symbol_id=str(row.get("caller_symbol_id") or ""), + RouteCaller( + caller_node_id=sym_id, + caller_node_kind="client", caller_microservice=str(row.get("caller_microservice") or ""), + declaring_symbol_id=str(row.get("declaring_symbol_id") or ""), confidence=float(row.get("confidence") or 0.0), match=str(row.get("match") or ""), ), ) + out.sort(key=lambda c: (-c.confidence, c.caller_node_id)) return out def trace_request_flow(self, entry_route_id: str, max_hops: int = 5) -> dict[str, Any]: hops = max(1, min(int(max_hops), 8)) - inbound = self._rows( - f"MATCH (entry:Route {{id: $rid}})<-[e:HTTP_CALLS|ASYNC_CALLS]-(caller:Symbol) " + inbound_http = self._rows( + f"MATCH (entry:Route {{id: $rid}})<-[e:HTTP_CALLS]-(caller:Client)" + "<-[:DECLARES_CLIENT]-(decl:Symbol) " + f"OPTIONAL MATCH (origin:Symbol)-[:CALLS*0..{hops}]->(decl) " + "RETURN DISTINCT caller.id AS caller_node_id, 'client' AS caller_node_kind, " + "decl.id AS declaring_symbol_id, decl.fqn AS declaring_symbol_fqn, " + "caller.microservice AS microservice, e.confidence AS confidence, " + "e.match AS match, origin.id AS origin_symbol_id, origin.fqn AS origin_fqn " + "ORDER BY confidence DESC, caller_node_id", + {"rid": entry_route_id}, + ) + inbound_async = self._rows( + f"MATCH (entry:Route {{id: $rid}})<-[e:ASYNC_CALLS]-(caller:Symbol) " f"OPTIONAL MATCH (origin:Symbol)-[:CALLS*0..{hops}]->(caller) " - "RETURN DISTINCT caller.id AS caller_symbol_id, caller.fqn AS caller_fqn, " - "caller.microservice AS caller_microservice, e.confidence AS confidence, " + "RETURN DISTINCT caller.id AS caller_node_id, 'client' AS caller_node_kind, " + "caller.id AS declaring_symbol_id, caller.fqn AS declaring_symbol_fqn, " + "caller.microservice AS microservice, e.confidence AS confidence, " "e.match AS match, origin.id AS origin_symbol_id, origin.fqn AS origin_fqn " - "ORDER BY confidence DESC, caller_symbol_id", + "ORDER BY confidence DESC, caller_node_id", {"rid": entry_route_id}, ) + inbound = inbound_http + inbound_async outbound = self._rows( f"MATCH (handler:Symbol)-[:EXPOSES]->(entry:Route {{id: $rid}}) " f"OPTIONAL MATCH (handler)-[:CALLS*0..{hops}]->(next:Symbol) " diff --git a/pr_analysis.py b/pr_analysis.py index 3c812d6..72ffd05 100644 --- a/pr_analysis.py +++ b/pr_analysis.py @@ -433,7 +433,13 @@ def compute_risk(graph: Any, changed: list[ChangedSymbol]) -> PrRiskReport: if rid not in routes: routes.append(rid) callers = graph._rows( - "MATCH (s:Symbol)-[e:HTTP_CALLS|ASYNC_CALLS]->(r:Route {id: $rid}) " + "MATCH (s:Symbol)-[:DECLARES_CLIENT]->(c:Client)-[e:HTTP_CALLS]->(r:Route {id: $rid}) " + "WHERE e.match = 'cross_service' " + "RETURN c.id AS id LIMIT 500", + {"rid": rid}, + ) + callers += graph._rows( + "MATCH (s:Symbol)-[e:ASYNC_CALLS]->(r:Route {id: $rid}) " "WHERE e.match = 'cross_service' " "RETURN s.id AS id LIMIT 500", {"rid": rid}, diff --git a/tests/test_brownfield_clients.py b/tests/test_brownfield_clients.py index 353668d..ba902fe 100644 --- a/tests/test_brownfield_clients.py +++ b/tests/test_brownfield_clients.py @@ -44,8 +44,8 @@ def _http_calls(db_path: Path) -> list[dict]: db = kuzu.Database(str(db_path), read_only=True) conn = kuzu.Connection(db) r = conn.execute( - "MATCH (s:Symbol)-[c:HTTP_CALLS]->(rt:Route) " - "RETURN s.fqn AS fqn, c.strategy AS strategy, c.method_call AS method_call, " + "MATCH (c:Client)-[h:HTTP_CALLS]->(rt:Route) " + "RETURN c.member_fqn AS fqn, h.strategy AS strategy, h.method_call AS method_call, " "rt.path_template AS path_template, rt.feign_name AS feign_name ORDER BY fqn, path_template", ) out: list[dict] = [] diff --git a/tests/test_call_edges_e2e.py b/tests/test_call_edges_e2e.py index e42f995..9d7046e 100644 --- a/tests/test_call_edges_e2e.py +++ b/tests/test_call_edges_e2e.py @@ -1,5 +1,6 @@ from __future__ import annotations +import shutil from pathlib import Path import kuzu @@ -7,6 +8,8 @@ from ast_java import ONTOLOGY_VERSION from kuzu_queries import KuzuGraph +_STUB_ROOT = Path(__file__).resolve().parent / "fixtures" / "brownfield_client_stubs" + def _scalar(db_path: Path, query: str) -> int: conn = kuzu.Connection(kuzu.Database(str(db_path), read_only=True)) @@ -14,8 +17,27 @@ def _scalar(db_path: Path, query: str) -> int: return int(r.get_next()[0] or 0) if r.has_next() else 0 +def _build_repeatable_clients(tmp_path: Path) -> Path: + shutil.copytree(_STUB_ROOT, tmp_path, dirs_exist_ok=True) + java_dir = tmp_path / "p" + java_dir.mkdir(parents=True, exist_ok=True) + (java_dir / "X.java").write_text( + "package p; import com.example.rag.*; class X { " + "@CodebaseHttpClients({" + "@CodebaseHttpClient(clientKind=CodebaseClientKind.rest_template, path=\"/r1\", method=CodebaseHttpMethod.GET)," + "@CodebaseHttpClient(clientKind=CodebaseClientKind.rest_template, path=\"/r2\", method=CodebaseHttpMethod.POST)" + "}) void m() {} }", + encoding="utf-8", + ) + from _builders import build_kuzu_full_into + + db_path = tmp_path / "g.kuzu" + build_kuzu_full_into(tmp_path, db_path) + return db_path + + def test_http_calls_table_built_on_bank_chat(kuzu_db_path: Path) -> None: - assert _scalar(kuzu_db_path, "MATCH (:Symbol)-[r:HTTP_CALLS]->(:Route) RETURN count(r)") >= 2 + assert _scalar(kuzu_db_path, "MATCH (:Client)-[r:HTTP_CALLS]->(:Route) RETURN count(r)") >= 2 def test_async_calls_table_built_on_bank_chat(kuzu_db_path: Path) -> None: @@ -31,12 +53,12 @@ def test_phantom_routes_dedup_across_call_sites(kuzu_db_path_http_caller_smoke: db = kuzu_db_path_http_caller_smoke route_ids = _scalar( db, - "MATCH (s:Symbol)-[r:HTTP_CALLS]->(rt:Route) " + "MATCH (c:Client)-[r:HTTP_CALLS]->(rt:Route) " "WHERE rt.path_template='/api/users' AND rt.method='GET' AND rt.microservice='' RETURN count(DISTINCT rt.id)", ) edges = _scalar( db, - "MATCH (s:Symbol)-[r:HTTP_CALLS]->(rt:Route) " + "MATCH (c:Client)-[r:HTTP_CALLS]->(rt:Route) " "WHERE rt.path_template='/api/users' AND rt.method='GET' AND rt.microservice='' RETURN count(r)", ) assert route_ids == 1 @@ -55,3 +77,33 @@ def test_graph_meta_call_edge_counters(kuzu_db_path: Path) -> None: def test_ontology_version_matches_graph_meta(kuzu_db_path: Path) -> None: assert KuzuGraph(str(kuzu_db_path)).meta()["ontology_version"] == ONTOLOGY_VERSION + + +def test_call_edges_client_outbound_http_calls_returns_routes(kuzu_db_path_http_caller_smoke: Path) -> None: + db = kuzu_db_path_http_caller_smoke + n = _scalar( + db, + "MATCH (c:Client) WHERE c.path_template='/api/users' AND c.method='GET' " + "MATCH (c)-[:HTTP_CALLS]->(:Route) RETURN count(*)", + ) + assert n >= 1 + + +def test_call_edges_method_two_http_clients_two_routes(tmp_path: Path) -> None: + db = _build_repeatable_clients(tmp_path) + client_routes = _scalar( + db, + "MATCH (c:Client)-[:HTTP_CALLS]->(:Route) RETURN count(DISTINCT c.id)", + ) + assert client_routes >= 2 + + +def test_call_edges_cross_service_http_four_hop(kuzu_db_path_cross_service_smoke: Path) -> None: + db = kuzu_db_path_cross_service_smoke + n = _scalar( + db, + "MATCH (m:Symbol)-[:DECLARES_CLIENT]->(c:Client)-[:HTTP_CALLS]->(rt:Route)" + "<-[:EXPOSES]-(h:Symbol) RETURN count(*)", + ) + assert n >= 1 + diff --git a/tests/test_client_hint_recovery.py b/tests/test_client_hint_recovery.py index 5618191..89c8df0 100644 --- a/tests/test_client_hint_recovery.py +++ b/tests/test_client_hint_recovery.py @@ -21,8 +21,11 @@ def _member_id(tables: GraphTables, *, parent_fqn: str, method_name: str) -> str raise AssertionError(f"member not found: {parent_fqn}#{method_name}") -def _first_http_call_for_symbol(tables: GraphTables, symbol_id: str): - row = next((r for r in tables.http_call_rows if r.symbol_id == symbol_id), None) +def _first_http_call_for_member(tables: GraphTables, member_id: str): + client_ids = { + e.client_id for e in tables.declares_client_rows if e.symbol_id == member_id + } + row = next((r for r in tables.http_call_rows if r.client_id in client_ids), None) assert row is not None return row @@ -34,14 +37,14 @@ def test_pass6_uses_client_hints_for_feign_resolution() -> None: parent_fqn="smoke.a.BFeignClient", method_name="joinOperator", ) - row = _first_http_call_for_symbol(tables, caller_id) + row = _first_http_call_for_member(tables, caller_id) row.route_id = "missing:route:id" row.match = "unresolved" pass6_match_edges(tables, verbose=False) route_by_id = {r.id: r for r in tables.routes_rows} - resolved = _first_http_call_for_symbol(tables, caller_id) + resolved = _first_http_call_for_member(tables, caller_id) assert resolved.match == "cross_service" assert route_by_id[resolved.route_id].microservice == "svc-b" @@ -54,7 +57,7 @@ def test_cross_service_match_outcome_unchanged_after_client_migration() -> None: parent_fqn="smoke.a.BFeignClient", method_name="joinOperator", ) - row = _first_http_call_for_symbol(tables, caller_id) + row = _first_http_call_for_member(tables, caller_id) assert row.match == "cross_service" @@ -77,7 +80,8 @@ def test_find_route_callers_still_returns_expected_feign_caller(tmp_path: Path) path_template="/chat/joinOperator", method="POST", ) - assert any(c.caller_symbol_id == caller_id for c in callers) + assert any(c.declaring_symbol_id == caller_id for c in callers) + assert all(c.caller_node_kind == "client" for c in callers) def test_missing_client_hint_falls_back_to_existing_unresolved_or_phantom_flow() -> None: @@ -87,9 +91,9 @@ def test_missing_client_hint_falls_back_to_existing_unresolved_or_phantom_flow() parent_fqn="smoke.a.BFeignClient", method_name="joinOperator", ) + row = _first_http_call_for_member(tables, caller_id) tables.declares_client_rows = [r for r in tables.declares_client_rows if r.symbol_id != caller_id] tables.client_rows = [c for c in tables.client_rows if c.member_id != caller_id] - row = _first_http_call_for_symbol(tables, caller_id) row.route_id = "missing:route:id" row.match = "unresolved" diff --git a/tests/test_cross_service_resolution_flag.py b/tests/test_cross_service_resolution_flag.py index 32e6d04..591161f 100644 --- a/tests/test_cross_service_resolution_flag.py +++ b/tests/test_cross_service_resolution_flag.py @@ -36,8 +36,9 @@ def _http_row_for_method(tables: GraphTables, method_name: str, *, parent_fqn: s mid = m.node_id break assert mid is not None + client_ids = {e.client_id for e in tables.declares_client_rows if e.symbol_id == mid} for r in tables.http_call_rows: - if r.symbol_id == mid: + if r.client_id in client_ids: return r return None diff --git a/tests/test_feign_not_exposer.py b/tests/test_feign_not_exposer.py index 1120619..a483721 100644 --- a/tests/test_feign_not_exposer.py +++ b/tests/test_feign_not_exposer.py @@ -41,7 +41,10 @@ def test_feign_caller_resolves_to_target_endpoint(graph_tables_cross_service_smo parent_fqn="smoke.a.BFeignClient", method_name="joinOperator", ) - row = next((r for r in tables.http_call_rows if r.symbol_id == caller_id), None) + client_ids = { + e.client_id for e in tables.declares_client_rows if e.symbol_id == caller_id + } + row = next((r for r in tables.http_call_rows if r.client_id in client_ids), None) assert row is not None route = next((r for r in tables.routes_rows if r.id == row.route_id), None) assert route is not None diff --git a/tests/test_kuzu_queries.py b/tests/test_kuzu_queries.py index ff5d974..f1caa32 100644 --- a/tests/test_kuzu_queries.py +++ b/tests/test_kuzu_queries.py @@ -463,3 +463,35 @@ def test_get_route_by_path_microservice_isolated(kuzu_graph_route_extraction_smo assert ra["microservice"] == "service-a" assert rb["microservice"] == "service-b" assert ra["id"] != rb["id"] + + +def test_find_route_callers_returns_route_caller_client_node(kuzu_db_path_cross_service_smoke: Path) -> None: + from kuzu_queries import RouteCaller + + g = KuzuGraph(str(kuzu_db_path_cross_service_smoke)) + routes = g.list_routes(limit=50) + callers: list[RouteCaller] = [] + for route in routes: + callers = g.find_route_callers(route["id"]) + if callers: + break + assert callers + http_callers = [c for c in callers if c.match] + assert any(c.caller_node_kind == "client" for c in http_callers) + assert all(c.caller_node_id for c in http_callers) + + +def test_trace_request_flow_inbound_includes_caller_node_id(kuzu_db_path_cross_service_smoke: Path) -> None: + g = KuzuGraph(str(kuzu_db_path_cross_service_smoke)) + route_id = None + for route in g.list_routes(limit=50): + flow = g.trace_request_flow(route["id"], max_hops=2) + inbound = flow.get("inbound") or [] + if inbound: + route_id = route["id"] + break + assert route_id is not None + flow = g.trace_request_flow(route_id, max_hops=2) + inbound = flow.get("inbound") or [] + assert inbound + assert any(row.get("caller_node_id") for row in inbound) diff --git a/tests/test_mcp_hints.py b/tests/test_mcp_hints.py index 2e2c004..17cdff3 100644 --- a/tests/test_mcp_hints.py +++ b/tests/test_mcp_hints.py @@ -56,7 +56,7 @@ def _interface_method_with_override_rollups(kuzu_graph) -> str: def _method_id_declares_client_and_other_out_edge(kuzu_graph) -> str | None: for pattern in ( "MATCH (m:Symbol {kind: 'method'})-[:DECLARES_CLIENT]->() MATCH (m)-[:CALLS]->() RETURN m.id AS id LIMIT 1", - "MATCH (m:Symbol {kind: 'method'})-[:DECLARES_CLIENT]->() MATCH (m)-[:HTTP_CALLS]->() RETURN m.id AS id LIMIT 1", + "MATCH (m:Symbol {kind: 'method'})-[:DECLARES_CLIENT]->(:Client)-[:HTTP_CALLS]->() RETURN m.id AS id LIMIT 1", ): rows = kuzu_graph._rows(pattern) # noqa: SLF001 if rows: diff --git a/tests/test_mcp_v2.py b/tests/test_mcp_v2.py index 201e919..a40f92e 100644 --- a/tests/test_mcp_v2.py +++ b/tests/test_mcp_v2.py @@ -56,7 +56,7 @@ def _method_id_declares_client_and_other_out_edge(kuzu_graph) -> str | None: """A method with DECLARES_CLIENT plus another out-label (Kuzu #119 strict-subset case).""" for pattern in ( "MATCH (m:Symbol {kind: 'method'})-[:DECLARES_CLIENT]->() MATCH (m)-[:CALLS]->() RETURN m.id AS id LIMIT 1", - "MATCH (m:Symbol {kind: 'method'})-[:DECLARES_CLIENT]->() MATCH (m)-[:HTTP_CALLS]->() RETURN m.id AS id LIMIT 1", + "MATCH (m:Symbol {kind: 'method'})-[:DECLARES_CLIENT]->(:Client)-[:HTTP_CALLS]->() RETURN m.id AS id LIMIT 1", ): rows = kuzu_graph._rows(pattern) # noqa: SLF001 if rows: diff --git a/tests/test_mcp_v2_compose.py b/tests/test_mcp_v2_compose.py index 98a3d0c..92ca5ea 100644 --- a/tests/test_mcp_v2_compose.py +++ b/tests/test_mcp_v2_compose.py @@ -537,3 +537,23 @@ def edge_pairs(db_path: Path) -> list[tuple[str, str]]: build_kuzu_to(_OVERRIDE_AXIS_FIXTURE, p1, max_pass=5) build_kuzu_to(_OVERRIDE_AXIS_FIXTURE, p2, max_pass=5) assert edge_pairs(p1) == edge_pairs(p2) + + +def test_describe_client_edge_summary_includes_http_calls_out( + kuzu_db_path_cross_service_smoke: Path, +) -> None: + from kuzu_queries import KuzuGraph + + g = KuzuGraph(str(kuzu_db_path_cross_service_smoke)) + rows = g._rows( # noqa: SLF001 + "MATCH (c:Client)-[:HTTP_CALLS]->() RETURN c.id AS id LIMIT 1", + {}, + ) + assert rows + cid = str(rows[0]["id"]) + out = describe_v2(cid, graph=g) + assert out.success is True + assert out.record is not None + assert out.record.edge_summary is not None + http_out = out.record.edge_summary.get("HTTP_CALLS", {"in": 0, "out": 0}) + assert int(http_out.get("out", 0)) >= 1 diff --git a/tests/test_pr_analysis.py b/tests/test_pr_analysis.py index 372df34..f8d4610 100644 --- a/tests/test_pr_analysis.py +++ b/tests/test_pr_analysis.py @@ -1,6 +1,7 @@ """PR-B: unified diff parsing, hunk→symbol mapping, and risk scoring (plan §4 tests 31–37).""" from __future__ import annotations +from pathlib import Path from types import SimpleNamespace from kuzu_queries import find_symbols_in_file_range @@ -193,8 +194,10 @@ def _rows(self, query, params): "parent_id": "", "resolved": True, }] - if "MATCH (s:Symbol)-[e:HTTP_CALLS|ASYNC_CALLS]->(r:Route {id: $rid})" in query: + if "MATCH (s:Symbol)-[:DECLARES_CLIENT]->(c:Client)-[e:HTTP_CALLS]->(r:Route {id: $rid})" in query: return [{"id": str(i)} for i in range(6)] + if "MATCH (s:Symbol)-[e:ASYNC_CALLS]->(r:Route {id: $rid})" in query: + return [] return [] def impact_analysis(self, name, **kwargs): @@ -252,10 +255,12 @@ def _rows(self, query, params): "parent_id": "", "resolved": True, }] - if "MATCH (s:Symbol)-[e:HTTP_CALLS|ASYNC_CALLS]->(r:Route {id: $rid})" in query: + if "MATCH (s:Symbol)-[:DECLARES_CLIENT]->(c:Client)-[e:HTTP_CALLS]->(r:Route {id: $rid})" in query: if self._include_callers: return [{"id": "caller-1"}] return [] + if "MATCH (s:Symbol)-[e:ASYNC_CALLS]->(r:Route {id: $rid})" in query: + return [] return [] def impact_analysis(self, name, **kwargs): @@ -298,6 +303,20 @@ def find_callers(self, name, **kwargs): assert abs((rep.risk_score - baseline.risk_score) - 0.2) < 1e-9 +def test_pr_analysis_changed_methods_finds_routes_via_declares_client( + kuzu_db_path_cross_service_smoke: Path, +) -> None: + from kuzu_queries import KuzuGraph + + g = KuzuGraph(str(kuzu_db_path_cross_service_smoke)) + rows = g._rows( # noqa: SLF001 + "MATCH (s:Symbol)-[:DECLARES_CLIENT]->(c:Client)-[e:HTTP_CALLS]->(r:Route) " + "WHERE e.match = 'cross_service' RETURN count(*) AS n", + {}, + ) + assert int(rows[0].get("n") or 0) >= 1 + + def test_36_removed_symbol_from_minus_only_hunk(kuzu_graph) -> None: diff = """diff --git a/chat-assign/src/main/java/com/bank/chat/assign/service/ChatManagementService.java b/chat-assign/src/main/java/com/bank/chat/assign/service/ChatManagementService.java --- a/chat-assign/src/main/java/com/bank/chat/assign/service/ChatManagementService.java diff --git a/tests/test_schema_consistency.py b/tests/test_schema_consistency.py index 2f161cc..5931427 100644 --- a/tests/test_schema_consistency.py +++ b/tests/test_schema_consistency.py @@ -56,9 +56,9 @@ def test_schema_consistency_all_ddl_endpoints_match_edge_schema() -> None: assert spec.dst == dst, f"{name}: schema dst {spec.dst!r} != DDL {dst!r}" -def test_schema_consistency_http_calls_pre_flip_symbol_to_route() -> None: +def test_schema_consistency_http_calls_post_flip_client_to_route() -> None: spec = EDGE_SCHEMA["HTTP_CALLS"] - assert spec.src == "Symbol" + assert spec.src == "Client" assert spec.dst == "Route" @@ -79,12 +79,13 @@ def test_edge_schema_member_only_flags_on_method_level_edges() -> None: def test_http_async_typical_traversals_include_pre_flip_current_keys() -> None: - for edge in ("HTTP_CALLS", "ASYNC_CALLS"): - trav = EDGE_SCHEMA[edge].typical_traversals - assert "member_subject_current" in trav - assert "HTTP_CALLS" in trav["member_subject_current"] or "ASYNC_CALLS" in trav["member_subject_current"] - assert "member_subject" in trav - assert "DECLARES" in trav["member_subject"] or "DECLARES_PRODUCER" in trav["member_subject"] + http_trav = EDGE_SCHEMA["HTTP_CALLS"].typical_traversals + assert "member_subject" in http_trav + assert "DECLARES_CLIENT" in http_trav["member_subject"] + async_trav = EDGE_SCHEMA["ASYNC_CALLS"].typical_traversals + assert "member_subject_current" in async_trav + assert "ASYNC_CALLS" in async_trav["member_subject_current"] + assert "member_subject" in async_trav def test_brownfield_resolver_strategy_literals_emitted_in_builder_subset() -> None: From eb61ee61d006ea56ffeaa2f103af9f41d33b4e0d Mon Sep 17 00:00:00 2001 From: Dmitry Teryaev Date: Sat, 16 May 2026 19:32:41 +0300 Subject: [PATCH 2/2] address PR-B review: omit async from RouteCaller until PR-C Stop labeling Symbol-backed async callers as client in find_route_callers and trace_request_flow; surface caller_client_id on trace_flow HTTP hops; strengthen UC9 test via compute_risk; fix neighbors mock for Client callers. Co-authored-by: Cursor --- kuzu_queries.py | 56 +++++++++++---------------------------- tests/test_mcp_v2.py | 25 ++++++++++++++++- tests/test_pr_analysis.py | 32 +++++++++++++++++++--- 3 files changed, 68 insertions(+), 45 deletions(-) diff --git a/kuzu_queries.py b/kuzu_queries.py index 8168eb6..41c0c33 100644 --- a/kuzu_queries.py +++ b/kuzu_queries.py @@ -116,9 +116,10 @@ class ViaEdge: in the same chain (e.g. `INJECTS` vs `IMPLEMENTS` vs `CALLS`) and at what hop from the frontier they were reached. """ - edge_type: str # INJECTS | EXTENDS | IMPLEMENTS | CALLS + edge_type: str # INJECTS | EXTENDS | IMPLEMENTS | CALLS | HTTP_CALLS | ASYNC_CALLS from_fqn: str hop: int # 1 = direct neighbour of previous-stage frontier + caller_node_id: str = "" # Client id when edge_type is HTTP_CALLS (SCHEMA v2) @dataclass @@ -1281,6 +1282,7 @@ def _ingest_flow_row( edge_type=str(row.get("edge_type") or ""), from_fqn=str(row.get("from_fqn") or ""), hop=hop, + caller_node_id=str(row.get("caller_client_id") or ""), ) existing = stage_results.get(sym.fqn) if existing is None: @@ -1342,7 +1344,8 @@ def _ingest_flow_row( "AND n.resolved AND n.kind IN ['class','interface','enum','record','annotation'] " "AND e.confidence >= $mc AND root.microservice <> n.microservice " f"{scrf} " - f"RETURN {_symbol_return_for('n')}, 'HTTP_CALLS' AS edge_type, root.fqn AS from_fqn " + f"RETURN {_symbol_return_for('n')}, 'HTTP_CALLS' AS edge_type, " + f"root.fqn AS from_fqn, c.id AS caller_client_id " f"LIMIT {max(1, remaining * 4)}" ) for row in self._rows(qrf, params_rf): @@ -1352,14 +1355,14 @@ def _ingest_flow_row( if len(stage_results) < stage_limit: remaining = stage_limit - len(stage_results) qrf_async = ( - "MATCH (root:Symbol)-[:DECLARES]->(m1:Symbol)-[e:ASYNC_CALLS]->(rt:Route)" - "<-[:EXPOSES]-(handler:Symbol)<-[:DECLARES]-(n:Symbol) " - "WHERE root.fqn IN $fqns AND n.role IN $roles " - "AND n.resolved AND n.kind IN ['class','interface','enum','record','annotation'] " - "AND e.confidence >= $mc AND root.microservice <> n.microservice " - f"{scrf} " - f"RETURN {_symbol_return_for('n')}, 'ASYNC_CALLS' AS edge_type, root.fqn AS from_fqn " - f"LIMIT {max(1, remaining * 4)}" + "MATCH (root:Symbol)-[:DECLARES]->(m1:Symbol)-[e:ASYNC_CALLS]->(rt:Route)" + "<-[:EXPOSES]-(handler:Symbol)<-[:DECLARES]-(n:Symbol) " + "WHERE root.fqn IN $fqns AND n.role IN $roles " + "AND n.resolved AND n.kind IN ['class','interface','enum','record','annotation'] " + "AND e.confidence >= $mc AND root.microservice <> n.microservice " + f"{scrf} " + f"RETURN {_symbol_return_for('n')}, 'ASYNC_CALLS' AS edge_type, root.fqn AS from_fqn " + f"LIMIT {max(1, remaining * 4)}" ) for row in self._rows(qrf_async, params_rf): _ingest_flow_row(row, filter_external_fqn=True) @@ -1488,6 +1491,7 @@ def find_route_callers( path_template: str = "", method: str = "", ) -> list[RouteCaller]: + """HTTP callers via Client (two-hop). Async callers omitted until PR-C (Producer).""" rid = route_id or "" if not rid: params: dict[str, Any] = { @@ -1514,12 +1518,6 @@ def find_route_callers( "ORDER BY e.confidence DESC, c.id", {"rid": rid}, ) - async_rows = self._rows( - "MATCH (s:Symbol)-[e:ASYNC_CALLS]->(r:Route {id: $rid}) " - "RETURN s.id AS caller_node_id, s.microservice AS caller_microservice, " - "s.id AS declaring_symbol_id, e.confidence AS confidence, e.match AS match", - {"rid": rid}, - ) out: list[RouteCaller] = [] for row in http_rows: out.append( @@ -1534,22 +1532,10 @@ def find_route_callers( raw_uri=str(row.get("raw_uri") or ""), ), ) - for row in async_rows: - sym_id = str(row.get("caller_node_id") or "") - out.append( - RouteCaller( - caller_node_id=sym_id, - caller_node_kind="client", - caller_microservice=str(row.get("caller_microservice") or ""), - declaring_symbol_id=str(row.get("declaring_symbol_id") or ""), - confidence=float(row.get("confidence") or 0.0), - match=str(row.get("match") or ""), - ), - ) - out.sort(key=lambda c: (-c.confidence, c.caller_node_id)) return out def trace_request_flow(self, entry_route_id: str, max_hops: int = 5) -> dict[str, Any]: + """Inbound HTTP via Client two-hop. Async inbound omitted until PR-C (Producer).""" hops = max(1, min(int(max_hops), 8)) inbound_http = self._rows( f"MATCH (entry:Route {{id: $rid}})<-[e:HTTP_CALLS]-(caller:Client)" @@ -1562,17 +1548,7 @@ def trace_request_flow(self, entry_route_id: str, max_hops: int = 5) -> dict[str "ORDER BY confidence DESC, caller_node_id", {"rid": entry_route_id}, ) - inbound_async = self._rows( - f"MATCH (entry:Route {{id: $rid}})<-[e:ASYNC_CALLS]-(caller:Symbol) " - f"OPTIONAL MATCH (origin:Symbol)-[:CALLS*0..{hops}]->(caller) " - "RETURN DISTINCT caller.id AS caller_node_id, 'client' AS caller_node_kind, " - "caller.id AS declaring_symbol_id, caller.fqn AS declaring_symbol_fqn, " - "caller.microservice AS microservice, e.confidence AS confidence, " - "e.match AS match, origin.id AS origin_symbol_id, origin.fqn AS origin_fqn " - "ORDER BY confidence DESC, caller_node_id", - {"rid": entry_route_id}, - ) - inbound = inbound_http + inbound_async + inbound = inbound_http outbound = self._rows( f"MATCH (handler:Symbol)-[:EXPOSES]->(entry:Route {{id: $rid}}) " f"OPTIONAL MATCH (handler)-[:CALLS*0..{hops}]->(next:Symbol) " diff --git a/tests/test_mcp_v2.py b/tests/test_mcp_v2.py index a40f92e..c167f56 100644 --- a/tests/test_mcp_v2.py +++ b/tests/test_mcp_v2.py @@ -420,7 +420,28 @@ def _rows(self, query: str, params: dict[str, Any] | None = None) -> list[dict[s and "WHERE a.id" in query and "RETURN b.id AS other_id" in query ): - return [{"other_id": "sym:caller", "edge_type": "HTTP_CALLS", "confidence": 0.8, "match": "cross_service"}] + return [{"other_id": "client:caller", "edge_type": "HTTP_CALLS", "confidence": 0.8, "match": "cross_service"}] + if "MATCH (n:Client)" in query: + return [ + { + "id": "client:caller", + "client_kind": "feign_method", + "target_service": "chat-core", + "method": "POST", + "path": "/chat/joinOperator", + "path_template": "/chat/joinOperator", + "path_regex": "", + "member_fqn": "com.example.Caller#call()", + "member_id": "sym:caller", + "microservice": "chat-core", + "module": "chat-app", + "filename": "Caller.java", + "start_line": 1, + "end_line": 2, + "resolved": True, + "source_layer": "builtin", + } + ] if "MATCH (n:Symbol)" in query: return [ { @@ -455,6 +476,8 @@ def _rows(self, query: str, params: dict[str, Any] | None = None) -> list[dict[s ) assert out.success is True assert len(out.results) == 1 + assert out.results[0].other.id == "client:caller" + assert out.results[0].other.kind == "client" def test_neighbors_batch_ids_carries_origin_id(kuzu_graph) -> None: diff --git a/tests/test_pr_analysis.py b/tests/test_pr_analysis.py index f8d4610..3b6e0ba 100644 --- a/tests/test_pr_analysis.py +++ b/tests/test_pr_analysis.py @@ -308,13 +308,37 @@ def test_pr_analysis_changed_methods_finds_routes_via_declares_client( ) -> None: from kuzu_queries import KuzuGraph + from pr_analysis import ChangedSymbol, compute_risk + g = KuzuGraph(str(kuzu_db_path_cross_service_smoke)) - rows = g._rows( # noqa: SLF001 - "MATCH (s:Symbol)-[:DECLARES_CLIENT]->(c:Client)-[e:HTTP_CALLS]->(r:Route) " - "WHERE e.match = 'cross_service' RETURN count(*) AS n", + route_rows = g._rows( # noqa: SLF001 + "MATCH (r:Route) " + "WHERE r.microservice = 'svc-b' AND r.path_template = '/chat/joinOperator' " + "AND r.method = 'POST' RETURN r.id AS id LIMIT 1", {}, ) - assert int(rows[0].get("n") or 0) >= 1 + assert route_rows + rid = str(route_rows[0]["id"]) + handler_rows = g._rows( # noqa: SLF001 + "MATCH (s:Symbol)-[:EXPOSES]->(r:Route {id: $rid}) RETURN s.id AS id, s.fqn AS fqn LIMIT 1", + {"rid": rid}, + ) + assert handler_rows + rep = compute_risk( + g, + [ + ChangedSymbol( + symbol_id=str(handler_rows[0]["id"]), + fqn=str(handler_rows[0]["fqn"]), + kind="method", + change_type="modified", + file="", + hunk_lines=[1], + ), + ], + ) + assert rep.changed_symbols + assert rep.changed_symbols[0].cross_service_callers_count >= 1 def test_36_removed_symbol_from_minus_only_hunk(kuzu_graph) -> None: