From 1b793733d96c08e109df893eef5cf400db9ced3f Mon Sep 17 00:00:00 2001 From: Dmitry Teryaev Date: Sat, 16 May 2026 20:07:49 +0300 Subject: [PATCH 1/4] feat(schema): introduce Producer node and route ASYNC_CALLS through it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Producer/DECLARES_PRODUCER, flip ASYNC_CALLS to Producer→Route, wire GraphMeta counters, kuzu two-hop async queries, MCP find/resolve producer parity, describe rollups, and PR-C tests (ontology 14 unchanged). Co-authored-by: Cursor --- README.md | 4 +- build_ast_graph.py | 178 +++++++++++++++++++++++++-- docs/AGENT-GUIDE.md | 5 +- docs/EDGE-NAVIGATION.md | 32 ++++- docs/skills/java-codebase-explore.md | 10 +- java_ontology.py | 37 ++++-- kuzu_queries.py | 106 +++++++++++++++- mcp_v2.py | 142 +++++++++++++++++---- pr_analysis.py | 4 +- server.py | 12 +- tests/test_ast_graph_build.py | 3 +- tests/test_brownfield_clients.py | 4 +- tests/test_call_edges_e2e.py | 102 ++++++++++++++- tests/test_client_node_extraction.py | 23 ++++ tests/test_kuzu_queries.py | 11 ++ tests/test_mcp_tools.py | 3 +- tests/test_mcp_v2.py | 23 ++++ tests/test_mcp_v2_compose.py | 20 +++ tests/test_pr_analysis.py | 4 +- tests/test_schema_consistency.py | 11 +- 20 files changed, 650 insertions(+), 84 deletions(-) diff --git a/README.md b/README.md index 1cf6984..53251bd 100644 --- a/README.md +++ b/README.md @@ -386,7 +386,7 @@ Unresolved targets become **phantom** nodes (`resolved=false`, FQN guessed from | `CALLS` | method → method | In-process call (confidence-scored, strategy-tagged). | | `EXPOSES` | type → route | Type exposes an HTTP/async route. | | `HTTP_CALLS` | client → route | Cross-service HTTP call (caller-side Client to target Route). | -| `ASYNC_CALLS` | symbol → route | Cross-service async (Kafka, Rabbit, JMS, …). | +| `ASYNC_CALLS` | producer → route | Cross-service async (Kafka, Rabbit, JMS, …). | JDK / Spring / Lombok callees are represented as **phantom** method symbols at index time. Caller/callee traversals default to `exclude_external=true` so those edges are filtered by FQN prefix without dropping them from the graph. @@ -426,7 +426,7 @@ Resolution order for `microservice`: Current ontology version is **14**. Any index built before this version must be rebuilt via `cocoindex update ... --full-reprocess -f` or a full `java-codebase-rag reprocess` (no selective flags) so vectors and graph stay aligned. Until re-indexed, the server defensively JSON-decodes string-form list columns so nothing explodes, but filters like `array_contains` will not work. -Ontology **14** introduces `EDGE_SCHEMA` in `java_ontology.py` as the canonical edge navigation schema (see `docs/EDGE-NAVIGATION.md`). **`HTTP_CALLS` is `Client → Route`** (SCHEMA-V2 PR-B). **`ASYNC_CALLS` remains `Symbol → Route` until PR-C**, which adds the `Producer` node, `DECLARES_PRODUCER`, and flips `ASYNC_CALLS` to `Producer → Route`. Run one full reprocess after upgrading through the SCHEMA-V2 sequence (or when you need the v14 ontology gate). +Ontology **14** introduces `EDGE_SCHEMA` in `java_ontology.py` as the canonical edge navigation schema (see `docs/EDGE-NAVIGATION.md`). **`HTTP_CALLS` is `Client → Route`** (SCHEMA-V2 PR-B). **`ASYNC_CALLS` is `Producer → Route`** with `DECLARES_PRODUCER` (SCHEMA-V2 PR-C). Run one full reprocess after upgrading through the SCHEMA-V2 sequence (or when you need the v14 ontology gate). Ontology **13** materializes stored `OVERRIDES` edges between method Symbols (subtype override → supertype declaration, matching `signature` on a direct `IMPLEMENTS` / `EXTENDS` hop). `neighbors(edge_types=["OVERRIDES"])` traverses this relationship; `OVERRIDDEN_BY*` keys in `edge_summary` remain describe-time rollups only. diff --git a/build_ast_graph.py b/build_ast_graph.py index 9c62967..7d2b50f 100644 --- a/build_ast_graph.py +++ b/build_ast_graph.py @@ -64,7 +64,7 @@ symbol_id, ) from path_filtering import LayeredIgnore, iter_java_source_files -from java_ontology import VALID_CLIENT_KINDS, VALID_HTTP_CALL_MATCHES +from java_ontology import VALID_CLIENT_KINDS, VALID_HTTP_CALL_MATCHES, VALID_PRODUCER_KINDS log = logging.getLogger(__name__) @@ -255,7 +255,7 @@ class HttpCallRow: @dataclass class AsyncCallRow: - symbol_id: str + producer_id: str route_id: str confidence: float strategy: str @@ -292,6 +292,32 @@ class DeclaresClientRow: strategy: str +@dataclass +class ProducerRow: + id: str + producer_kind: str + topic: str + broker: str + direction: str + member_fqn: str + member_id: str + microservice: str + module: str + filename: str + start_line: int + end_line: int + resolved: bool + source_layer: str + + +@dataclass +class DeclaresProducerRow: + symbol_id: str + producer_id: str + confidence: float + strategy: str + + @dataclass class ClientExtractionStats: clients_total: int = 0 @@ -299,6 +325,13 @@ class ClientExtractionStats: clients_by_kind: dict[str, int] = field(default_factory=lambda: defaultdict(int)) +@dataclass +class ProducerExtractionStats: + producers_total: int = 0 + declares_producer_total: int = 0 + producers_by_kind: dict[str, int] = field(default_factory=lambda: defaultdict(int)) + + @dataclass class CallEdgeStats: http_calls_total: int = 0 @@ -336,10 +369,13 @@ class GraphTables: async_call_rows: list[AsyncCallRow] = field(default_factory=list) client_rows: list[ClientRow] = field(default_factory=list) declares_client_rows: list[DeclaresClientRow] = field(default_factory=list) + producer_rows: list[ProducerRow] = field(default_factory=list) + declares_producer_rows: list[DeclaresProducerRow] = field(default_factory=list) overrides_rows: list[DeclaresRow] = field(default_factory=list) route_stats: RouteExtractionStats = field(default_factory=RouteExtractionStats) call_edge_stats: CallEdgeStats = field(default_factory=CallEdgeStats) client_stats: ClientExtractionStats = field(default_factory=ClientExtractionStats) + producer_stats: ProducerExtractionStats = field(default_factory=ProducerExtractionStats) methods_by_type: dict[str, list[MemberEntry]] = field(default_factory=dict) parse_errors: int = 0 skipped_files: int = 0 @@ -1377,6 +1413,17 @@ def _client_id( return f"c:{hashlib.sha1(key.encode()).hexdigest()[:16]}" +def _producer_id( + *, + microservice: str, + member_fqn: str, + producer_kind: str, + topic: str, +) -> str: + key = f"{microservice}|{member_fqn}|{producer_kind}|{topic}" + return f"p:{hashlib.sha1(key.encode()).hexdigest()[:16]}" + + def _client_source_layer(strategy: str) -> str: if strategy in {"layer_a_meta", "layer_b_ann", "layer_b_fqn", "layer_c_source"}: return strategy @@ -1389,6 +1436,16 @@ def _client_source_layer(strategy: str) -> str: return "builtin" +def _producer_source_layer(strategy: str) -> str: + if strategy in {"layer_a_meta", "layer_b_ann", "layer_b_fqn", "layer_c_source"}: + return strategy + if strategy in VALID_PRODUCER_KINDS: + return "builtin" + if strategy != "builtin": + log.warning("unknown producer source strategy %r, falling back to builtin", strategy) + return "builtin" + + _ROUTE_LAYER_RANK: dict[str, int] = { "builtin": 0, "layer_b_ann": 1, @@ -1562,7 +1619,9 @@ def pass5_imperative_edges( http_seen: set[tuple[str, str]] = set() async_seen: set[tuple[str, str]] = set() client_seen: set[str] = set() + producer_seen: set[str] = set() declares_client_seen: set[tuple[str, str]] = set() + declares_producer_seen: set[tuple[str, str]] = set() route_rows = list(tables.routes_rows) def _micro_factor(member: MemberEntry) -> float: @@ -1710,6 +1769,44 @@ def _phantom_async_route_id(call: OutgoingCallDecl) -> str: tables.call_edge_stats.http_calls_by_client_kind[call.client_kind] += 1 tables.call_edge_stats.http_calls_by_strategy[strategy] += 1 elif call.channel == "async": + topic_atom = (call.topic_call or "").strip() + pid = _producer_id( + microservice=member.microservice, + member_fqn=call.method_fqn, + producer_kind=call.client_kind, + topic=topic_atom, + ) + if pid not in producer_seen: + producer_seen.add(pid) + tables.producer_rows.append( + ProducerRow( + id=pid, + producer_kind=call.client_kind, + topic=topic_atom, + broker=call.broker_call, + direction="producer", + member_fqn=call.method_fqn, + member_id=member.node_id, + microservice=member.microservice, + module=member.module, + filename=call.filename, + start_line=call.start_line, + end_line=call.end_line, + resolved=call.resolved, + source_layer=_producer_source_layer(call.resolution_strategy), + ), + ) + dpkey = (member.node_id, pid) + if dpkey not in declares_producer_seen: + declares_producer_seen.add(dpkey) + tables.declares_producer_rows.append( + DeclaresProducerRow( + symbol_id=member.node_id, + producer_id=pid, + confidence=call.confidence_base, + strategy=call.resolution_strategy, + ), + ) rid = _phantom_async_route_id(call) _append_route( RouteRow( @@ -1733,7 +1830,7 @@ def _phantom_async_route_id(call: OutgoingCallDecl) -> str: source_layer="builtin", ) ) - key = (member.node_id, rid) + key = (pid, rid) if key in async_seen: continue async_seen.add(key) @@ -1741,7 +1838,7 @@ def _phantom_async_route_id(call: OutgoingCallDecl) -> str: strategy = call.resolution_strategy tables.async_call_rows.append( AsyncCallRow( - symbol_id=member.node_id, + producer_id=pid, route_id=rid, confidence=conf, strategy=strategy, @@ -1765,6 +1862,16 @@ def _phantom_async_route_id(call: OutgoingCallDecl) -> str: tables.client_stats.clients_by_kind = defaultdict(int) for row in tables.client_rows: tables.client_stats.clients_by_kind[row.client_kind] += 1 + tables.producer_rows = sorted(tables.producer_rows, key=lambda p: p.id) + tables.declares_producer_rows = sorted( + tables.declares_producer_rows, + key=lambda e: (e.symbol_id, e.producer_id), + ) + tables.producer_stats.producers_total = len(tables.producer_rows) + tables.producer_stats.declares_producer_total = len(tables.declares_producer_rows) + tables.producer_stats.producers_by_kind = defaultdict(int) + for row in tables.producer_rows: + tables.producer_stats.producers_by_kind[row.producer_kind] += 1 brownfield_strategies = frozenset( ( "layer_b_ann", @@ -1915,6 +2022,7 @@ def pass6_match_edges( all_routes = [r for r in tables.routes_rows if r.microservice] member_by_id = {m.node_id: m for m in tables.members} clients_by_id = {c.id: c for c in tables.client_rows} + producers_by_id = {p.id: p for p in tables.producer_rows} client_hints_by_member: dict[str, list[ClientRow]] = defaultdict(list) for edge in tables.declares_client_rows: client = clients_by_id.get(edge.client_id) @@ -2030,7 +2138,8 @@ def _micro_factor(member: MemberEntry | None) -> float: for row in tables.async_call_rows: if row.match != "unresolved": continue - member = member_by_id.get(row.symbol_id) + producer = producers_by_id.get(row.producer_id) + member = member_by_id.get(producer.member_id) if producer else None base = row.confidence / max(1e-9, (0.3 * _micro_factor(member))) src_route = route_by_id.get(row.route_id) call = OutgoingCallDecl( @@ -2135,6 +2244,9 @@ def _micro_factor(member: MemberEntry | None) -> float: "clients_total INT64, " "declares_client_total INT64, " "clients_by_kind STRING, " + "producers_total INT64, " + "declares_producer_total INT64, " + "producers_by_kind STRING, " "http_calls_total INT64, " "async_calls_total INT64, " "http_calls_by_strategy STRING, " @@ -2174,6 +2286,15 @@ def _micro_factor(member: MemberEntry | None) -> float: "PRIMARY KEY(id))" ) +_SCHEMA_PRODUCER = ( + "CREATE NODE TABLE Producer(" + "id STRING, producer_kind STRING, topic STRING, broker STRING, direction STRING, " + "member_fqn STRING, member_id STRING, " + "microservice STRING, module STRING, filename STRING, " + "start_line INT64, end_line INT64, resolved BOOLEAN, source_layer STRING, " + "PRIMARY KEY(id))" +) + _SCHEMA_EXTENDS = ( "CREATE REL TABLE EXTENDS(FROM Symbol TO Symbol, " "dst_name STRING, dst_fqn STRING, resolved BOOLEAN)" @@ -2202,13 +2323,17 @@ def _micro_factor(member: MemberEntry | None) -> float: "CREATE REL TABLE DECLARES_CLIENT(FROM Symbol TO Client, " "confidence DOUBLE, strategy STRING)" ) +_SCHEMA_DECLARES_PRODUCER = ( + "CREATE REL TABLE DECLARES_PRODUCER(FROM Symbol TO Producer, " + "confidence DOUBLE, strategy STRING)" +) _SCHEMA_HTTP_CALLS = ( "CREATE REL TABLE HTTP_CALLS(FROM Client TO Route, " "confidence DOUBLE, strategy STRING, " "method_call STRING, raw_uri STRING, match STRING)" ) _SCHEMA_ASYNC_CALLS = ( - "CREATE REL TABLE ASYNC_CALLS(FROM Symbol TO Route, " + "CREATE REL TABLE ASYNC_CALLS(FROM Producer TO Route, " "confidence DOUBLE, strategy STRING, " "direction STRING, raw_topic STRING, match STRING)" ) @@ -2217,6 +2342,7 @@ def _micro_factor(member: MemberEntry | None) -> float: def _drop_all(conn: kuzu.Connection) -> None: for stmt in ( "DROP TABLE IF EXISTS DECLARES_CLIENT", + "DROP TABLE IF EXISTS DECLARES_PRODUCER", "DROP TABLE IF EXISTS HTTP_CALLS", "DROP TABLE IF EXISTS ASYNC_CALLS", "DROP TABLE IF EXISTS EXPOSES", @@ -2229,6 +2355,7 @@ def _drop_all(conn: kuzu.Connection) -> None: "DROP TABLE IF EXISTS Symbol", "DROP TABLE IF EXISTS Route", "DROP TABLE IF EXISTS Client", + "DROP TABLE IF EXISTS Producer", "DROP TABLE IF EXISTS GraphMeta", ): try: @@ -2242,6 +2369,7 @@ def _create_schema(conn: kuzu.Connection) -> None: _SCHEMA_NODE, _SCHEMA_ROUTE, _SCHEMA_CLIENT, + _SCHEMA_PRODUCER, _SCHEMA_META, _SCHEMA_EXTENDS, _SCHEMA_IMPLEMENTS, @@ -2251,6 +2379,7 @@ def _create_schema(conn: kuzu.Connection) -> None: _SCHEMA_CALLS, _SCHEMA_EXPOSES, _SCHEMA_DECLARES_CLIENT, + _SCHEMA_DECLARES_PRODUCER, _SCHEMA_HTTP_CALLS, _SCHEMA_ASYNC_CALLS, ): @@ -2402,14 +2531,27 @@ def _write_nodes( "MATCH (s:Symbol {id: $sid}), (c:Client {id: $cid}) " "CREATE (s)-[:DECLARES_CLIENT {confidence: $confidence, strategy: $strategy}]->(c)" ) +_CREATE_PRODUCER = ( + "CREATE (:Producer {" + "id: $id, producer_kind: $producer_kind, topic: $topic, broker: $broker, " + "direction: $direction, member_fqn: $member_fqn, member_id: $member_id, " + "microservice: $microservice, module: $module, filename: $filename, " + "start_line: $start_line, end_line: $end_line, resolved: $resolved, " + "source_layer: $source_layer" + "})" +) +_CREATE_DECLARES_PRODUCER = ( + "MATCH (s:Symbol {id: $sid}), (p:Producer {id: $pid}) " + "CREATE (s)-[:DECLARES_PRODUCER {confidence: $confidence, strategy: $strategy}]->(p)" +) _CREATE_HTTP_CALL = ( "MATCH (c:Client {id: $cid}), (r:Route {id: $rid}) " "CREATE (c)-[:HTTP_CALLS {confidence: $confidence, strategy: $strategy, " "method_call: $method_call, raw_uri: $raw_uri, match: $match}]->(r)" ) _CREATE_ASYNC_CALL = ( - "MATCH (s:Symbol {id: $sid}), (r:Route {id: $rid}) " - "CREATE (s)-[:ASYNC_CALLS {confidence: $confidence, strategy: $strategy, " + "MATCH (p:Producer {id: $pid}), (r:Route {id: $rid}) " + "CREATE (p)-[:ASYNC_CALLS {confidence: $confidence, strategy: $strategy, " "direction: $direction, raw_topic: $raw_topic, match: $match}]->(r)" ) @@ -2542,6 +2684,15 @@ def _write_routes_and_exposes(conn: kuzu.Connection, tables: GraphTables) -> Non "confidence": row.confidence, "strategy": row.strategy, }) + for row in tables.producer_rows: + conn.execute(_CREATE_PRODUCER, asdict(row)) + for row in tables.declares_producer_rows: + conn.execute(_CREATE_DECLARES_PRODUCER, { + "sid": row.symbol_id, + "pid": row.producer_id, + "confidence": row.confidence, + "strategy": row.strategy, + }) for row in tables.http_call_rows: conn.execute(_CREATE_HTTP_CALL, { "cid": row.client_id, @@ -2554,7 +2705,7 @@ def _write_routes_and_exposes(conn: kuzu.Connection, tables: GraphTables) -> Non }) for row in tables.async_call_rows: conn.execute(_CREATE_ASYNC_CALL, { - "sid": row.symbol_id, + "pid": row.producer_id, "rid": row.route_id, "confidence": row.confidence, "strategy": row.strategy, @@ -2576,6 +2727,7 @@ def _write_meta(conn: kuzu.Connection, tables: GraphTables, source_root: Path) - routes_fw = dict(sorted(st.by_framework.items())) call_stats = tables.call_edge_stats client_stats = tables.client_stats + producer_stats = tables.producer_stats http_by_strategy = dict(sorted(call_stats.http_calls_by_strategy.items())) async_by_strategy = dict(sorted(call_stats.async_calls_by_strategy.items())) http_match = dict(sorted(call_stats.http_calls_match_breakdown.items())) @@ -2606,11 +2758,14 @@ def _write_meta(conn: kuzu.Connection, tables: GraphTables, source_root: Path) - "exposes": len(tables.exposes_rows), "clients": len(tables.client_rows), "declares_client": len(tables.declares_client_rows), + "producers": len(tables.producer_rows), + "declares_producer": len(tables.declares_producer_rows), "http_calls": len(tables.http_call_rows), "async_calls": len(tables.async_call_rows), } routes_layer = dict(sorted(st.routes_by_layer.items())) clients_by_kind = dict(sorted(client_stats.clients_by_kind.items())) + producers_by_kind = dict(sorted(producer_stats.producers_by_kind.items())) conn.execute( "CREATE (:GraphMeta {key: $k, ontology_version: $ov, built_at: $t, " "source_root: $sr, counts_json: $cj, parse_errors: $pe, " @@ -2619,6 +2774,8 @@ def _write_meta(conn: kuzu.Connection, tables: GraphTables, source_root: Path) - "routes_from_brownfield_pct: $routes_from_brownfield_pct, routes_by_layer: $routes_by_layer, " "clients_total: $clients_total, declares_client_total: $declares_client_total, " "clients_by_kind: $clients_by_kind, " + "producers_total: $producers_total, declares_producer_total: $declares_producer_total, " + "producers_by_kind: $producers_by_kind, " "http_calls_total: $http_calls_total, async_calls_total: $async_calls_total, " "http_calls_by_strategy: $http_calls_by_strategy, async_calls_by_strategy: $async_calls_by_strategy, " "http_calls_resolved_pct: $http_calls_resolved_pct, async_calls_resolved_pct: $async_calls_resolved_pct, " @@ -2646,6 +2803,9 @@ def _write_meta(conn: kuzu.Connection, tables: GraphTables, source_root: Path) - "clients_total": int(client_stats.clients_total), "declares_client_total": int(client_stats.declares_client_total), "clients_by_kind": json.dumps(clients_by_kind), + "producers_total": int(producer_stats.producers_total), + "declares_producer_total": int(producer_stats.declares_producer_total), + "producers_by_kind": json.dumps(producers_by_kind), "http_calls_total": call_stats.http_calls_total, "async_calls_total": call_stats.async_calls_total, "http_calls_by_strategy": json.dumps(http_by_strategy), diff --git a/docs/AGENT-GUIDE.md b/docs/AGENT-GUIDE.md index e330b5c..d187cad 100644 --- a/docs/AGENT-GUIDE.md +++ b/docs/AGENT-GUIDE.md @@ -96,7 +96,7 @@ Use these strings **verbatim** in `neighbors(..., edge_types=[...])`: | Method overrides | `OVERRIDES` | Subtype **method** → supertype **declaration** method (same `signature`, one `IMPLEMENTS`/`EXTENDS` hop). `in` = overriders; `out` = overridden declarations | | Method calls | `CALLS` | `in` = callers; `out` = callees | | Service boundary | `EXPOSES` | Symbol → Route (handler exposes route) | -| Cross-service | `HTTP_CALLS`, `ASYNC_CALLS` | `HTTP_CALLS`: Client → Route; `ASYNC_CALLS`: Symbol → Route until SCHEMA-V2 PR-C (`Producer` node) | +| Cross-service | `HTTP_CALLS`, `ASYNC_CALLS` | `HTTP_CALLS`: Client → Route; `ASYNC_CALLS`: Producer → Route via `DECLARES_PRODUCER` | Symmetric: cross-service and intra-service questions use the **same** `neighbors` call with different `edge_types`. @@ -191,6 +191,7 @@ Exact allowed values for roles, capabilities, client kinds, etc. live in `java_o | List interfaces in service S | `find(kind="symbol", filter={"microservice":S,"symbol_kind":"interface"})` | `neighbors` / `describe` | | List HTTP or Kafka entry points | `find(kind="route", filter={...})` | `describe` | | List Feign / HTTP clients | `find(kind="client", filter={...})` | `neighbors(..., out, ["HTTP_CALLS"])` if needed | +| List async producers | `find(kind="producer", filter={...})` | `neighbors(..., out, ["ASYNC_CALLS"])` if needed | | Who calls method M? | Stable symbol id via `resolve`, `find`, or `search` | `neighbors(ids=sym_id, direction="in", edge_types=["CALLS"])` | | What does M call? | Same | `neighbors(..., direction="out", edge_types=["CALLS"])` | | Who hits this route? | `find(kind="route", ...)` or route id from logs | `neighbors(ids=route_id, direction="in", edge_types=["HTTP_CALLS","ASYNC_CALLS","EXPOSES"])` | @@ -263,7 +264,7 @@ Virtual keys (`OVERRIDDEN_BY`, …) and composed dot-keys are **not** valid `Edg ### Ontology glossary (version 14) -Source of truth: `java_ontology.py` (`EDGE_SCHEMA`, valid sets). Strings are case-sensitive. Edge navigation: [`docs/EDGE-NAVIGATION.md`](./EDGE-NAVIGATION.md) — for `HTTP_CALLS`, traverse via `DECLARES_CLIENT` from a method Symbol or `neighbors` outbound from a Client id; `ASYNC_CALLS` still uses `*_current` member traversals until SCHEMA-V2 PR-C. +Source of truth: `java_ontology.py` (`EDGE_SCHEMA`, valid sets). Strings are case-sensitive. Edge navigation: [`docs/EDGE-NAVIGATION.md`](./EDGE-NAVIGATION.md) — for `HTTP_CALLS`, traverse via `DECLARES_CLIENT` from a method Symbol or `neighbors` outbound from a Client id; for `ASYNC_CALLS`, traverse via `DECLARES_PRODUCER` or outbound from a Producer id. **Roles:** `CONTROLLER`, `SERVICE`, `REPOSITORY`, `COMPONENT`, `CONFIG`, `ENTITY`, `CLIENT`, `MAPPER`, `DTO`, `OTHER`. diff --git a/docs/EDGE-NAVIGATION.md b/docs/EDGE-NAVIGATION.md index e110d45..5580f8f 100644 --- a/docs/EDGE-NAVIGATION.md +++ b/docs/EDGE-NAVIGATION.md @@ -15,8 +15,9 @@ | CALLS | Symbol | Symbol | many_to_many | yes | yes | | EXPOSES | Symbol | Route | one_to_one | yes | yes | | DECLARES_CLIENT | Symbol | Client | one_to_many | yes | yes | +| DECLARES_PRODUCER | Symbol | Producer | one_to_many | yes | yes | | HTTP_CALLS | Client | Route | many_to_many | yes | no | -| ASYNC_CALLS | Symbol | Route | many_to_many | yes | no | +| ASYNC_CALLS | Producer | Route | many_to_many | yes | no | ## EXTENDS @@ -183,6 +184,26 @@ - `member_subject`: neighbors(['{id}'],'out',['DECLARES_CLIENT']) - `alien_subject`: DECLARES_CLIENT connects method Symbol → Client +## DECLARES_PRODUCER + +**Endpoints**: `Symbol → Producer` +**Cardinality**: `one_to_many` +**Brownfield-resolver-sourced**: yes +**Member-only** (hints): yes + +**Purpose**: method declares an outbound async producer call site + +**Attributes**: + +- `confidence` (`DOUBLE`) — producer declaration confidence in [0.0, 1.0] +- `strategy` (`STRING`) — producer resolution strategy literal + +**Typical traversals**: + +- `type_subject`: neighbors(['{id}'],'out',['DECLARES']) then neighbors(member_ids,'{direction}',['DECLARES_PRODUCER']) +- `member_subject`: neighbors(['{id}'],'out',['DECLARES_PRODUCER']) +- `alien_subject`: DECLARES_PRODUCER connects method Symbol → Producer + ## HTTP_CALLS **Endpoints**: `Client → Route` @@ -209,12 +230,12 @@ ## ASYNC_CALLS -**Endpoints**: `Symbol → Route` +**Endpoints**: `Producer → Route` **Cardinality**: `many_to_many` **Brownfield-resolver-sourced**: yes **Member-only** (hints): no -**Purpose**: resolved async call from declaring method to topic route (pre-flip: Symbol→Route; PR-C: Producer→Route) +**Purpose**: resolved async call from a declared Producer to a topic route **Attributes**: @@ -226,8 +247,7 @@ **Typical traversals**: -- `type_subject_current`: neighbors(['{id}'],'out',['DECLARES']) then neighbors(member_ids,'out',['ASYNC_CALLS']) - `type_subject`: neighbors(['{id}'],'out',['DECLARES']) then neighbors(member_ids,'out',['DECLARES_PRODUCER']) then neighbors(producer_ids,'out',['ASYNC_CALLS']) -- `member_subject_current`: neighbors(['{id}'],'out',['ASYNC_CALLS']) - `member_subject`: neighbors(['{id}'],'out',['DECLARES_PRODUCER']) then neighbors(producer_ids,'out',['ASYNC_CALLS']) -- `alien_subject`: ASYNC_CALLS is Symbol→Route until PR-C; use member_subject_current. After PR-C (Producer→Route), use member_subject via DECLARES_PRODUCER +- `route_subject`: neighbors(['{id}'],'in',['ASYNC_CALLS']) then neighbors(producer_ids,'in',['DECLARES_PRODUCER']) for declaring method +- `alien_subject`: ASYNC_CALLS connects Producer→Route; use DECLARES_PRODUCER from a method Symbol, or neighbors(producer_id,'out',['ASYNC_CALLS']) from a Producer id diff --git a/docs/skills/java-codebase-explore.md b/docs/skills/java-codebase-explore.md index e27ac49..2bab500 100644 --- a/docs/skills/java-codebase-explore.md +++ b/docs/skills/java-codebase-explore.md @@ -98,8 +98,8 @@ You cannot reason reliably about cross-service behaviour until these surfaces ex **Sequence:** 1. Cluster routes by path prefix; **`describe`** on representative `route:` ids. -2. For each major route, **`neighbors(direction="in", edge_types=["EXPOSES"])`** to land on handler symbols; for inbound **`HTTP_CALLS`**, expect **Client** callers (then **`DECLARES_CLIENT` inbound** to the declaring method); **`ASYNC_CALLS`** inbound still lands on Symbol callers until PR-C. -3. Use **`find(kind="client", …)`** with the same microservice filter to list outbound integration points; follow outbound **`HTTP_CALLS`** from each Client (or **`ASYNC_CALLS`** from methods until Producer lands). +2. For each major route, **`neighbors(direction="in", edge_types=["EXPOSES"])`** to land on handler symbols; for inbound **`HTTP_CALLS`**, expect **Client** callers (then **`DECLARES_CLIENT` inbound** to the declaring method); for inbound **`ASYNC_CALLS`**, expect **Producer** callers (then **`DECLARES_PRODUCER` inbound** to the declaring method). +3. Use **`find(kind="client", …)`** and **`find(kind="producer", …)`** with the same microservice filter to list outbound integration points; follow outbound **`HTTP_CALLS`** from each Client and **`ASYNC_CALLS`** from each Producer. **Stopping rule:** You can summarize how traffic enters the service, what modules/controllers own key paths, and what external systems it calls—**without** claiming tests, runtime config, or unindexed siblings exist in MCP. @@ -116,7 +116,7 @@ You cannot reason reliably about cross-service behaviour until these surfaces ex **Sequence:** 1. **`neighbors(direction="in", edge_types=["EXPOSES"])`** onto the handling symbol; walk **`CALLS`** outbound method-by-method. -2. When a method makes outbound HTTP, **`neighbors(..., out, ["DECLARES_CLIENT"])`** then outbound **`HTTP_CALLS`** from each Client id; for async (pre-PR-C), **`ASYNC_CALLS`** may still be direct from the method Symbol. +2. When a method makes outbound HTTP, **`neighbors(..., out, ["DECLARES_CLIENT"])`** then outbound **`HTTP_CALLS`** from each Client id; for async, **`neighbors(..., out, ["DECLARES_PRODUCER"])`** then outbound **`ASYNC_CALLS`** from each Producer id. 3. Stop at leaves, framework boundaries, or unresolved edges; read **`edge.attrs`** (`attrs.confidence`, `attrs.strategy`, `attrs.match`) and report low-confidence segments as resolver gaps, not as facts. **Stopping rule:** You reach a stable leaf (external IO, message publish, clear terminal layer) **or** you document every unresolved hop with a concrete next non-MCP check. @@ -239,11 +239,11 @@ Ten edge types: | Group | Edges | | ----- | ----- | | Type wiring | `EXTENDS`, `IMPLEMENTS`, `INJECTS` | -| Containment | `DECLARES`, `DECLARES_CLIENT` | +| Containment | `DECLARES`, `DECLARES_CLIENT`, `DECLARES_PRODUCER` | | Method overrides | `OVERRIDES` | | Method calls | `CALLS` | | Service boundary | `EXPOSES` | -| Cross-service | `HTTP_CALLS` (Client→Route), `ASYNC_CALLS` (Symbol→Route until Producer) | +| Cross-service | `HTTP_CALLS` (Client→Route), `ASYNC_CALLS` (Producer→Route) | For exact argument shapes, recovery playbook, and slash aliases see [`docs/AGENT-GUIDE.md`](https://github.com/HumanBean17/java-codebase-rag/blob/master/docs/AGENT-GUIDE.md) in the java-codebase-rag repo. diff --git a/java_ontology.py b/java_ontology.py index 41737a9..10d61f6 100644 --- a/java_ontology.py +++ b/java_ontology.py @@ -301,6 +301,26 @@ class EdgeSpec: "alien_subject": "DECLARES_CLIENT connects method Symbol → Client", }, ), + "DECLARES_PRODUCER": EdgeSpec( + name="DECLARES_PRODUCER", + src="Symbol", + dst="Producer", + cardinality="one_to_many", + brownfield_resolver_sourced=True, + attrs=( + EdgeAttr("confidence", "DOUBLE", "producer declaration confidence in [0.0, 1.0]"), + EdgeAttr("strategy", "STRING", "producer resolution strategy literal"), + ), + purpose="method declares an outbound async producer call site", + member_only=True, + typical_traversals={ + "type_subject": _SYMBOL_TYPE_TRAVERSAL.format( + id="{id}", direction="{direction}", edge="DECLARES_PRODUCER", + ), + "member_subject": "neighbors(['{id}'],'out',['DECLARES_PRODUCER'])", + "alien_subject": "DECLARES_PRODUCER connects method Symbol → Producer", + }, + ), "HTTP_CALLS": EdgeSpec( name="HTTP_CALLS", src="Client", @@ -337,7 +357,7 @@ class EdgeSpec: ), "ASYNC_CALLS": EdgeSpec( name="ASYNC_CALLS", - src="Symbol", + src="Producer", dst="Route", cardinality="many_to_many", brownfield_resolver_sourced=True, @@ -348,25 +368,24 @@ class EdgeSpec: EdgeAttr("raw_topic", "STRING", "uninterpolated topic template from the call site"), EdgeAttr("match", "STRING", "cross_service|intra_service|ambiguous|phantom|unresolved"), ), - purpose="resolved async call from declaring method to topic route (pre-flip: Symbol→Route; PR-C: Producer→Route)", + purpose="resolved async call from a declared Producer to a topic route", typical_traversals={ - "type_subject_current": ( - "neighbors(['{id}'],'out',['DECLARES']) " - "then neighbors(member_ids,'out',['ASYNC_CALLS'])" - ), "type_subject": ( "neighbors(['{id}'],'out',['DECLARES']) " "then neighbors(member_ids,'out',['DECLARES_PRODUCER']) " "then neighbors(producer_ids,'out',['ASYNC_CALLS'])" ), - "member_subject_current": "neighbors(['{id}'],'out',['ASYNC_CALLS'])", "member_subject": ( "neighbors(['{id}'],'out',['DECLARES_PRODUCER']) " "then neighbors(producer_ids,'out',['ASYNC_CALLS'])" ), + "route_subject": ( + "neighbors(['{id}'],'in',['ASYNC_CALLS']) " + "then neighbors(producer_ids,'in',['DECLARES_PRODUCER']) for declaring method" + ), "alien_subject": ( - "ASYNC_CALLS is Symbol→Route until PR-C; use member_subject_current. " - "After PR-C (Producer→Route), use member_subject via DECLARES_PRODUCER" + "ASYNC_CALLS connects Producer→Route; use DECLARES_PRODUCER from a method Symbol, " + "or neighbors(producer_id,'out',['ASYNC_CALLS']) from a Producer id" ), }, ), diff --git a/kuzu_queries.py b/kuzu_queries.py index 41c0c33..1b991d5 100644 --- a/kuzu_queries.py +++ b/kuzu_queries.py @@ -143,6 +143,8 @@ class RouteCaller: match: str target_service: str = "" raw_uri: str = "" + topic: str = "" + broker: str = "" def _symbol_return_for(alias: str) -> str: @@ -208,6 +210,7 @@ def _scope_filters( "CALLS", "EXPOSES", "DECLARES_CLIENT", + "DECLARES_PRODUCER", "HTTP_CALLS", "ASYNC_CALLS", ) @@ -628,6 +631,7 @@ def member_edge_rollup_for(self, type_id: str) -> dict[str, dict[str, int]]: rollup: dict[str, dict[str, int]] = {} for key, rel in ( ("DECLARES.DECLARES_CLIENT", "DECLARES_CLIENT"), + ("DECLARES.DECLARES_PRODUCER", "DECLARES_PRODUCER"), ("DECLARES.EXPOSES", "EXPOSES"), ): rows = self._rows( @@ -687,6 +691,9 @@ def override_axis_rollup_for(self, method_id: str) -> dict[str, dict[str, int]]: n_dc = self._edge_row_count_from_method_ids(distinct_impl, "DECLARES_CLIENT") if n_dc > 0: rollup["OVERRIDDEN_BY.DECLARES_CLIENT"] = {"in": 0, "out": n_dc} + n_dp = self._edge_row_count_from_method_ids(distinct_impl, "DECLARES_PRODUCER") + if n_dp > 0: + rollup["OVERRIDDEN_BY.DECLARES_PRODUCER"] = {"in": 0, "out": n_dp} n_ex = self._edge_row_count_from_method_ids(distinct_impl, "EXPOSES") if n_ex > 0: rollup["OVERRIDDEN_BY.EXPOSES"] = {"in": 0, "out": n_ex} @@ -1355,13 +1362,14 @@ def _ingest_flow_row( if len(stage_results) < stage_limit: remaining = stage_limit - len(stage_results) qrf_async = ( - "MATCH (root:Symbol)-[:DECLARES]->(m1:Symbol)-[e:ASYNC_CALLS]->(rt:Route)" - "<-[:EXPOSES]-(handler:Symbol)<-[:DECLARES]-(n:Symbol) " + "MATCH (root:Symbol)-[:DECLARES]->(m1:Symbol)-[:DECLARES_PRODUCER]->(pr:Producer)" + "-[e:ASYNC_CALLS]->(rt:Route)<-[:EXPOSES]-(handler:Symbol)<-[:DECLARES]-(n:Symbol) " "WHERE root.fqn IN $fqns AND n.role IN $roles " "AND n.resolved AND n.kind IN ['class','interface','enum','record','annotation'] " "AND e.confidence >= $mc AND root.microservice <> n.microservice " f"{scrf} " - f"RETURN {_symbol_return_for('n')}, 'ASYNC_CALLS' AS edge_type, root.fqn AS from_fqn " + f"RETURN {_symbol_return_for('n')}, 'ASYNC_CALLS' AS edge_type, " + f"root.fqn AS from_fqn, pr.id AS caller_producer_id " f"LIMIT {max(1, remaining * 4)}" ) for row in self._rows(qrf_async, params_rf): @@ -1491,7 +1499,7 @@ def find_route_callers( path_template: str = "", method: str = "", ) -> list[RouteCaller]: - """HTTP callers via Client (two-hop). Async callers omitted until PR-C (Producer).""" + """HTTP callers via Client; async callers via Producer (two-hop each).""" rid = route_id or "" if not rid: params: dict[str, Any] = { @@ -1518,6 +1526,14 @@ def find_route_callers( "ORDER BY e.confidence DESC, c.id", {"rid": rid}, ) + async_rows = self._rows( + "MATCH (s:Symbol)-[:DECLARES_PRODUCER]->(p:Producer)-[e:ASYNC_CALLS]->(r:Route {id: $rid}) " + "RETURN p.id AS caller_node_id, p.microservice AS caller_microservice, " + "s.id AS declaring_symbol_id, e.confidence AS confidence, e.match AS match, " + "p.topic AS topic, p.broker AS broker " + "ORDER BY e.confidence DESC, p.id", + {"rid": rid}, + ) out: list[RouteCaller] = [] for row in http_rows: out.append( @@ -1532,10 +1548,23 @@ def find_route_callers( raw_uri=str(row.get("raw_uri") or ""), ), ) + for row in async_rows: + out.append( + RouteCaller( + caller_node_id=str(row.get("caller_node_id") or ""), + caller_node_kind="producer", + caller_microservice=str(row.get("caller_microservice") or ""), + declaring_symbol_id=str(row.get("declaring_symbol_id") or ""), + confidence=float(row.get("confidence") or 0.0), + match=str(row.get("match") or ""), + topic=str(row.get("topic") or ""), + broker=str(row.get("broker") or ""), + ), + ) return out def trace_request_flow(self, entry_route_id: str, max_hops: int = 5) -> dict[str, Any]: - """Inbound HTTP via Client two-hop. Async inbound omitted until PR-C (Producer).""" + """Inbound HTTP via Client; async inbound via Producer (two-hop each).""" hops = max(1, min(int(max_hops), 8)) inbound_http = self._rows( f"MATCH (entry:Route {{id: $rid}})<-[e:HTTP_CALLS]-(caller:Client)" @@ -1548,7 +1577,18 @@ def trace_request_flow(self, entry_route_id: str, max_hops: int = 5) -> dict[str "ORDER BY confidence DESC, caller_node_id", {"rid": entry_route_id}, ) - inbound = inbound_http + inbound_async = self._rows( + f"MATCH (entry:Route {{id: $rid}})<-[e:ASYNC_CALLS]-(caller:Producer)" + "<-[:DECLARES_PRODUCER]-(decl:Symbol) " + f"OPTIONAL MATCH (origin:Symbol)-[:CALLS*0..{hops}]->(decl) " + "RETURN DISTINCT caller.id AS caller_node_id, 'producer' AS caller_node_kind, " + "decl.id AS declaring_symbol_id, decl.fqn AS declaring_symbol_fqn, " + "caller.microservice AS microservice, e.confidence AS confidence, " + "e.match AS match, origin.id AS origin_symbol_id, origin.fqn AS origin_fqn " + "ORDER BY confidence DESC, caller_node_id", + {"rid": entry_route_id}, + ) + inbound = inbound_http + inbound_async outbound = self._rows( f"MATCH (handler:Symbol)-[:EXPOSES]->(entry:Route {{id: $rid}}) " f"OPTIONAL MATCH (handler)-[:CALLS*0..{hops}]->(next:Symbol) " @@ -1632,6 +1672,60 @@ def list_clients( ) return [self._row_to_client_dict(r) for r in self._rows(q, params)] + _PRODUCER_RETURN = ( + "p.id AS id, p.producer_kind AS producer_kind, p.topic AS topic, p.broker AS broker, " + "p.direction AS direction, p.member_fqn AS member_fqn, p.member_id AS member_id, " + "p.microservice AS microservice, p.module AS module, p.filename AS filename, " + "p.start_line AS start_line, p.end_line AS end_line, p.resolved AS resolved, " + "p.source_layer AS source_layer" + ) + + @staticmethod + def _row_to_producer_dict(row: dict[str, Any]) -> dict[str, Any]: + return { + "id": str(row.get("id") or ""), + "producer_kind": str(row.get("producer_kind") or ""), + "topic": str(row.get("topic") or ""), + "broker": str(row.get("broker") or ""), + "direction": str(row.get("direction") or ""), + "member_fqn": str(row.get("member_fqn") or ""), + "member_id": str(row.get("member_id") or ""), + "microservice": str(row.get("microservice") or ""), + "module": str(row.get("module") or ""), + "filename": str(row.get("filename") or ""), + "start_line": int(row.get("start_line") or 0), + "end_line": int(row.get("end_line") or 0), + "resolved": bool(row.get("resolved", True)), + "source_layer": str(row.get("source_layer") or "builtin"), + } + + def list_producers( + self, + *, + microservice: str | None = None, + producer_kind: str | None = None, + topic_prefix: str | None = None, + limit: int = 100, + ) -> list[dict[str, Any]]: + lim = max(1, min(int(limit), 500)) + params: dict[str, Any] = {"lim": lim} + preds: list[str] = [] + if microservice: + params["microservice"] = microservice + preds.append("p.microservice = $microservice") + if producer_kind: + params["producer_kind"] = producer_kind + preds.append("p.producer_kind = $producer_kind") + if topic_prefix: + params["topic_prefix"] = topic_prefix + preds.append("p.topic STARTS WITH $topic_prefix") + where = (" WHERE " + " AND ".join(preds)) if preds else "" + q = ( + f"MATCH (p:Producer){where} RETURN {self._PRODUCER_RETURN} " + f"ORDER BY p.microservice, p.producer_kind, p.topic, p.id LIMIT $lim" + ) + return [self._row_to_producer_dict(r) for r in self._rows(q, params)] + # ---- used by search_lancedb.graph_expand ---- def expand_fqns(self, fqns: list[str], *, depth: int = 1, diff --git a/mcp_v2.py b/mcp_v2.py index e972352..6151ac8 100644 --- a/mcp_v2.py +++ b/mcp_v2.py @@ -46,6 +46,7 @@ "OVERRIDES", "DECLARES", "DECLARES_CLIENT", + "DECLARES_PRODUCER", "CALLS", "EXPOSES", "HTTP_CALLS", @@ -114,11 +115,13 @@ class NodeFilter(BaseModel): client_kind: str | None = None target_service: str | None = None target_path_prefix: str | None = None + producer_kind: str | None = None + topic_prefix: str | None = None _NODEFILTER_FIELD_ORDER: tuple[str, ...] = tuple(NodeFilter.model_fields.keys()) -_NODEFILTER_APPLICABLE_FIELDS: dict[Literal["symbol", "route", "client"], tuple[str, ...]] = { +_NODEFILTER_APPLICABLE_FIELDS: dict[Literal["symbol", "route", "client", "producer"], tuple[str, ...]] = { "symbol": ( "microservice", "module", @@ -146,6 +149,13 @@ class NodeFilter(BaseModel): "target_path_prefix", "http_method", ), + "producer": ( + "microservice", + "module", + "source_layer", + "producer_kind", + "topic_prefix", + ), } @@ -165,13 +175,17 @@ def _populated_nodefilter_fields(nf: NodeFilter) -> set[str]: return populated -def _nodefilter_inapplicable_fields(kind: Literal["symbol", "route", "client"], nf: NodeFilter) -> list[str]: +def _nodefilter_inapplicable_fields( + kind: Literal["symbol", "route", "client", "producer"], nf: NodeFilter, +) -> list[str]: populated = _populated_nodefilter_fields(nf) applicable = set(_NODEFILTER_APPLICABLE_FIELDS[kind]) return _ordered_nodefilter_fields(populated - applicable) -def _nodefilter_applicability_error(kind: Literal["symbol", "route", "client"], nf: NodeFilter) -> str | None: +def _nodefilter_applicability_error( + kind: Literal["symbol", "route", "client", "producer"], nf: NodeFilter, +) -> str | None: inapplicable = _nodefilter_inapplicable_fields(kind, nf) if not inapplicable: return None @@ -245,7 +259,7 @@ class SearchHit(BaseModel): class NodeRef(BaseModel): id: str - kind: Literal["symbol", "route", "client"] + kind: Literal["symbol", "route", "client", "producer"] fqn: str symbol_kind: str | None = None microservice: str | None = None @@ -255,7 +269,7 @@ class NodeRef(BaseModel): class NodeRecord(BaseModel): id: str - kind: Literal["symbol", "route", "client"] + kind: Literal["symbol", "route", "client", "producer"] fqn: str data: dict[str, Any] = Field(default_factory=dict) edge_summary: dict[str, dict[str, int]] | None = Field( @@ -263,11 +277,12 @@ class NodeRecord(BaseModel): description=( "Per graph edge label, in/out incident counts. For type Symbols (class, interface, " "enum, record, annotation), may also include composed dot-keys " - "`DECLARES.DECLARES_CLIENT` and `DECLARES.EXPOSES`: 2-hop summaries " + "`DECLARES.DECLARES_CLIENT`, `DECLARES.DECLARES_PRODUCER`, and `DECLARES.EXPOSES`: 2-hop summaries " "(DECLARES to member, then that edge) — edge-row counts, not EdgeType literals; " "do not pass them to neighbors(edge_types=…). For method Symbols, may include " "override-axis virtual keys `OVERRIDDEN_BY`, `OVERRIDDEN_BY.DECLARES_CLIENT`, " - "`OVERRIDDEN_BY.EXPOSES`, plus an `OVERRIDES` map entry that **merges** stored " + "`OVERRIDDEN_BY.DECLARES_PRODUCER`, `OVERRIDDEN_BY.EXPOSES`, plus an `OVERRIDES` map entry " + "that **merges** stored " "`[:OVERRIDES]` in/out counts with the describe-time dispatch-up rollup (per " "direction `max`, so inbound stored overrides are not dropped). Those virtual / " "dot-keys are not valid neighbors(edge_types=…) arguments. The stored relationship " @@ -369,6 +384,14 @@ class NeighborsOutput(BaseModel): "c.source_layer AS source_layer" ) +_PRODUCER_RESOLVE_RETURN = ( + "p.id AS id, p.producer_kind AS producer_kind, p.topic AS topic, p.broker AS broker, " + "p.direction AS direction, p.member_fqn AS member_fqn, p.member_id AS member_id, " + "p.microservice AS microservice, p.module AS module, p.filename AS filename, " + "p.start_line AS start_line, p.end_line AS end_line, p.resolved AS resolved, " + "p.source_layer AS source_layer" +) + _RESOLVE_PRE_DEDUP_LIMIT = 50 @@ -392,17 +415,19 @@ class ResolveOutput(BaseModel): hints: list[str] = Field(default_factory=list, description=MCP_HINTS_FIELD_DESCRIPTION) -def _node_kind_from_id(id_str: str) -> Literal["symbol", "route", "client"]: +def _node_kind_from_id(id_str: str) -> Literal["symbol", "route", "client", "producer"]: if id_str.startswith("sym:"): return "symbol" if id_str.startswith("route:") or id_str.startswith("r:"): return "route" if id_str.startswith("client:") or id_str.startswith("c:"): return "client" + if id_str.startswith("producer:") or id_str.startswith("p:"): + return "producer" raise ValueError(f"Unknown id prefix for `{id_str}`") -def _resolve_node_kind(graph: KuzuGraph, node_id: str) -> Literal["symbol", "route", "client"]: +def _resolve_node_kind(graph: KuzuGraph, node_id: str) -> Literal["symbol", "route", "client", "producer"]: try: return _node_kind_from_id(node_id) except ValueError: @@ -413,6 +438,8 @@ def _resolve_node_kind(graph: KuzuGraph, node_id: str) -> Literal["symbol", "rou return "route" if graph._rows("MATCH (n:Client) WHERE n.id = $id RETURN n.id AS id LIMIT 1", {"id": node_id}): # noqa: SLF001 return "client" + if graph._rows("MATCH (n:Producer) WHERE n.id = $id RETURN n.id AS id LIMIT 1", {"id": node_id}): # noqa: SLF001 + return "producer" raise ValueError(f"Unknown id prefix for `{node_id}`") @@ -492,7 +519,7 @@ def _symbol_where_from_filter(f: NodeFilter) -> tuple[str, dict[str, Any]]: return where, params -def _node_ref_from_row(kind: Literal["symbol", "route", "client"], row: dict[str, Any]) -> NodeRef: +def _node_ref_from_row(kind: Literal["symbol", "route", "client", "producer"], row: dict[str, Any]) -> NodeRef: symbol_kind: str | None = None if kind == "symbol": fqn = str(row.get("fqn") or "") @@ -504,12 +531,17 @@ def _node_ref_from_row(kind: Literal["symbol", "route", "client"], row: dict[str path = str(row.get("path_template") or row.get("path") or "") fqn = f"{method} {path}".strip() role = None - else: + elif kind == "client": method = str(row.get("method") or "") target = str(row.get("target_service") or "") path = str(row.get("path_template") or row.get("path") or "") fqn = f"{target} {method} {path}".strip() role = None + else: + topic = str(row.get("topic") or "") + broker = str(row.get("broker") or "") + fqn = f"{topic} {broker}".strip() + role = None return NodeRef( id=str(row.get("id") or ""), kind=kind, @@ -521,7 +553,9 @@ def _node_ref_from_row(kind: Literal["symbol", "route", "client"], row: dict[str ) -def _load_node_record(graph: KuzuGraph, node_id: str, kind: Literal["symbol", "route", "client"]) -> dict[str, Any] | None: +def _load_node_record( + graph: KuzuGraph, node_id: str, kind: Literal["symbol", "route", "client", "producer"], +) -> dict[str, Any] | None: if kind == "symbol": projection = ( "n.id AS id, n.kind AS kind, n.name AS name, n.fqn AS fqn, n.package AS package, " @@ -541,7 +575,7 @@ def _load_node_record(graph: KuzuGraph, node_id: str, kind: Literal["symbol", "r "n.start_line AS start_line, n.end_line AS end_line, n.resolved AS resolved" ) label = "Route" - else: + elif kind == "client": projection = ( "n.id AS id, n.client_kind AS client_kind, n.target_service AS target_service, " "n.method AS method, n.path AS path, n.path_template AS path_template, " @@ -551,6 +585,15 @@ def _load_node_record(graph: KuzuGraph, node_id: str, kind: Literal["symbol", "r "n.source_layer AS source_layer" ) label = "Client" + else: + projection = ( + "n.id AS id, n.producer_kind AS producer_kind, n.topic AS topic, n.broker AS broker, " + "n.direction AS direction, n.member_fqn AS member_fqn, n.member_id AS member_id, " + "n.microservice AS microservice, n.module AS module, n.filename AS filename, " + "n.start_line AS start_line, n.end_line AS end_line, n.resolved AS resolved, " + "n.source_layer AS source_layer" + ) + label = "Producer" rows = graph._rows(f"MATCH (n:{label}) WHERE n.id = $id RETURN {projection}", {"id": node_id}) # noqa: SLF001 if not rows: return None @@ -598,14 +641,16 @@ def _edge_summary_for_node( return summary -def _node_matches_filter(kind: Literal["symbol", "route", "client"], row: dict[str, Any], f: NodeFilter | None) -> bool: +def _node_matches_filter( + kind: Literal["symbol", "route", "client", "producer"], row: dict[str, Any], f: NodeFilter | None, +) -> bool: if f is None: return True if f.microservice and str(row.get("microservice") or "") != f.microservice: return False if f.module and str(row.get("module") or "") != f.module: return False - if kind == "client" and f.source_layer and str(row.get("source_layer") or "") != f.source_layer: + if kind in ("client", "producer") and f.source_layer and str(row.get("source_layer") or "") != f.source_layer: return False if kind == "symbol": role = str(row.get("role") or "") @@ -634,7 +679,7 @@ def _node_matches_filter(kind: Literal["symbol", "route", "client"], row: dict[s return False if f.framework and str(row.get("framework") or "") != f.framework: return False - else: + elif kind == "client": if f.client_kind and str(row.get("client_kind") or "") != f.client_kind: return False if f.target_service and str(row.get("target_service") or "") != f.target_service: @@ -645,6 +690,13 @@ def _node_matches_filter(kind: Literal["symbol", "route", "client"], row: dict[s return False if f.http_method and str(row.get("method") or "") != f.http_method: return False + else: + if f.producer_kind and str(row.get("producer_kind") or "") != f.producer_kind: + return False + if f.topic_prefix: + topic = str(row.get("topic") or "") + if not topic.startswith(f.topic_prefix): + return False return True @@ -730,7 +782,7 @@ def search_v2( def find_v2( - kind: Literal["symbol", "route", "client"], + kind: Literal["symbol", "route", "client", "producer"], filter: NodeFilter | dict[str, Any] | str, limit: int = 25, offset: int = 0, @@ -776,7 +828,7 @@ def find_v2( limit=max(500, fetch_cap), ) rows = [r for r in rows if _node_matches_filter("route", r, nf)] - else: + elif kind == "client": rows = g.list_clients( microservice=nf.microservice, client_kind=nf.client_kind, @@ -786,6 +838,14 @@ def find_v2( limit=max(500, fetch_cap), ) rows = [r for r in rows if _node_matches_filter("client", r, nf)] + else: + rows = g.list_producers( + microservice=nf.microservice, + producer_kind=nf.producer_kind, + topic_prefix=nf.topic_prefix, + limit=max(500, fetch_cap), + ) + rows = [r for r in rows if _node_matches_filter("producer", r, nf)] has_more_results = len(rows) > int(offset) + int(limit) rows = rows[offset : offset + limit] refs = [_node_ref_from_row(kind, r) for r in rows] @@ -868,10 +928,10 @@ def _resolve_validate_identifier(raw: str) -> tuple[str | None, str | None]: def _resolve_kinds_to_search( - hint_kind: Literal["symbol", "route", "client"] | None, -) -> list[Literal["symbol", "route", "client"]]: + hint_kind: Literal["symbol", "route", "client", "producer"] | None, +) -> list[Literal["symbol", "route", "client", "producer"]]: if hint_kind is None: - return ["symbol", "route", "client"] + return ["symbol", "route", "client", "producer"] return [hint_kind] @@ -1028,6 +1088,38 @@ def _resolve_client_candidates( return out +def _resolve_producer_candidates( + g: KuzuGraph, + identifier: str, +) -> list[tuple[NodeRef, ResolveReason, int]]: + out: list[tuple[NodeRef, ResolveReason, int]] = [] + lim = _RESOLVE_PRE_DEDUP_LIMIT + + rows = g._rows( # noqa: SLF001 + f"MATCH (p:Producer) WHERE p.id = $id RETURN {_PRODUCER_RESOLVE_RETURN} LIMIT $lim", + {"id": identifier, "lim": lim}, + ) + for row in rows: + out.append((_node_ref_from_row("producer", row), "exact_id", len(identifier))) + + rows = g._rows( # noqa: SLF001 + f"MATCH (p:Producer) WHERE p.topic = $topic RETURN {_PRODUCER_RESOLVE_RETURN} LIMIT $lim", + {"topic": identifier, "lim": lim}, + ) + for row in rows: + out.append((_node_ref_from_row("producer", row), "client_target", len(identifier))) + + if not identifier.startswith("/"): + rows = g._rows( # noqa: SLF001 + f"MATCH (p:Producer) WHERE p.topic STARTS WITH $topic RETURN {_PRODUCER_RESOLVE_RETURN} LIMIT $lim", + {"topic": identifier, "lim": lim}, + ) + for row in rows: + out.append((_node_ref_from_row("producer", row), "client_target_path", len(identifier))) + + return out + + def _resolve_dedupe_candidates( raw: list[tuple[NodeRef, ResolveReason, int]], ) -> list[tuple[NodeRef, ResolveReason, int]]: @@ -1107,7 +1199,7 @@ def _resolve_seeds_for_hints(identifier: str) -> tuple[str | None, str | None]: def _resolve_finalize_success( trimmed: str, - hint_kind: Literal["symbol", "route", "client"] | None, + hint_kind: Literal["symbol", "route", "client", "producer"] | None, matches: list[ResolveCandidate], ) -> ResolveOutput: if not matches: @@ -1150,7 +1242,7 @@ def _resolve_finalize_success( def resolve_v2( identifier: str, - hint_kind: Literal["symbol", "route", "client"] | None = None, + hint_kind: Literal["symbol", "route", "client", "producer"] | None = None, graph: KuzuGraph | None = None, ) -> ResolveOutput: try: @@ -1177,8 +1269,10 @@ def resolve_v2( raw.extend(_resolve_symbol_candidates(g, trimmed)) elif kind == "route": raw.extend(_resolve_route_candidates(g, trimmed)) - else: + elif kind == "client": raw.extend(_resolve_client_candidates(g, trimmed)) + else: + raw.extend(_resolve_producer_candidates(g, trimmed)) deduped = _resolve_dedupe_candidates(raw) ranked = _resolve_rank_candidates(deduped) diff --git a/pr_analysis.py b/pr_analysis.py index 72ffd05..8c0e26a 100644 --- a/pr_analysis.py +++ b/pr_analysis.py @@ -439,9 +439,9 @@ def compute_risk(graph: Any, changed: list[ChangedSymbol]) -> PrRiskReport: {"rid": rid}, ) callers += graph._rows( - "MATCH (s:Symbol)-[e:ASYNC_CALLS]->(r:Route {id: $rid}) " + "MATCH (s:Symbol)-[:DECLARES_PRODUCER]->(p:Producer)-[e:ASYNC_CALLS]->(r:Route {id: $rid}) " "WHERE e.match = 'cross_service' " - "RETURN s.id AS id LIMIT 500", + "RETURN p.id AS id LIMIT 500", {"rid": rid}, ) cs_cross_service += len(callers) diff --git a/server.py b/server.py index ac3975e..54b169e 100644 --- a/server.py +++ b/server.py @@ -30,7 +30,7 @@ "resolve (identifier-shaped lookup for symbol/route/client — three statuses one|many|none). " "NodeFilter `filter` is a JSON object (preferred); a JSON-encoded string is also accepted as a fallback. " "Unknown filter keys and populated fields not applicable to the effective node kind fail with success=false and message. " - "Edge labels: EXTENDS, IMPLEMENTS, INJECTS, OVERRIDES, DECLARES, DECLARES_CLIENT, CALLS, EXPOSES, HTTP_CALLS, ASYNC_CALLS. " + "Edge labels: EXTENDS, IMPLEMENTS, INJECTS, OVERRIDES, DECLARES, DECLARES_CLIENT, DECLARES_PRODUCER, CALLS, EXPOSES, HTTP_CALLS, ASYNC_CALLS. " "Reprocess/init, meta, tables, diagnose-ignore, analyze-pr: use java-codebase-rag CLI — not MCP." ) @@ -392,7 +392,7 @@ async def search( ), ) async def find( - kind: Literal["symbol", "route", "client"] = Field( + kind: Literal["symbol", "route", "client", "producer"] = Field( description=( "Which graph table to search. 'symbol' = declarations, " "'route' = endpoints, 'client' = outbound clients." @@ -414,8 +414,8 @@ async def find( name="describe", description=( "Full node record plus `edge_summary` (in/out counts per stored edge label, plus optional describe-time keys). Type Symbols may add " - "composed keys DECLARES.DECLARES_CLIENT and DECLARES.EXPOSES; method Symbols may add " - "override-axis virtual keys (OVERRIDDEN_BY, OVERRIDDEN_BY.DECLARES_CLIENT, OVERRIDDEN_BY.EXPOSES, " + "composed keys DECLARES.DECLARES_CLIENT, DECLARES.DECLARES_PRODUCER, and DECLARES.EXPOSES; method Symbols may add " + "override-axis virtual keys (OVERRIDDEN_BY, OVERRIDDEN_BY.DECLARES_CLIENT, OVERRIDDEN_BY.DECLARES_PRODUCER, OVERRIDDEN_BY.EXPOSES, " "plus an `OVERRIDES` map entry that merges stored `[:OVERRIDES]` counts with the dispatch-up rollup per direction). Those dot-keys and virtual keys are " "read-only summaries—not valid `neighbors(edge_types=…)` values. The stored `OVERRIDES` relationship " "is a normal edge label and may be traversed via neighbors(edge_types=[..., \"OVERRIDES\", ...]). " @@ -498,7 +498,7 @@ async def neighbors( "route path template, client target_service, or target+path pair). Returns " "status=one (single node), many (≥2 ranked candidates with reason), or none " "(no match — fall back to search(query=...) for natural language or fuzzy text). " - "Optional hint_kind narrows to symbol, route, or client. " + "Optional hint_kind narrows to symbol, route, client, or producer. " "Successful responses may include advisory hints (same contract as other v2 tools). " "Malformed empty/whitespace identifier returns success=false. " "Examples: resolve('com.foo.Bar', hint_kind='symbol'); " @@ -510,7 +510,7 @@ async def resolve( identifier: str = Field( description="Identifier-shaped node lookup (FQN, id prefix, route path, client target, …)", ), - hint_kind: Literal["symbol", "route", "client"] | None = Field( + hint_kind: Literal["symbol", "route", "client", "producer"] | None = Field( default=None, description="Optional kind constraint. Omit to search all three kinds.", ), diff --git a/tests/test_ast_graph_build.py b/tests/test_ast_graph_build.py index 3fff42b..50f1187 100644 --- a/tests/test_ast_graph_build.py +++ b/tests/test_ast_graph_build.py @@ -51,7 +51,8 @@ def test_schema_has_all_expected_tables(kuzu_db_path: Path) -> None: # free to add more (e.g. CALLS later) without breaking this test. expected = { "Symbol", "Route", "Client", "GraphMeta", - "EXTENDS", "IMPLEMENTS", "INJECTS", "DECLARES", "OVERRIDES", "CALLS", "EXPOSES", "DECLARES_CLIENT", + "EXTENDS", "IMPLEMENTS", "INJECTS", "DECLARES", "OVERRIDES", "CALLS", "EXPOSES", + "DECLARES_CLIENT", "DECLARES_PRODUCER", } missing = expected - tables assert not missing, f"missing schema tables: {missing}; saw {tables}" diff --git a/tests/test_brownfield_clients.py b/tests/test_brownfield_clients.py index ba902fe..e58d42e 100644 --- a/tests/test_brownfield_clients.py +++ b/tests/test_brownfield_clients.py @@ -67,8 +67,8 @@ def _async_calls(db_path: Path) -> list[dict]: db = kuzu.Database(str(db_path), read_only=True) conn = kuzu.Connection(db) r = conn.execute( - "MATCH (s:Symbol)-[c:ASYNC_CALLS]->(rt:Route) " - "RETURN s.fqn AS fqn, c.strategy AS strategy, rt.topic AS topic ORDER BY fqn, topic", + "MATCH (pr:Producer)-[c:ASYNC_CALLS]->(rt:Route) " + "RETURN pr.member_fqn AS fqn, c.strategy AS strategy, rt.topic AS topic ORDER BY fqn, topic", ) out: list[dict] = [] while r.has_next(): diff --git a/tests/test_call_edges_e2e.py b/tests/test_call_edges_e2e.py index 9d7046e..99ffaae 100644 --- a/tests/test_call_edges_e2e.py +++ b/tests/test_call_edges_e2e.py @@ -41,7 +41,7 @@ def test_http_calls_table_built_on_bank_chat(kuzu_db_path: Path) -> None: def test_async_calls_table_built_on_bank_chat(kuzu_db_path: Path) -> None: - assert _scalar(kuzu_db_path, "MATCH (:Symbol)-[r:ASYNC_CALLS]->(:Route) RETURN count(r)") >= 5 + assert _scalar(kuzu_db_path, "MATCH (:Producer)-[r:ASYNC_CALLS]->(:Route) RETURN count(r)") >= 5 def test_pr_d1_emits_unresolved_match_for_all(kuzu_db_path: Path) -> None: @@ -107,3 +107,103 @@ def test_call_edges_cross_service_http_four_hop(kuzu_db_path_cross_service_smoke ) assert n >= 1 + +def _build_producer_stub(tmp_path: Path, java_body: str) -> Path: + shutil.copytree(_STUB_ROOT, tmp_path, dirs_exist_ok=True) + java_dir = tmp_path / "p" + java_dir.mkdir(parents=True, exist_ok=True) + (java_dir / "X.java").write_text(java_body, encoding="utf-8") + from _builders import build_kuzu_full_into + + db_path = tmp_path / "g.kuzu" + build_kuzu_full_into(tmp_path, db_path) + return db_path + + +def test_call_edges_declares_producer_then_async_calls_to_topic(tmp_path: Path) -> None: + db = _build_producer_stub( + tmp_path, + "package p; import com.example.rag.*; class X { " + "@CodebaseProducer(topic=\"orders\") void m() {} }", + ) + n = _scalar( + db, + "MATCH (s:Symbol)-[:DECLARES_PRODUCER]->(pr:Producer)-[:ASYNC_CALLS]->(r:Route) " + "WHERE pr.topic = 'orders' RETURN count(*)", + ) + assert n >= 1 + + +def test_call_edges_topic_inbound_async_calls_lists_producers(tmp_path: Path) -> None: + db = _build_producer_stub( + tmp_path, + "package p; import com.example.rag.*; class X { " + "@CodebaseProducer(topic=\"inbound-topic\") void m() {} }", + ) + n = _scalar( + db, + "MATCH (pr:Producer)-[:ASYNC_CALLS]->(r:Route {topic: 'inbound-topic'}) RETURN count(DISTINCT pr.id)", + ) + assert n >= 1 + + +def test_call_edges_method_two_producers_two_topics(tmp_path: Path) -> None: + db = _build_producer_stub( + tmp_path, + "package p; import com.example.rag.*; class X { " + "@CodebaseProducers({" + "@CodebaseProducer(topic=\"t1\")," + "@CodebaseProducer(topic=\"t2\")" + "}) void m() {} }", + ) + producer_topics = _scalar( + db, + "MATCH (s:Symbol)-[:DECLARES_PRODUCER]->(pr:Producer)-[:ASYNC_CALLS]->(:Route) " + "RETURN count(DISTINCT pr.id)", + ) + assert producer_topics >= 2 + + +def test_call_edges_unresolved_producer_empty_async_out(tmp_path: Path) -> None: + db = _build_producer_stub( + tmp_path, + "package p; import com.example.rag.*; class X { " + "@CodebaseProducer(topic=\"orphan-topic\") void m() {} }", + ) + producers = _scalar(db, "MATCH (pr:Producer) WHERE pr.topic = 'orphan-topic' RETURN count(pr)") + outbound = _scalar( + db, + "MATCH (pr:Producer {topic: 'orphan-topic'})-[:ASYNC_CALLS]->() RETURN count(*)", + ) + assert producers >= 1 + assert outbound >= 0 + + +def test_call_edges_cross_service_async_four_hop(kuzu_db_path_cross_service_smoke: Path) -> None: + db = kuzu_db_path_cross_service_smoke + n = _scalar( + db, + "MATCH (m:Symbol)-[:DECLARES_PRODUCER]->(pr:Producer)-[:ASYNC_CALLS]->(rt:Route)" + "<-[:EXPOSES]-(h:Symbol) RETURN count(*)", + ) + assert n >= 1 + + +def test_call_edges_method_mixed_http_client_and_async_producer(tmp_path: Path) -> None: + db = _build_producer_stub( + tmp_path, + "package p; import com.example.rag.*; class X { " + "@CodebaseHttpClient(clientKind=CodebaseClientKind.rest_template, path=\"/api\", method=CodebaseHttpMethod.GET) " + "@CodebaseProducer(topic=\"mixed-topic\") void m() {} }", + ) + http_n = _scalar( + db, + "MATCH (:Symbol)-[:DECLARES_CLIENT]->(:Client)-[:HTTP_CALLS]->(:Route) RETURN count(*)", + ) + async_n = _scalar( + db, + "MATCH (:Symbol)-[:DECLARES_PRODUCER]->(:Producer)-[:ASYNC_CALLS]->(:Route) RETURN count(*)", + ) + assert http_n >= 1 + assert async_n >= 1 + diff --git a/tests/test_client_node_extraction.py b/tests/test_client_node_extraction.py index 9ec322c..fb3e8ab 100644 --- a/tests/test_client_node_extraction.py +++ b/tests/test_client_node_extraction.py @@ -226,6 +226,29 @@ def test_client_schema_persisted_and_queryable(tmp_path: Path) -> None: assert int(meta_rows[0][2] or 0) >= 1 +def test_graph_meta_counts_producers_and_declares_producer(tmp_path: Path) -> None: + db = _build( + tmp_path, + None, + { + "p/X.java": ( + "package p; import com.example.rag.*; class X { " + "@CodebaseProducer(topic=\"meta-topic\") void m() {} }" + ), + }, + ) + tables = {row[1] for row in _rows(db, "CALL show_tables() RETURN *")} + assert "Producer" in tables + assert "DECLARES_PRODUCER" in tables + meta_rows = _rows( + db, + "MATCH (m:GraphMeta) RETURN m.producers_total, m.declares_producer_total, m.producers_by_kind", + ) + assert meta_rows + assert int(meta_rows[0][0] or 0) >= 1 + assert int(meta_rows[0][1] or 0) >= 1 + + def teardown_module() -> None: _load_brownfield_overrides.cache_clear() collect_annotation_meta_chain.cache_clear() diff --git a/tests/test_kuzu_queries.py b/tests/test_kuzu_queries.py index f1caa32..5e31128 100644 --- a/tests/test_kuzu_queries.py +++ b/tests/test_kuzu_queries.py @@ -465,6 +465,17 @@ def test_get_route_by_path_microservice_isolated(kuzu_graph_route_extraction_smo assert ra["id"] != rb["id"] +def test_find_route_callers_includes_producer_callers(kuzu_db_path_cross_service_smoke: Path) -> None: + g = KuzuGraph(str(kuzu_db_path_cross_service_smoke)) + topic_routes = [r for r in g.list_routes(limit=100) if str(r.get("topic") or "")] + callers: list = [] + for route in topic_routes: + callers = g.find_route_callers(route["id"]) + if any(c.caller_node_kind == "producer" for c in callers): + break + assert any(c.caller_node_kind == "producer" for c in callers) + + def test_find_route_callers_returns_route_caller_client_node(kuzu_db_path_cross_service_smoke: Path) -> None: from kuzu_queries import RouteCaller diff --git a/tests/test_mcp_tools.py b/tests/test_mcp_tools.py index e49b18a..2c1de8c 100644 --- a/tests/test_mcp_tools.py +++ b/tests/test_mcp_tools.py @@ -66,7 +66,7 @@ async def test_tool_input_schema_includes_expected_enums(mcp_server) -> None: neighbors_edge_types = ((neighbors_schema.get("properties") or {}).get("edge_types") or {}) assert {"java", "sql", "yaml", "all"} in _enum_sets(search_table) - assert {"symbol", "route", "client"} in _enum_sets(find_kind) + assert {"symbol", "route", "client", "producer"} in _enum_sets(find_kind) assert {"in", "out"} in _enum_sets(neighbors_direction) assert { "EXTENDS", @@ -75,6 +75,7 @@ async def test_tool_input_schema_includes_expected_enums(mcp_server) -> None: "OVERRIDES", "DECLARES", "DECLARES_CLIENT", + "DECLARES_PRODUCER", "CALLS", "EXPOSES", "HTTP_CALLS", diff --git a/tests/test_mcp_v2.py b/tests/test_mcp_v2.py index c167f56..0bfd5a0 100644 --- a/tests/test_mcp_v2.py +++ b/tests/test_mcp_v2.py @@ -183,6 +183,29 @@ def test_find_route_by_path_prefix(kuzu_graph) -> None: assert isinstance(out.results, list) +def test_find_kind_producer_returns_producer_nodes(kuzu_graph) -> None: + out = find_v2("producer", filter={}, graph=kuzu_graph) + if not out.results: + pytest.skip("no Producer nodes in session fixture") + assert out.success is True + assert all(r.kind == "producer" for r in out.results) + + +def test_resolve_hint_kind_producer(kuzu_graph) -> None: + rows = kuzu_graph.list_producers(limit=10) + if not rows: + pytest.skip("no Producer nodes in session fixture") + topic = str(rows[0].get("topic") or "") + if not topic: + pytest.skip("producer row missing topic") + out = resolve_v2(topic, hint_kind="producer", graph=kuzu_graph) + assert out.success is True + assert out.status in {"one", "many"} + if out.status == "one": + assert out.node is not None + assert out.node.kind == "producer" + + def test_find_client_by_client_kind(kuzu_graph) -> None: out = find_v2("client", {"client_kind": "feign_method"}, graph=kuzu_graph) assert out.success is True diff --git a/tests/test_mcp_v2_compose.py b/tests/test_mcp_v2_compose.py index 92ca5ea..edd4857 100644 --- a/tests/test_mcp_v2_compose.py +++ b/tests/test_mcp_v2_compose.py @@ -23,6 +23,7 @@ "CALLS", "DECLARES", "DECLARES_CLIENT", + "DECLARES_PRODUCER", "EXPOSES", "EXTENDS", "HTTP_CALLS", @@ -278,6 +279,25 @@ def test_search_describe_neighbors_chain_end_to_end(kuzu_graph, monkeypatch) -> assert neighbors_out.results +def test_describe_type_rollups_include_declares_producer(kuzu_graph) -> None: + rows = kuzu_graph._rows( # noqa: SLF001 + "MATCH (t:Symbol)-[:DECLARES]->(m:Symbol)-[e:DECLARES_PRODUCER]->(:Producer) " + "WHERE t.kind IN $kinds " + "RETURN t.id AS id, count(e) AS n ORDER BY n DESC LIMIT 1", + {"kinds": _ROLLUP_TYPE_KINDS}, + ) + if not rows: + pytest.skip("no type with DECLARES_PRODUCER members in fixture") + tid = str(rows[0]["id"]) + n = int(rows[0]["n"] or 0) + assert n >= 1 + out = describe_v2(tid, graph=kuzu_graph) + assert out.success is True + assert out.record is not None + assert out.record.edge_summary is not None + assert out.record.edge_summary["DECLARES.DECLARES_PRODUCER"]["out"] == n + + def test_describe_class_with_brownfield_clients_emits_composed_key(kuzu_graph) -> None: rows = kuzu_graph._rows( # noqa: SLF001 "MATCH (t:Symbol)-[:DECLARES]->(m:Symbol)-[e:DECLARES_CLIENT]->(:Client) " diff --git a/tests/test_pr_analysis.py b/tests/test_pr_analysis.py index 3b6e0ba..afdf341 100644 --- a/tests/test_pr_analysis.py +++ b/tests/test_pr_analysis.py @@ -196,7 +196,7 @@ def _rows(self, query, params): }] if "MATCH (s:Symbol)-[:DECLARES_CLIENT]->(c:Client)-[e:HTTP_CALLS]->(r:Route {id: $rid})" in query: return [{"id": str(i)} for i in range(6)] - if "MATCH (s:Symbol)-[e:ASYNC_CALLS]->(r:Route {id: $rid})" in query: + if "MATCH (s:Symbol)-[:DECLARES_PRODUCER]->(p:Producer)-[e:ASYNC_CALLS]->(r:Route {id: $rid})" in query: return [] return [] @@ -259,7 +259,7 @@ def _rows(self, query, params): if self._include_callers: return [{"id": "caller-1"}] return [] - if "MATCH (s:Symbol)-[e:ASYNC_CALLS]->(r:Route {id: $rid})" in query: + if "MATCH (s:Symbol)-[:DECLARES_PRODUCER]->(p:Producer)-[e:ASYNC_CALLS]->(r:Route {id: $rid})" in query: return [] return [] diff --git a/tests/test_schema_consistency.py b/tests/test_schema_consistency.py index 5931427..4bae2cd 100644 --- a/tests/test_schema_consistency.py +++ b/tests/test_schema_consistency.py @@ -62,30 +62,29 @@ def test_schema_consistency_http_calls_post_flip_client_to_route() -> None: assert spec.dst == "Route" -def test_schema_consistency_async_calls_pre_flip_symbol_to_route() -> None: +def test_schema_consistency_async_calls_post_flip_producer_to_route() -> None: spec = EDGE_SCHEMA["ASYNC_CALLS"] - assert spec.src == "Symbol" + assert spec.src == "Producer" assert spec.dst == "Route" def test_edge_schema_member_only_flags_on_method_level_edges() -> None: assert EDGE_SCHEMA["DECLARES_CLIENT"].member_only is True + assert EDGE_SCHEMA["DECLARES_PRODUCER"].member_only is True assert EDGE_SCHEMA["EXPOSES"].member_only is True assert EDGE_SCHEMA["OVERRIDES"].member_only is True assert EDGE_SCHEMA["CALLS"].member_only is True - assert "DECLARES_PRODUCER" not in EDGE_SCHEMA assert EDGE_SCHEMA["HTTP_CALLS"].member_only is False assert EDGE_SCHEMA["ASYNC_CALLS"].member_only is False -def test_http_async_typical_traversals_include_pre_flip_current_keys() -> None: +def test_http_async_typical_traversals_post_flip() -> None: http_trav = EDGE_SCHEMA["HTTP_CALLS"].typical_traversals assert "member_subject" in http_trav assert "DECLARES_CLIENT" in http_trav["member_subject"] async_trav = EDGE_SCHEMA["ASYNC_CALLS"].typical_traversals - assert "member_subject_current" in async_trav - assert "ASYNC_CALLS" in async_trav["member_subject_current"] assert "member_subject" in async_trav + assert "DECLARES_PRODUCER" in async_trav["member_subject"] def test_brownfield_resolver_strategy_literals_emitted_in_builder_subset() -> None: From 361fc0d3bfb4b4d2fe99c167085c6cb8aa540e27 Mon Sep 17 00:00:00 2001 From: Dmitry Teryaev Date: Sat, 16 May 2026 20:13:25 +0300 Subject: [PATCH 2/4] address PR review: MCP producer strings and pass6 producer_kind Update server tool descriptions for the four-kind graph surface; use ProducerRow.producer_kind in pass6 async rematch; fix AGENT-GUIDE v14 banner tense. Co-authored-by: Cursor --- build_ast_graph.py | 3 ++- docs/AGENT-GUIDE.md | 2 +- server.py | 27 +++++++++++++++++---------- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/build_ast_graph.py b/build_ast_graph.py index 7d2b50f..ee1f587 100644 --- a/build_ast_graph.py +++ b/build_ast_graph.py @@ -2142,10 +2142,11 @@ def _micro_factor(member: MemberEntry | None) -> float: member = member_by_id.get(producer.member_id) if producer else None base = row.confidence / max(1e-9, (0.3 * _micro_factor(member))) src_route = route_by_id.get(row.route_id) + async_kind = producer.producer_kind if producer else "kafka_send" call = OutgoingCallDecl( method_fqn=f"{member.parent_fqn}#{member.decl.signature}" if member else "", method_sig=member.decl.signature if member else "", - client_kind="kafka_send", + client_kind=async_kind, channel="async", feign_target_name="", feign_target_url="", diff --git a/docs/AGENT-GUIDE.md b/docs/AGENT-GUIDE.md index d187cad..1efacc8 100644 --- a/docs/AGENT-GUIDE.md +++ b/docs/AGENT-GUIDE.md @@ -15,7 +15,7 @@ > Calibrated against ontology version **14** (see `ast_java.ONTOLOGY_VERSION` / > `java_ontology.EDGE_SCHEMA` + valid sets): canonical edge navigation schema in > `docs/EDGE-NAVIGATION.md`. v14 re-index required; `HTTP_CALLS` is `Client → Route`; -> PR-C adds `Producer` + `DECLARES_PRODUCER` and flips `ASYNC_CALLS`. +> `Producer` + `DECLARES_PRODUCER` and `ASYNC_CALLS` (`Producer → Route`) ship in v14. > Still includes stored `OVERRIDES` Symbol→Symbol edges and v12 HTTP brownfield > (`@CodebaseHttpClient`, shared `CodebaseHttpMethod` enum, inbound layer-C HTTP routes > replace same-method built-in rows). **Design rationale:** navigation surface and tools — diff --git a/server.py b/server.py index 54b169e..1aa9343 100644 --- a/server.py +++ b/server.py @@ -27,7 +27,7 @@ "Java codebase graph navigator (LanceDB + Kuzu). " "Tools: search (NL/code locate), find (structured NodeFilter), describe (one node + edge_summary: stored edge-label counts and optional composed keys for type Symbols and override-axis virtual keys for method Symbols), " "neighbors (one hop; you MUST pass direction in|out AND edge_types list — no defaults), " - "resolve (identifier-shaped lookup for symbol/route/client — three statuses one|many|none). " + "resolve (identifier-shaped lookup for symbol/route/client/producer — three statuses one|many|none). " "NodeFilter `filter` is a JSON object (preferred); a JSON-encoded string is also accepted as a fallback. " "Unknown filter keys and populated fields not applicable to the effective node kind fail with success=false and message. " "Edge labels: EXTENDS, IMPLEMENTS, INJECTS, OVERRIDES, DECLARES, DECLARES_CLIENT, DECLARES_PRODUCER, CALLS, EXPOSES, HTTP_CALLS, ASYNC_CALLS. " @@ -385,7 +385,8 @@ async def search( "Exact structured listing for one node kind. Per-kind applicable fields: **symbol** — " "microservice, module, role, exclude_roles, annotation, capability, fqn_prefix, symbol_kind, symbol_kinds; " "**route** — microservice, module, http_method, path_prefix, framework; **client** — microservice, module, " - "source_layer, client_kind, target_service, target_path_prefix, http_method. " + "source_layer, client_kind, target_service, target_path_prefix, http_method; **producer** — microservice, " + "module, source_layer, producer_kind, topic_prefix. " "Wildcards in prefix fields are rejected. An empty filter (`{}`) or `filter=None` means no predicate (all nodes of " "that kind; use pagination). Unknown keys or inapplicable populated fields return success=false. " "Successful responses echo `limit`/`offset` and may include `hints` (advisory next-step strings)." @@ -395,7 +396,8 @@ async def find( kind: Literal["symbol", "route", "client", "producer"] = Field( description=( "Which graph table to search. 'symbol' = declarations, " - "'route' = endpoints, 'client' = outbound clients." + "'route' = endpoints, 'client' = outbound HTTP clients, " + "'producer' = outbound async producers." ) ), filter: dict[str, Any] | str = Field( @@ -429,8 +431,9 @@ async def describe( id: str | None = Field( default=None, description=( - "Graph node id: sym:, route:, or client: prefix " - '(e.g. sym:com.bank.chat.core.api.ChatController#joinOperator(JoinOperatorRequest)). ' + "Graph node id: sym:, route:, client:, or producer: prefix " + '(e.g. sym:com.bank.chat.core.api.ChatController#joinOperator(JoinOperatorRequest); ' + "producer:svc|com.foo.Bar#send()|kafka_send|orders.created). " "When set, takes precedence over fqn." ), ), @@ -453,7 +456,9 @@ async def describe( ), ) async def neighbors( - ids: str | list[str] = Field(description="Origin symbol/route/client id, or list for batch"), + ids: str | list[str] = Field( + description="Origin symbol/route/client/producer id, or list for batch", + ), direction: Literal["in", "out"] = Field( description="Required. 'in' = predecessors (callers), 'out' = successors (callees). No default.", ), @@ -494,8 +499,8 @@ async def neighbors( @mcp.tool( name="resolve", description=( - "Identifier-shaped node lookup (FQN, sym:/route:/client: id, HTTP method+path, " - "route path template, client target_service, or target+path pair). Returns " + "Identifier-shaped node lookup (FQN, sym:/route:/client:/producer: id, HTTP method+path, " + "route path template, client target_service, target+path pair, or producer topic). Returns " "status=one (single node), many (≥2 ranked candidates with reason), or none " "(no match — fall back to search(query=...) for natural language or fuzzy text). " "Optional hint_kind narrows to symbol, route, client, or producer. " @@ -508,11 +513,13 @@ async def neighbors( ) async def resolve( identifier: str = Field( - description="Identifier-shaped node lookup (FQN, id prefix, route path, client target, …)", + description=( + "Identifier-shaped node lookup (FQN, id prefix, route path, client target, producer topic, …)" + ), ), hint_kind: Literal["symbol", "route", "client", "producer"] | None = Field( default=None, - description="Optional kind constraint. Omit to search all three kinds.", + description="Optional kind constraint. Omit to search symbol, route, client, and producer.", ), ) -> mcp_v2.ResolveOutput: return await asyncio.to_thread(mcp_v2.resolve_v2, identifier, hint_kind, None) From e19ddc7540b955d43193bc43ce7e29a0f856214a Mon Sep 17 00:00:00 2001 From: Dmitry Teryaev Date: Sat, 16 May 2026 20:17:29 +0300 Subject: [PATCH 3/4] fix PR follow-ups: producer id example, resolve reasons, pass6 test Use p: in describe id help; add producer_topic resolve reasons; document topic-level _producer_id; lock pass6 async_kind via stream_bridge test. Co-authored-by: Cursor --- build_ast_graph.py | 2 ++ java_ontology.py | 4 ++++ mcp_v2.py | 6 ++++-- server.py | 3 ++- tests/test_client_hint_recovery.py | 25 ++++++++++++++++++++++++- tests/test_mcp_v2.py | 29 +++++++++++++++++++++++++++++ 6 files changed, 65 insertions(+), 4 deletions(-) diff --git a/build_ast_graph.py b/build_ast_graph.py index ee1f587..aa01e04 100644 --- a/build_ast_graph.py +++ b/build_ast_graph.py @@ -1420,6 +1420,8 @@ def _producer_id( producer_kind: str, topic: str, ) -> str: + # Topic-level identity per method+kind; broker is intentionally omitted so the same + # resolved topic on one method shares one Producer node across call sites. key = f"{microservice}|{member_fqn}|{producer_kind}|{topic}" return f"p:{hashlib.sha1(key.encode()).hexdigest()[:16]}" diff --git a/java_ontology.py b/java_ontology.py index 10d61f6..a0d0af6 100644 --- a/java_ontology.py +++ b/java_ontology.py @@ -84,6 +84,8 @@ "route_method_path", "client_target", "client_target_path", + "producer_topic", + "producer_topic_prefix", )) # Brownfield / fallback edge resolution strategies (hints v2 neighbors fuzzy signal). @@ -400,6 +402,8 @@ class EdgeSpec: "route_method_path", "client_target", "client_target_path", + "producer_topic", + "producer_topic_prefix", ] __all__ = [ diff --git a/mcp_v2.py b/mcp_v2.py index 6151ac8..ac44c5c 100644 --- a/mcp_v2.py +++ b/mcp_v2.py @@ -356,10 +356,12 @@ class NeighborsOutput(BaseModel): "exact_fqn": 1, "route_method_path": 1, "client_target_path": 1, + "producer_topic_prefix": 1, "fqn_suffix": 2, "route_template": 2, "short_name": 3, "client_target": 3, + "producer_topic": 3, } _SYMBOL_RESOLVE_RETURN = ( @@ -1107,7 +1109,7 @@ def _resolve_producer_candidates( {"topic": identifier, "lim": lim}, ) for row in rows: - out.append((_node_ref_from_row("producer", row), "client_target", len(identifier))) + out.append((_node_ref_from_row("producer", row), "producer_topic", len(identifier))) if not identifier.startswith("/"): rows = g._rows( # noqa: SLF001 @@ -1115,7 +1117,7 @@ def _resolve_producer_candidates( {"topic": identifier, "lim": lim}, ) for row in rows: - out.append((_node_ref_from_row("producer", row), "client_target_path", len(identifier))) + out.append((_node_ref_from_row("producer", row), "producer_topic_prefix", len(identifier))) return out diff --git a/server.py b/server.py index 1aa9343..151dd49 100644 --- a/server.py +++ b/server.py @@ -433,7 +433,8 @@ async def describe( description=( "Graph node id: sym:, route:, client:, or producer: prefix " '(e.g. sym:com.bank.chat.core.api.ChatController#joinOperator(JoinOperatorRequest); ' - "producer:svc|com.foo.Bar#send()|kafka_send|orders.created). " + "producer: p:a1b2c3d4e5f67890 — the stored id from the graph, not a human-readable " + "pipe key). For producers by topic, prefer resolve(identifier=, hint_kind='producer'). " "When set, takes precedence over fqn." ), ), diff --git a/tests/test_client_hint_recovery.py b/tests/test_client_hint_recovery.py index 89c8df0..9ff8d58 100644 --- a/tests/test_client_hint_recovery.py +++ b/tests/test_client_hint_recovery.py @@ -1,11 +1,13 @@ from __future__ import annotations from pathlib import Path +from unittest.mock import patch -from build_ast_graph import GraphTables, pass6_match_edges, write_kuzu +from build_ast_graph import GraphTables, _match_call_edge, pass6_match_edges, write_kuzu from kuzu_queries import KuzuGraph _FIXTURE = Path(__file__).resolve().parent / "fixtures" / "cross_service_smoke" +_HTTP_CALLER = Path(__file__).resolve().parent / "fixtures" / "http_caller_smoke" def _build_tables() -> GraphTables: @@ -84,6 +86,27 @@ def test_find_route_callers_still_returns_expected_feign_caller(tmp_path: Path) assert all(c.caller_node_kind == "client" for c in callers) +def test_pass6_async_rematch_uses_producer_row_kind() -> None: + from _builders import build_graph_tables_to + + tables = build_graph_tables_to(_HTTP_CALLER, max_pass=5) + producer = next(p for p in tables.producer_rows if p.producer_kind == "stream_bridge_send") + row = next(r for r in tables.async_call_rows if r.producer_id == producer.id) + assert row.match == "unresolved" + row.route_id = "missing:route:id" + + seen_kinds: list[str] = [] + + def capture(call, routes, caller_microservice): + seen_kinds.append(call.client_kind) + return _match_call_edge(call, routes, caller_microservice) + + with patch("build_ast_graph._match_call_edge", capture): + pass6_match_edges(tables, verbose=False) + + assert "stream_bridge_send" in seen_kinds + + def test_missing_client_hint_falls_back_to_existing_unresolved_or_phantom_flow() -> None: tables = _build_tables() caller_id = _member_id( diff --git a/tests/test_mcp_v2.py b/tests/test_mcp_v2.py index 0bfd5a0..d6685d6 100644 --- a/tests/test_mcp_v2.py +++ b/tests/test_mcp_v2.py @@ -1156,6 +1156,7 @@ def test_resolve_wildcard_identifier_returns_none(kuzu_graph) -> None: def test_resolve_every_reason_in_closed_set_appears() -> None: from mcp_v2 import ( _resolve_client_candidates, + _resolve_producer_candidates, _resolve_route_candidates, _resolve_symbol_candidates, ) @@ -1205,6 +1206,22 @@ def test_resolve_every_reason_in_closed_set_appears() -> None: "resolved": True, "source_layer": "builtin", } + producer_row = { + "id": "p:reasonhash000000", + "producer_kind": "kafka_send", + "topic": "orders.created", + "broker": "", + "direction": "produce", + "member_fqn": "com.reason.Producer#send()", + "member_id": "sym:reasonproducer", + "microservice": "svc", + "module": "mod", + "filename": "P.java", + "start_line": 1, + "end_line": 1, + "resolved": True, + "source_layer": "builtin", + } class ReasonGraph: def _rows(self, query: str, params: dict | None = None) -> list: @@ -1228,6 +1245,12 @@ def _rows(self, query: str, params: dict | None = None) -> list: return [client_row] if "WHERE c.target_service = $target" in query: return [client_row] + if "WHERE p.id = $id" in query: + return [producer_row] + if "WHERE p.topic = $topic" in query: + return [producer_row] + if "p.topic STARTS WITH $topic" in query: + return [producer_row] return [] g = ReasonGraph() # type: ignore[arg-type] @@ -1250,6 +1273,12 @@ def _rows(self, query: str, params: dict | None = None) -> list: seen.add(reason) for _node, reason, _spec in _resolve_client_candidates(g, "reasonsvc /reason"): seen.add(reason) + for _node, reason, _spec in _resolve_producer_candidates(g, "p:reasonhash000000"): + seen.add(reason) + for _node, reason, _spec in _resolve_producer_candidates(g, "orders.created"): + seen.add(reason) + for _node, reason, _spec in _resolve_producer_candidates(g, "orders"): + seen.add(reason) assert seen == set(VALID_RESOLVE_REASONS) From c7f8a0624a3e6623d06037d251c96883ab2c1b5d Mon Sep 17 00:00:00 2001 From: Dmitry Teryaev Date: Sat, 16 May 2026 20:19:25 +0300 Subject: [PATCH 4/4] move SCHEMA-V2 propose and plans to completed Relocate SCHEMA-V2-PROPOSE, PLAN-SCHEMA-V2, and CURSOR-PROMPTS-SCHEMA-V2; update cross-links and PROPOSES-ORDER (PR-D remains under HINTS-V3). Co-authored-by: Cursor --- docs/PROPOSES-ORDER.md | 16 ++++----- plans/CURSOR-PROMPTS-HINTS-V3.md | 2 +- plans/PLAN-HINTS-V3.md | 2 +- .../CURSOR-PROMPTS-SCHEMA-V2.md | 34 +++++++++---------- plans/{ => completed}/PLAN-SCHEMA-V2.md | 20 +++++------ propose/HINTS-V3-PROPOSE.md | 4 +-- propose/{ => completed}/SCHEMA-V2-PROPOSE.md | 8 ++--- 7 files changed, 42 insertions(+), 44 deletions(-) rename plans/{ => completed}/CURSOR-PROMPTS-SCHEMA-V2.md (87%) rename plans/{ => completed}/PLAN-SCHEMA-V2.md (96%) rename propose/{ => completed}/SCHEMA-V2-PROPOSE.md (98%) diff --git a/docs/PROPOSES-ORDER.md b/docs/PROPOSES-ORDER.md index 3c71fbb..bf5d5a1 100644 --- a/docs/PROPOSES-ORDER.md +++ b/docs/PROPOSES-ORDER.md @@ -11,11 +11,11 @@ When two or more proposes touch overlapping subsystems, the order they lock and ## Current in-flight set (as of 2026-05-16) -1. **SCHEMA-V2** — `propose/SCHEMA-V2-PROPOSE.md` (`Status: locked — implementing`; propose [#151](https://github.com/HumanBean17/java-codebase-rag/pull/151), plan [#155](https://github.com/HumanBean17/java-codebase-rag/pull/155)) - - 4 code PRs: PR-A (`EDGE_SCHEMA` + ontology v14 bump), PR-B (`HTTP_CALLS` flip + downstream API), PR-C (`Producer` node + `ASYNC_CALLS` flip + GraphMeta + MCP parity), PR-D (hints v3). -2. **HINTS-V3** — `propose/HINTS-V3-PROPOSE.md` (`Status: locked — implementing via SCHEMA-V2 PR-D`; propose [#154](https://github.com/HumanBean17/java-codebase-rag/pull/154), plan [#155](https://github.com/HumanBean17/java-codebase-rag/pull/155)) +1. **HINTS-V3** — `propose/HINTS-V3-PROPOSE.md` (`Status: locked — implementing via SCHEMA-V2 PR-D`; propose [#154](https://github.com/HumanBean17/java-codebase-rag/pull/154), plan [#155](https://github.com/HumanBean17/java-codebase-rag/pull/155)) - Implementation = SCHEMA-V2 PR-D (same PR). +**SCHEMA-V2** (PR-A/B/C) is **completed** — artefacts in `propose/completed/SCHEMA-V2-PROPOSE.md`, `plans/completed/PLAN-SCHEMA-V2.md`, `plans/completed/CURSOR-PROMPTS-SCHEMA-V2.md`. PR-D remains under HINTS-V3. + No other proposes are in flight. ## Lock and merge order @@ -23,7 +23,7 @@ No other proposes are in flight. ### Phase 1 — propose artefacts ``` -SCHEMA-V2-PROPOSE.md [merged #151 — locked for code sequence] +SCHEMA-V2-PROPOSE.md [merged #151 — completed; propose/completed/] ↓ HINTS-V3-PROPOSE.md [merged #154 — implementing in PR-D] ``` @@ -35,13 +35,13 @@ HINTS-V3-PROPOSE.md [merged #154 — implementing in PR-D] ### Phase 2 — plan + cursor-prompt artefacts ``` -plans/PLAN-SCHEMA-V2.md [landed #155] -plans/CURSOR-PROMPTS-SCHEMA-V2.md +plans/completed/PLAN-SCHEMA-V2.md [landed #155; PR-A/B/C done] +plans/completed/CURSOR-PROMPTS-SCHEMA-V2.md plans/PLAN-HINTS-V3.md plans/CURSOR-PROMPTS-HINTS-V3.md ``` -SCHEMA-V2 Decision 29: `PLAN-SCHEMA-V2.md` + `CURSOR-PROMPTS-SCHEMA-V2.md` are merge gates for **PR-A** (satisfied when [#155](https://github.com/HumanBean17/java-codebase-rag/pull/155) is on `master`). +SCHEMA-V2 Decision 29: plan + prompts merge gates for **PR-A** — satisfied ([#155](https://github.com/HumanBean17/java-codebase-rag/pull/155) on `master`; artefacts now under `plans/completed/`). By analogy: `PLAN-HINTS-V3.md` + `CURSOR-PROMPTS-HINTS-V3.md` are merge gates for **PR-D** (same PR). @@ -76,4 +76,4 @@ No PR in this set is parallelizable. ## Maintenance -Update this file when a propose enters draft, locks, or its code PRs land. After PR-D merges, collapse to "no proposes in flight" until the next effort starts. +Update this file when a propose enters draft, locks, or its code PRs land. After PR-D merges, move HINTS-V3 artefacts to `completed/` and collapse to "no proposes in flight" until the next effort starts. diff --git a/plans/CURSOR-PROMPTS-HINTS-V3.md b/plans/CURSOR-PROMPTS-HINTS-V3.md index 20e1f59..691d3ab 100644 --- a/plans/CURSOR-PROMPTS-HINTS-V3.md +++ b/plans/CURSOR-PROMPTS-HINTS-V3.md @@ -29,7 +29,7 @@ One prompt: **PR-D** (= SCHEMA-V2 PR-D in sequence doc). - `@plans/PLAN-HINTS-V3.md` - `@propose/HINTS-V3-PROPOSE.md` (§3–§4, §6, Decisions §7) -- `@propose/SCHEMA-V2-PROPOSE.md` (§3.12 preview — read only) +- `@propose/completed/SCHEMA-V2-PROPOSE.md` (§3.12 preview — read only) - `@java_ontology.py` (`EDGE_SCHEMA`, `FUZZY_STRATEGY_SET`) - `@mcp_hints.py` - `@mcp_v2.py` (`neighbors_v2`, `_load_node_record`) diff --git a/plans/PLAN-HINTS-V3.md b/plans/PLAN-HINTS-V3.md index b416465..921e035 100644 --- a/plans/PLAN-HINTS-V3.md +++ b/plans/PLAN-HINTS-V3.md @@ -5,7 +5,7 @@ Status: **active (implementing)**. This plan implements Depends on: -- [`propose/SCHEMA-V2-PROPOSE.md`](../propose/SCHEMA-V2-PROPOSE.md) — `EDGE_SCHEMA`, post-flip endpoints, `brownfield_resolver_sourced`, `typical_traversals`, `BROWNFIELD_RESOLVER_STRATEGY_SET` (PR-A–C on `master`). +- [`propose/completed/SCHEMA-V2-PROPOSE.md`](../propose/completed/SCHEMA-V2-PROPOSE.md) — `EDGE_SCHEMA`, post-flip endpoints, `brownfield_resolver_sourced`, `typical_traversals`, `BROWNFIELD_RESOLVER_STRATEGY_SET` (PR-A–C on `master`). - **Code PR-D** runs only after SCHEMA-V2 **PR-A, PR-B, PR-C** are merged. - **Propose gate:** `HINTS-V3-PROPOSE.md` merged to `master` before SCHEMA-V2 **PR-A** starts (may stay GitHub `draft`). - **Lock gate:** `HINTS-V3-PROPOSE.md` `Status: locked` before **PR-D** merges. diff --git a/plans/CURSOR-PROMPTS-SCHEMA-V2.md b/plans/completed/CURSOR-PROMPTS-SCHEMA-V2.md similarity index 87% rename from plans/CURSOR-PROMPTS-SCHEMA-V2.md rename to plans/completed/CURSOR-PROMPTS-SCHEMA-V2.md index e095364..ad727a7 100644 --- a/plans/CURSOR-PROMPTS-SCHEMA-V2.md +++ b/plans/completed/CURSOR-PROMPTS-SCHEMA-V2.md @@ -1,12 +1,12 @@ # Cursor task prompts — SCHEMA-V2 -Status: **active (implementing)**. Plan: -[`plans/PLAN-SCHEMA-V2.md`](./PLAN-SCHEMA-V2.md). Propose: -[`propose/SCHEMA-V2-PROPOSE.md`](../propose/SCHEMA-V2-PROPOSE.md). Sequence: -[`docs/PROPOSES-ORDER.md`](../docs/PROPOSES-ORDER.md). +Status: **completed** (reference template). Plan: +[`plans/completed/PLAN-SCHEMA-V2.md`](./PLAN-SCHEMA-V2.md). Propose: +[`propose/completed/SCHEMA-V2-PROPOSE.md`](../../propose/completed/SCHEMA-V2-PROPOSE.md). Sequence: +[`docs/PROPOSES-ORDER.md`](../../docs/PROPOSES-ORDER.md). One prompt per code PR (**PR-A / PR-B / PR-C**). PR-D (hints) is in -[`plans/CURSOR-PROMPTS-HINTS-V3.md`](./CURSOR-PROMPTS-HINTS-V3.md). +[`plans/CURSOR-PROMPTS-HINTS-V3.md`](../CURSOR-PROMPTS-HINTS-V3.md). **Landing order:** PR-SCHEMA-V2-A → PR-SCHEMA-V2-B → PR-SCHEMA-V2-C. Do not start the next PR until the previous is merged to `master`. @@ -24,13 +24,13 @@ One prompt per code PR (**PR-A / PR-B / PR-C**). PR-D (hints) is in **Branch:** `feat/schema-v2-edge-schema` off `master`. **Base:** `master` (with HINTS-V3 propose merged). -**Plan section:** [`plans/PLAN-SCHEMA-V2.md`](./PLAN-SCHEMA-V2.md) § PR-A. +**Plan section:** [`plans/completed/PLAN-SCHEMA-V2.md`](./PLAN-SCHEMA-V2.md) § PR-A. **PR title:** `feat(schema): add EDGE_SCHEMA to java_ontology, generate docs/EDGE-NAVIGATION.md, bump ontology to v14` **Attach (`@-files`):** -- `@plans/PLAN-SCHEMA-V2.md` (PR-A only) -- `@propose/SCHEMA-V2-PROPOSE.md` (§3.1, §3.5–§3.6, §6 PR-A, Appendix A, Decisions 6–9, 28–29, 31) +- `@plans/completed/PLAN-SCHEMA-V2.md` (PR-A only) +- `@propose/completed/SCHEMA-V2-PROPOSE.md` (§3.1, §3.5–§3.6, §6 PR-A, Appendix A, Decisions 6–9, 28–29, 31) - `@propose/HINTS-V3-PROPOSE.md` (§3.4–§3.5 `member_only` / `typical_traversals` — read only) - `@java_ontology.py` - `@build_ast_graph.py` (DDL constants — do not flip endpoints) @@ -42,7 +42,7 @@ One prompt per code PR (**PR-A / PR-B / PR-C**). PR-D (hints) is in **Prompt:** ```` -You are implementing PR-SCHEMA-V2-A from `plans/PLAN-SCHEMA-V2.md` (the **PR-A** section). +You are implementing PR-SCHEMA-V2-A from `plans/completed/PLAN-SCHEMA-V2.md` (the **PR-A** section). Read PR-A **File-by-file changes** and **Tests for PR-A** before coding. Plan wins over this prompt; propose supplies locked shapes. @@ -107,13 +107,13 @@ Before PR open: **Branch:** `feat/schema-v2-http-calls-client-route` off `master` **after PR-A merged**. **Base:** `master` at merge commit of PR-A. -**Plan section:** [`plans/PLAN-SCHEMA-V2.md`](./PLAN-SCHEMA-V2.md) § PR-B. +**Plan section:** [`plans/completed/PLAN-SCHEMA-V2.md`](./PLAN-SCHEMA-V2.md) § PR-B. **PR title:** `feat(schema): HTTP_CALLS originates from Client, not Symbol` **Attach (`@-files`):** -- `@plans/PLAN-SCHEMA-V2.md` (PR-B) -- `@propose/SCHEMA-V2-PROPOSE.md` (§3.3–§3.4, §3.7, §3.10, §4 HTTP UCs, PR-B §6) +- `@plans/completed/PLAN-SCHEMA-V2.md` (PR-B) +- `@propose/completed/SCHEMA-V2-PROPOSE.md` (§3.3–§3.4, §3.7, §3.10, §4 HTTP UCs, PR-B §6) - `@java_ontology.py` - `@build_ast_graph.py` - `@kuzu_queries.py` @@ -131,7 +131,7 @@ Before PR open: **Prompt:** ```` -You are implementing PR-SCHEMA-V2-B from `plans/PLAN-SCHEMA-V2.md` (**PR-B**). +You are implementing PR-SCHEMA-V2-B from `plans/completed/PLAN-SCHEMA-V2.md` (**PR-B**). PR-A is on `master` (`EDGE_SCHEMA`, ontology 14, doc generator). Do not re-land PR-A work. @@ -186,13 +186,13 @@ Before PR open: `.venv/bin/ruff check .` and `.venv/bin/python -m pytest tests - **Branch:** `feat/schema-v2-producer-async-calls` off `master` **after PR-B merged**. **Base:** `master` at merge commit of PR-B. -**Plan section:** [`plans/PLAN-SCHEMA-V2.md`](./PLAN-SCHEMA-V2.md) § PR-C. +**Plan section:** [`plans/completed/PLAN-SCHEMA-V2.md`](./PLAN-SCHEMA-V2.md) § PR-C. **PR title:** `feat(schema): introduce Producer node and route ASYNC_CALLS through it` **Attach (`@-files`):** -- `@plans/PLAN-SCHEMA-V2.md` (PR-C) -- `@propose/SCHEMA-V2-PROPOSE.md` (§3.2, §3.3–§3.4, §3.6–§3.9, §4 async UCs, PR-C §6) +- `@plans/completed/PLAN-SCHEMA-V2.md` (PR-C) +- `@propose/completed/SCHEMA-V2-PROPOSE.md` (§3.2, §3.3–§3.4, §3.6–§3.9, §4 async UCs, PR-C §6) - `@graph_enrich.py` (`AsyncProducerHint`) - `@java_ontology.py` - `@build_ast_graph.py` @@ -209,7 +209,7 @@ Before PR open: `.venv/bin/ruff check .` and `.venv/bin/python -m pytest tests - **Prompt:** ```` -You are implementing PR-SCHEMA-V2-C from `plans/PLAN-SCHEMA-V2.md` (**PR-C**). +You are implementing PR-SCHEMA-V2-C from `plans/completed/PLAN-SCHEMA-V2.md` (**PR-C**). PR-B is on `master` (HTTP_CALLS from Client). Do not revert HTTP shape. diff --git a/plans/PLAN-SCHEMA-V2.md b/plans/completed/PLAN-SCHEMA-V2.md similarity index 96% rename from plans/PLAN-SCHEMA-V2.md rename to plans/completed/PLAN-SCHEMA-V2.md index eda4649..cbb6bb4 100644 --- a/plans/PLAN-SCHEMA-V2.md +++ b/plans/completed/PLAN-SCHEMA-V2.md @@ -1,13 +1,13 @@ # Plan: SCHEMA-V2 (edge navigation schema, HTTP/ASYNC caller-side flips, Producer node) -Status: **active (implementing)**. This plan implements -[`propose/SCHEMA-V2-PROPOSE.md`](../propose/SCHEMA-V2-PROPOSE.md). +Status: **completed** — PR-A/B/C landed; PR-D in [`plans/PLAN-HINTS-V3.md`](../PLAN-HINTS-V3.md). Implements +[`propose/completed/SCHEMA-V2-PROPOSE.md`](../../propose/completed/SCHEMA-V2-PROPOSE.md). Depends on: - [`propose/HINTS-V3-PROPOSE.md`](../propose/HINTS-V3-PROPOSE.md) **merged to `master`** before **PR-A** implementation starts (SCHEMA-V2 Decision 30; GitHub PR may stay `draft`). - [`docs/PROPOSES-ORDER.md`](../docs/PROPOSES-ORDER.md) for lock/merge sequence across proposes and code PRs. -- **PR-D (hints v3)** is specified in [`plans/PLAN-HINTS-V3.md`](./PLAN-HINTS-V3.md) — not repeated here beyond overview/tracking. +- **PR-D (hints v3)** is specified in [`plans/PLAN-HINTS-V3.md`](../PLAN-HINTS-V3.md) — not repeated here beyond overview/tracking. ## Goal @@ -34,7 +34,7 @@ Depends on: | PR-A | `EDGE_SCHEMA`, doc generator, CI invariants, `BROWNFIELD_RESOLVER_STRATEGY_SET`, v14 bump (pre-flip DDL) | **13 → 14** | `typical_traversals` map shape (hints PR-A contract); strategy-set union completeness; v13 refusal gate timing | `test_schema_consistency.py`, `test_edge_navigation_doc.py`, ontology gate | PR-B/C/D | | PR-B | `HTTP_CALLS` flip + downstream Cypher/API + HTTP docs | Uses v14 (no second bump) | `grep` completeness for `HTTP_CALLS`; `HttpCallRow` key change; MCP/PR-analysis two-hop queries | `test_call_edges_e2e.py`, `test_kuzu_queries.py`, `test_pr_analysis.py`, brownfield HTTP | PR-C/D | | PR-C | `Producer`, `DECLARES_PRODUCER`, `ASYNC_CALLS` flip, GraphMeta, MCP producer parity, describe rollups, async docs | Uses v14 | Producer field grounding vs `Client` copy-paste; pass5 materialization order; `find`/`resolve` kind union expansion | `test_call_edges_e2e.py`, `test_brownfield_clients.py`, `test_mcp_v2.py`, `test_client_node_extraction.py`, describe rollups | PR-D | -| PR-D | Hints v3 (empty `neighbors`) | **No** (query-time) | See [`plans/PLAN-HINTS-V3.md`](./PLAN-HINTS-V3.md) | `test_mcp_hints.py` HV* | PR-A/B/C | +| PR-D | Hints v3 (empty `neighbors`) | **No** (query-time) | See [`plans/PLAN-HINTS-V3.md`](../PLAN-HINTS-V3.md) | `test_mcp_hints.py` HV* | PR-A/B/C | **Landing order:** **PR-A → PR-B → PR-C → PR-D** (PR-D plan is separate; no parallel code PRs). @@ -358,7 +358,7 @@ with every hit accounted for (fixed or justified). - `Consumer` node, `NODE_SCHEMA`, DDL codegen from `EDGE_SCHEMA`. - Multi-target Client/Producer nodes; materialized composite edges. -- Hints v3 implementation (PR-D — [`plans/PLAN-HINTS-V3.md`](./PLAN-HINTS-V3.md)). +- Hints v3 implementation (PR-D — [`plans/PLAN-HINTS-V3.md`](../PLAN-HINTS-V3.md)). - Ontology **15** or second re-index. - Ranking / incremental rebuild proposes (`RANKING-MICROSERVICE`, `TIER2-INCREMENTAL-REBUILD`). - Special-casing `tests/bank-chat-system/` in production code. @@ -370,15 +370,13 @@ with every hit accounted for (fixed or justified). 3. `find_route_callers` / `trace_request_flow` / PR-analysis / impact analysis use caller-side two-hop traversals. 4. MCP `find`/`resolve` support `producer`; type-level `describe` exposes DECLARES_PRODUCER rollups. 5. PR-D (hints v3) landed per HINTS plan — empty wrong-kind `neighbors` queries are guided. -6. `propose/SCHEMA-V2-PROPOSE.md` moved to `propose/completed/` when **PR-D** merges (whole user-visible effort done). +6. `propose/completed/SCHEMA-V2-PROPOSE.md` and this plan moved to `completed/` after PR-C (PR-D tracked separately). # Tracking -- Artefacts (`PLAN-SCHEMA-V2`, `CURSOR-PROMPTS-SCHEMA-V2`, `PLAN-HINTS-V3`, `CURSOR-PROMPTS-HINTS-V3`): _pending_ -- `PR-A`: _pending_ -- `PR-B`: _pending_ -- `PR-C`: _pending_ -- `PR-D`: _see PLAN-HINTS-V3_ +- Artefacts (`PLAN-SCHEMA-V2`, `CURSOR-PROMPTS-SCHEMA-V2`): **done** (this directory) +- `PR-A` / `PR-B` / `PR-C`: **landed** +- `PR-D`: _see [`plans/PLAN-HINTS-V3.md`](../PLAN-HINTS-V3.md)_ ## Cursor handoff diff --git a/propose/HINTS-V3-PROPOSE.md b/propose/HINTS-V3-PROPOSE.md index ea2dad3..0f01990 100644 --- a/propose/HINTS-V3-PROPOSE.md +++ b/propose/HINTS-V3-PROPOSE.md @@ -6,7 +6,7 @@ ## TL;DR -- Replace the single generic empty-neighbors template `TPL_NEIGHBORS_EMPTY_KIND_CHECK = "0 results — check if the requested edge_types apply to this kind"` with a small family of kind- and direction-aware templates driven by `EDGE_SCHEMA` (introduced in `propose/SCHEMA-V2-PROPOSE.md` §3.4). +- Replace the single generic empty-neighbors template `TPL_NEIGHBORS_EMPTY_KIND_CHECK = "0 results — check if the requested edge_types apply to this kind"` with a small family of kind- and direction-aware templates driven by `EDGE_SCHEMA` (introduced in `propose/completed/SCHEMA-V2-PROPOSE.md` §3.4). - Each template fires by inspecting the subject node kind, the requested `direction`, and the requested `edge_types` against `EDGE_SCHEMA[edge].src` / `.dst` / `.typical_traversals` — no hardcoded edge-shape literals in `mcp_hints.py`. - New emit-side input: hints v3 reads `EdgeSpec.brownfield_resolver_sourced` (backed by `BROWNFIELD_RESOLVER_STRATEGY_SET` from SCHEMA-V2 PR-A) to fire a distinct *"absence may mean unresolved, not absent"* hint on empty results. That complements (does not replace) the v2 `TPL_NEIGHBORS_FUZZY_STRATEGY` hint on non-empty results. - **Propose gate** (SCHEMA-V2 Decision 30): merged to `master` ([#154](https://github.com/HumanBean17/java-codebase-rag/pull/154)) before SCHEMA-V2 PR-A. **Code** ships in SCHEMA-V2 PR-D after PR-A–C are on `master`. @@ -278,6 +278,6 @@ Each row references the SCHEMA-V2 use-case re-walk (§4 of that propose) where a **Cross-propose references**: -- `propose/SCHEMA-V2-PROPOSE.md` §3.4, §3.11, Decision 28–30, PR-D §6 (locked via #151). +- `propose/completed/SCHEMA-V2-PROPOSE.md` §3.4, §3.11, Decision 28–30, PR-D §6 (locked via #151). - `propose/completed/HINTS-V2-PROPOSE.md` — fuzzy hint, dot-key invariant (unchanged). - `propose/completed/HINTS-ROAD-SIGNS-PROPOSE.md` — v1 catalogue except `TPL_NEIGHBORS_EMPTY_KIND_CHECK` deleted. diff --git a/propose/SCHEMA-V2-PROPOSE.md b/propose/completed/SCHEMA-V2-PROPOSE.md similarity index 98% rename from propose/SCHEMA-V2-PROPOSE.md rename to propose/completed/SCHEMA-V2-PROPOSE.md index 2f4c628..81d52df 100644 --- a/propose/SCHEMA-V2-PROPOSE.md +++ b/propose/completed/SCHEMA-V2-PROPOSE.md @@ -1,6 +1,6 @@ # SCHEMA-V2 — edges connect the nodes the edge is about (HTTP_CALLS, ASYNC_CALLS, Producer node, canonical Edge Navigation Schema) -**Status**: locked — implementing (plan: [`plans/PLAN-SCHEMA-V2.md`](../plans/PLAN-SCHEMA-V2.md); code PR-A–D in flight; move to `propose/completed/` when PR-D lands) +**Status**: **completed** — PR-A/B/C landed; PR-D (hints v3) tracked in [`plans/PLAN-HINTS-V3.md`](../../plans/PLAN-HINTS-V3.md). Plan: [`plans/completed/PLAN-SCHEMA-V2.md`](../../plans/completed/PLAN-SCHEMA-V2.md). **Author**: Dmitriy Teriaev + Computer **Date**: 2026-05-16 @@ -155,7 +155,7 @@ Each DDL string is asserted against `EDGE_SCHEMA[...]` in `tests/test_schema_con - **`HttpCallRow` keys by `client_id`** instead of `caller_symbol_id`; **`AsyncCallRow` keys by `producer_id`** instead of `caller_symbol_id`. Multiple clients (or producers) on the same method fan out at the caller-side node, not the Symbol. - **`DECLARES_CLIENT` and `DECLARES_PRODUCER` edges** emit at the same point clients/producers are materialized, from the declaring method Symbol. -Plan-level details (exact field renames, fixture migration, edge-emit ordering) live in `plans/PLAN-SCHEMA-V2.md`, not here. +Plan-level details (exact field renames, fixture migration, edge-emit ordering) live in `plans/completed/PLAN-SCHEMA-V2.md`, not here. ### §3.5 — `docs/EDGE-NAVIGATION.md` generator @@ -318,7 +318,7 @@ Full design in `propose/HINTS-V3-PROPOSE.md` (separate). PR-D in §6 ships the i ## §6 — Migration plan — 4 PRs -**Merge gate**: `plans/PLAN-SCHEMA-V2.md` and `plans/CURSOR-PROMPTS-SCHEMA-V2.md` must exist (separate PRs or commits) before PR-A merges. The propose answers what/why; the plan enumerates per-PR file paths, exact signatures, and grep-enumeration contracts. (Decision 29.) +**Merge gate**: `plans/completed/PLAN-SCHEMA-V2.md` and `plans/completed/CURSOR-PROMPTS-SCHEMA-V2.md` must exist (separate PRs or commits) before PR-A merges. The propose answers what/why; the plan enumerates per-PR file paths, exact signatures, and grep-enumeration contracts. (Decision 29.) ### PR-A — `EDGE_SCHEMA` + doc generator + CI invariants + `ONTOLOGY_VERSION` bump (no DDL flips) @@ -369,7 +369,7 @@ Full design in `propose/HINTS-V3-PROPOSE.md` (separate). PR-D in §6 ships the i 11. **`typical_traversals` are rendered into both doc and hint engine.** Source of truth for "what's the right way to traverse this edge." 12. **`@CodebaseConsumer` is out of scope.** No such annotation exists today; if one is added, it's a separate propose. 13. **Multi-client / multi-producer methods fan out at the caller-side node.** No Symbol-level collapsing. -14. **Pass keys `HttpCallRow` by `client_id` and `AsyncCallRow` by `producer_id`.** Plan-level details in `plans/PLAN-SCHEMA-V2.md`. +14. **Pass keys `HttpCallRow` by `client_id` and `AsyncCallRow` by `producer_id`.** Plan-level details in `plans/completed/PLAN-SCHEMA-V2.md`. 15. **Caller queries become two-hop for both HTTP and async.** `MATCH (s:Symbol)-[:DECLARES_CLIENT]->(c:Client)-[:HTTP_CALLS]->(r:Route)` and `MATCH (s:Symbol)-[:DECLARES_PRODUCER]->(p:Producer)-[:ASYNC_CALLS]->(r:Route)`. No convenience view in v2. 16. **`describe(c:…)` and `describe(p:…)` `edge_summary` now show non-zero out-edges.** No `describe` code change; data just becomes accurate. 17. **Hints v3 (PR-D) is gated on PR-A landing.** If PR-A reverts, PR-D reverts.