Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ Unresolved targets become **phantom** nodes (`resolved=false`, FQN guessed from
| `DECLARES_CLIENT` | type → client | Type declares an outbound call site. |
| `CALLS` | method → method | In-process call (confidence-scored, strategy-tagged). |
| `EXPOSES` | type → route | Type exposes an HTTP/async route. |
| `HTTP_CALLS` | symbol → route | Cross-service HTTP call (caller-side). |
| `HTTP_CALLS` | client → route | Cross-service HTTP call (caller-side Client to target Route). |
| `ASYNC_CALLS` | symbol → route | Cross-service async (Kafka, Rabbit, JMS, …). |

JDK / Spring / Lombok callees are represented as **phantom** method symbols at index time. Caller/callee traversals default to `exclude_external=true` so those edges are filtered by FQN prefix without dropping them from the graph.
Expand Down Expand Up @@ -426,7 +426,7 @@ Resolution order for `microservice`:

Current ontology version is **14**. Any index built before this version must be rebuilt via `cocoindex update ... --full-reprocess -f` or a full `java-codebase-rag reprocess` (no selective flags) so vectors and graph stay aligned. Until re-indexed, the server defensively JSON-decodes string-form list columns so that reading them does not raise errors, but filters like `array_contains` will not work.

Ontology **14** introduces `EDGE_SCHEMA` in `java_ontology.py` as the canonical edge navigation schema (see `docs/EDGE-NAVIGATION.md`). **This PR-A bump alone does not flip `HTTP_CALLS` / `ASYNC_CALLS` endpoints** — graphs rebuilt at v14 still use `Symbol → Route` for those edges until SCHEMA-V2 PR-B/C land. **PR-B** flips `HTTP_CALLS` to `Client → Route`; **PR-C** adds the `Producer` node, `DECLARES_PRODUCER`, and flips `ASYNC_CALLS` to `Producer → Route`. Run one full reprocess after upgrading through the SCHEMA-V2 sequence (or when you need the v14 ontology gate).
Ontology **14** introduces `EDGE_SCHEMA` in `java_ontology.py` as the canonical edge navigation schema (see `docs/EDGE-NAVIGATION.md`). **`HTTP_CALLS` is `Client → Route`** (SCHEMA-V2 PR-B). **`ASYNC_CALLS` remains `Symbol → Route` until PR-C**, which adds the `Producer` node, `DECLARES_PRODUCER`, and flips `ASYNC_CALLS` to `Producer → Route`. A graph that was built at v14 before PR-B still stores `HTTP_CALLS` as `Symbol → Route`, so run one full reprocess after upgrading through the SCHEMA-V2 sequence (or whenever you need to pass the v14 ontology gate).

Ontology **13** materializes stored `OVERRIDES` edges between method Symbols (subtype override → supertype declaration, matching `signature` on a direct `IMPLEMENTS` / `EXTENDS` hop). `neighbors(edge_types=["OVERRIDES"])` traverses this relationship; `OVERRIDDEN_BY*` keys in `edge_summary` remain describe-time rollups only.

Expand Down
17 changes: 9 additions & 8 deletions build_ast_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ class RouteExtractionStats:

@dataclass
class HttpCallRow:
symbol_id: str
client_id: str
route_id: str
confidence: float
strategy: str
Expand Down Expand Up @@ -1690,14 +1690,14 @@ def _phantom_async_route_id(call: OutgoingCallDecl) -> str:
source_layer="builtin",
)
)
key = (member.node_id, rid)
key = (cid, rid)
if key in http_seen:
continue
http_seen.add(key)
conf = call.confidence_base * 0.3 * micro_factor
tables.http_call_rows.append(
HttpCallRow(
symbol_id=member.node_id,
client_id=cid,
route_id=rid,
confidence=conf,
strategy=strategy,
Expand Down Expand Up @@ -1947,7 +1947,8 @@ def _micro_factor(member: MemberEntry | None) -> float:
for row in tables.http_call_rows:
if row.match != "unresolved":
continue
member = member_by_id.get(row.symbol_id)
client = clients_by_id.get(row.client_id)
member = member_by_id.get(client.member_id) if client else None
base = row.confidence / max(1e-9, (0.3 * _micro_factor(member)))
src_route = route_by_id.get(row.route_id)
if src_route is None and member is not None:
Expand Down Expand Up @@ -2202,7 +2203,7 @@ def _micro_factor(member: MemberEntry | None) -> float:
"confidence DOUBLE, strategy STRING)"
)
_SCHEMA_HTTP_CALLS = (
"CREATE REL TABLE HTTP_CALLS(FROM Symbol TO Route, "
"CREATE REL TABLE HTTP_CALLS(FROM Client TO Route, "
"confidence DOUBLE, strategy STRING, "
"method_call STRING, raw_uri STRING, match STRING)"
)
Expand Down Expand Up @@ -2402,8 +2403,8 @@ def _write_nodes(
"CREATE (s)-[:DECLARES_CLIENT {confidence: $confidence, strategy: $strategy}]->(c)"
)
_CREATE_HTTP_CALL = (
"MATCH (s:Symbol {id: $sid}), (r:Route {id: $rid}) "
"CREATE (s)-[:HTTP_CALLS {confidence: $confidence, strategy: $strategy, "
"MATCH (c:Client {id: $cid}), (r:Route {id: $rid}) "
"CREATE (c)-[:HTTP_CALLS {confidence: $confidence, strategy: $strategy, "
"method_call: $method_call, raw_uri: $raw_uri, match: $match}]->(r)"
)
_CREATE_ASYNC_CALL = (
Expand Down Expand Up @@ -2543,7 +2544,7 @@ def _write_routes_and_exposes(conn: kuzu.Connection, tables: GraphTables) -> Non
})
for row in tables.http_call_rows:
conn.execute(_CREATE_HTTP_CALL, {
"sid": row.symbol_id,
"cid": row.client_id,
"rid": row.route_id,
"confidence": row.confidence,
"strategy": row.strategy,
Expand Down
8 changes: 4 additions & 4 deletions docs/AGENT-GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
>
> Calibrated against ontology version **14** (see `ast_java.ONTOLOGY_VERSION` /
> `java_ontology.EDGE_SCHEMA` + valid sets): canonical edge navigation schema in
> `docs/EDGE-NAVIGATION.md`. v14 re-index required; PR-B flips `HTTP_CALLS` to
> `Client → Route`; PR-C adds `Producer` + `DECLARES_PRODUCER` and flips `ASYNC_CALLS`.
> `docs/EDGE-NAVIGATION.md`. v14 re-index required; `HTTP_CALLS` is `Client → Route`;
> PR-C adds `Producer` + `DECLARES_PRODUCER` and flips `ASYNC_CALLS`.
> Still includes stored `OVERRIDES` Symbol→Symbol edges and v12 HTTP brownfield
> (`@CodebaseHttpClient`, shared `CodebaseHttpMethod` enum, inbound layer-C HTTP routes
> replace same-method built-in rows). **Design rationale:** navigation surface and tools —
Expand Down Expand Up @@ -96,7 +96,7 @@ Use these strings **verbatim** in `neighbors(..., edge_types=[...])`:
| Method overrides | `OVERRIDES` | Subtype **method** → supertype **declaration** method (same `signature`, one `IMPLEMENTS`/`EXTENDS` hop). `in` = overriders; `out` = overridden declarations |
| Method calls | `CALLS` | `in` = callers; `out` = callees |
| Service boundary | `EXPOSES` | Symbol → Route (handler exposes route) |
| Cross-service | `HTTP_CALLS`, `ASYNC_CALLS` | Symbol → Route across services |
| Cross-service | `HTTP_CALLS`, `ASYNC_CALLS` | `HTTP_CALLS`: Client → Route; `ASYNC_CALLS`: Symbol → Route until SCHEMA-V2 PR-C (`Producer` node) |

Symmetric: cross-service and intra-service questions use the **same** `neighbors` call with different `edge_types`.

Expand Down Expand Up @@ -263,7 +263,7 @@ Virtual keys (`OVERRIDDEN_BY`, …) and composed dot-keys are **not** valid `Edg

### Ontology glossary (version 14)

Source of truth: `java_ontology.py` (`EDGE_SCHEMA`, valid sets). Strings are case-sensitive. Edge navigation: [`docs/EDGE-NAVIGATION.md`](./EDGE-NAVIGATION.md) — use `*_current` traversal keys for `HTTP_CALLS` / `ASYNC_CALLS` until SCHEMA-V2 PR-B/C flip endpoints.
Source of truth: `java_ontology.py` (`EDGE_SCHEMA`, valid sets). Strings are case-sensitive. Edge navigation: [`docs/EDGE-NAVIGATION.md`](./EDGE-NAVIGATION.md) — for `HTTP_CALLS`, traverse via `DECLARES_CLIENT` from a method Symbol or `neighbors` outbound from a Client id; `ASYNC_CALLS` still uses `*_current` member traversals until SCHEMA-V2 PR-C.

**Roles:** `CONTROLLER`, `SERVICE`, `REPOSITORY`, `COMPONENT`, `CONFIG`, `ENTITY`, `CLIENT`, `MAPPER`, `DTO`, `OTHER`.

Expand Down
11 changes: 5 additions & 6 deletions docs/EDGE-NAVIGATION.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
| CALLS | Symbol | Symbol | many_to_many | yes | yes |
| EXPOSES | Symbol | Route | one_to_one | yes | yes |
| DECLARES_CLIENT | Symbol | Client | one_to_many | yes | yes |
| HTTP_CALLS | Symbol | Route | many_to_many | yes | no |
| HTTP_CALLS | Client | Route | many_to_many | yes | no |
| ASYNC_CALLS | Symbol | Route | many_to_many | yes | no |

## EXTENDS
Expand Down Expand Up @@ -185,12 +185,12 @@

## HTTP_CALLS

**Endpoints**: `Symbol → Route`
**Endpoints**: `Client → Route`
**Cardinality**: `many_to_many`
**Brownfield-resolver-sourced**: yes
**Member-only** (hints): no

**Purpose**: resolved HTTP call from declaring method to target route (pre-flip: Symbol→Route; PR-B: Client→Route)
**Purpose**: resolved HTTP call from a declared Client to a target route

**Attributes**:

Expand All @@ -202,11 +202,10 @@

**Typical traversals**:

- `type_subject_current`: neighbors(['{id}'],'out',['DECLARES']) then neighbors(member_ids,'out',['HTTP_CALLS'])
- `type_subject`: neighbors(['{id}'],'out',['DECLARES']) then neighbors(member_ids,'out',['DECLARES_CLIENT']) then neighbors(client_ids,'out',['HTTP_CALLS'])
- `member_subject_current`: neighbors(['{id}'],'out',['HTTP_CALLS'])
- `member_subject`: neighbors(['{id}'],'out',['DECLARES_CLIENT']) then neighbors(client_ids,'out',['HTTP_CALLS'])
- `alien_subject`: HTTP_CALLS is Symbol→Route until PR-B; use member_subject_current. After PR-B (Client→Route), use member_subject via DECLARES_CLIENT
- `route_subject`: neighbors(['{id}'],'in',['HTTP_CALLS']) then neighbors(client_ids,'in',['DECLARES_CLIENT']) for declaring method
- `alien_subject`: HTTP_CALLS connects Client→Route; use DECLARES_CLIENT from a method Symbol, or neighbors(client_id,'out',['HTTP_CALLS']) from a Client id

## ASYNC_CALLS

Expand Down
8 changes: 4 additions & 4 deletions docs/skills/java-codebase-explore.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,8 @@ You cannot reason reliably about cross-service behaviour until these surfaces ex
**Sequence:**

1. Cluster routes by path prefix; **`describe`** on representative `route:` ids.
2. For each major route, **`neighbors(direction="in", edge_types=["EXPOSES"])`** (and `HTTP_CALLS` / `ASYNC_CALLS` when tracing callers) to land on handler symbols; then outbound `CALLS` as needed.
3. Use **`find(kind="client", …)`** with the same microservice filter to list outbound integration points; follow **`HTTP_CALLS` / `ASYNC_CALLS`** edges when present.
2. For each major route, **`neighbors(direction="in", edge_types=["EXPOSES"])`** to land on handler symbols; for inbound **`HTTP_CALLS`**, expect **Client** callers (then **`DECLARES_CLIENT` inbound** to the declaring method); **`ASYNC_CALLS`** inbound still lands on Symbol callers until PR-C.
3. Use **`find(kind="client", …)`** with the same microservice filter to list outbound integration points; follow outbound **`HTTP_CALLS`** from each Client (or **`ASYNC_CALLS`** from methods until Producer lands).

**Stopping rule:** You can summarize how traffic enters the service, what modules/controllers own key paths, and what external systems it calls—**without** claiming tests, runtime config, or unindexed siblings exist in MCP.

Expand All @@ -116,7 +116,7 @@ You cannot reason reliably about cross-service behaviour until these surfaces ex
**Sequence:**

1. **`neighbors(direction="in", edge_types=["EXPOSES"])`** onto the handling symbol; walk **`CALLS`** outbound method-by-method.
2. When a method shows outbound HTTP/async, use **`neighbors`** with **`HTTP_CALLS` / `ASYNC_CALLS`** (direction per question) and follow to target routes or async targets.
2. When a method makes an outbound HTTP call, use **`neighbors(direction="out", edge_types=["DECLARES_CLIENT"])`** and then follow outbound **`HTTP_CALLS`** from each Client id; for async (pre-PR-C), **`ASYNC_CALLS`** may still be direct from the method Symbol.
3. Stop at leaves, framework boundaries, or unresolved edges; read **`edge.attrs`** (`attrs.confidence`, `attrs.strategy`, `attrs.match`) and report low-confidence segments as resolver gaps, not as facts.

**Stopping rule:** You reach a stable leaf (external IO, message publish, clear terminal layer) **or** you document every unresolved hop with a concrete next non-MCP check.
Expand Down Expand Up @@ -243,7 +243,7 @@ Ten edge types:
| Method overrides | `OVERRIDES` |
| Method calls | `CALLS` |
| Service boundary | `EXPOSES` |
| Cross-service | `HTTP_CALLS`, `ASYNC_CALLS` |
| Cross-service | `HTTP_CALLS` (Client→Route), `ASYNC_CALLS` (Symbol→Route until Producer) |

For exact argument shapes, recovery playbook, and slash aliases see
[`docs/AGENT-GUIDE.md`](https://github.com/HumanBean17/java-codebase-rag/blob/master/docs/AGENT-GUIDE.md) in the java-codebase-rag repo.
17 changes: 8 additions & 9 deletions java_ontology.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ class EdgeSpec:
),
"HTTP_CALLS": EdgeSpec(
name="HTTP_CALLS",
src="Symbol",
src="Client",
dst="Route",
cardinality="many_to_many",
brownfield_resolver_sourced=True,
Expand All @@ -314,25 +314,24 @@ class EdgeSpec:
EdgeAttr("raw_uri", "STRING", "uninterpolated URI template from the call site"),
EdgeAttr("match", "STRING", "cross_service|intra_service|ambiguous|phantom|unresolved"),
),
purpose="resolved HTTP call from declaring method to target route (pre-flip: Symbol→Route; PR-B: Client→Route)",
purpose="resolved HTTP call from a declared Client to a target route",
typical_traversals={
"type_subject_current": (
"neighbors(['{id}'],'out',['DECLARES']) "
"then neighbors(member_ids,'out',['HTTP_CALLS'])"
),
"type_subject": (
"neighbors(['{id}'],'out',['DECLARES']) "
"then neighbors(member_ids,'out',['DECLARES_CLIENT']) "
"then neighbors(client_ids,'out',['HTTP_CALLS'])"
),
"member_subject_current": "neighbors(['{id}'],'out',['HTTP_CALLS'])",
"member_subject": (
"neighbors(['{id}'],'out',['DECLARES_CLIENT']) "
"then neighbors(client_ids,'out',['HTTP_CALLS'])"
),
"route_subject": (
"neighbors(['{id}'],'in',['HTTP_CALLS']) "
"then neighbors(client_ids,'in',['DECLARES_CLIENT']) for declaring method"
),
"alien_subject": (
"HTTP_CALLS is Symbol→Route until PR-B; use member_subject_current. "
"After PR-B (Client→Route), use member_subject via DECLARES_CLIENT"
"HTTP_CALLS connects Client→Route; use DECLARES_CLIENT from a method Symbol, "
"or neighbors(client_id,'out',['HTTP_CALLS']) from a Client id"
),
},
),
Expand Down
Loading
Loading