Skip to content

Commit 00e718d

Browse files
authored
mcp: split mz_mcp_data_products into lightweight discovery and detailed views (#35747)
Fixes: https://linear.app/materializeinc/issue/DEX-18/mcp-benchmark-agents-endpoint-and-optimize-mz-mcp-data-products-view Splits the `mz_mcp_data_products` view into two views to improve discovery performance: - `mz_mcp_data_products`: a lightweight view returning just name, cluster, and description (fewer joins, no JSON aggregation) - `mz_mcp_data_product_details`: the full view with a JSON Schema column for agents that need column-level detail The `get_data_products` tool now hits the lightweight view, while `get_data_product_details` queries the full view. Local benchmarks at 1500 data products show ~25% improvement on the discovery path. Tagging @ggevay in case you have suggestions for further optimizations to these two builtin views 🙏
1 parent 9a22527 commit 00e718d

9 files changed

Lines changed: 88 additions & 12 deletions

File tree

doc/user/content/reference/system-catalog/mz_internal.md

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -542,16 +542,30 @@ each materialized view with a refresh strategy other than `on-commit`.
542542

543543
## `mz_mcp_data_products`
544544

545-
The `mz_mcp_data_products` view exposes data products (indexed materialized views)
546-
available through the Model Context Protocol (MCP) server. Each data product
547-
represents a queryable dataset with a defined schema.
545+
The `mz_mcp_data_products` view lists data products (i.e., indexed materialized
546+
views) that are available through the Model Context Protocol (MCP) server and
547+
that the current user can access. This is a lightweight discovery view. Use
548+
[`mz_mcp_data_product_details`](#mz_mcp_data_product_details) for full column
549+
schema information.
548550

549551
<!-- RELATION_SPEC mz_internal.mz_mcp_data_products -->
550552
| Field | Type | Meaning |
551553
| ------------- | -------- | ---------------------------------------------------------------------------------------- |
552554
| `object_name` | [`text`] | Fully qualified object name (database.schema.name). |
553555
| `cluster` | [`text`] | Cluster where the index is hosted. |
554556
| `description` | [`text`] | Index comment (used as data product description). |
557+
558+
## `mz_mcp_data_product_details`
559+
560+
The `mz_mcp_data_product_details` view extends [`mz_mcp_data_products`](#mz_mcp_data_products)
561+
with a JSON Schema describing each data product's columns and types.
562+
563+
<!-- RELATION_SPEC mz_internal.mz_mcp_data_product_details -->
564+
| Field | Type | Meaning |
565+
| ------------- | -------- | ---------------------------------------------------------------------------------------- |
566+
| `object_name` | [`text`] | Fully qualified object name (database.schema.name). |
567+
| `cluster` | [`text`] | Cluster where the index is hosted. |
568+
| `description` | [`text`] | Index comment (used as data product description). |
555569
| `schema` | [`jsonb`]| JSON Schema describing the object's columns and types. |
556570

557571
## `mz_object_dependencies`

src/catalog/src/builtin.rs

Lines changed: 50 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11506,15 +11506,60 @@ FROM
1150611506
access: vec![PUBLIC_SELECT],
1150711507
});
1150811508

11509-
/// Data products exposed via MCP (Model Context Protocol) for AI agents.
11509+
/// Lightweight data product discovery for MCP (Model Context Protocol).
1151011510
///
11511-
/// This view discovers indexes with comments that can be used as "data products"
11512-
/// for AI agent access. Only indexes with SELECT privilege and cluster USAGE
11513-
/// privilege are included.
11511+
/// Lists indexed objects the current user has privileges on; the index comment is optional.
11512+
/// Used by the `get_data_products` and `read_data_product` MCP tools.
11513+
/// Does not include schema details — use `mz_mcp_data_product_details` for that.
1151411514
pub static MZ_MCP_DATA_PRODUCTS: LazyLock<BuiltinView> = LazyLock::new(|| BuiltinView {
1151511515
name: "mz_mcp_data_products",
1151611516
schema: MZ_INTERNAL_SCHEMA,
1151711517
oid: oid::VIEW_MZ_MCP_DATA_PRODUCTS_OID,
11518+
desc: RelationDesc::builder()
11519+
.with_column("object_name", SqlScalarType::String.nullable(false))
11520+
.with_column("cluster", SqlScalarType::String.nullable(false))
11521+
.with_column("description", SqlScalarType::String.nullable(true))
11522+
.with_key(vec![0, 1, 2])
11523+
.finish(),
11524+
column_comments: BTreeMap::from_iter([
11525+
(
11526+
"object_name",
11527+
"Fully qualified object name (database.schema.name).",
11528+
),
11529+
("cluster", "Cluster where the index is hosted."),
11530+
(
11531+
"description",
11532+
"Index comment (used as data product description).",
11533+
),
11534+
]),
11535+
sql: r#"
11536+
SELECT DISTINCT
11537+
'"' || op.database || '"."' || op.schema || '"."' || op.name || '"' AS object_name,
11538+
c.name AS cluster,
11539+
cts.comment AS description
11540+
FROM mz_internal.mz_show_my_object_privileges op
11541+
JOIN mz_objects o ON op.name = o.name AND op.object_type = o.type
11542+
JOIN mz_schemas s ON s.name = op.schema AND s.id = o.schema_id
11543+
JOIN mz_databases d ON d.name = op.database AND d.id = s.database_id
11544+
JOIN mz_indexes i ON i.on_id = o.id
11545+
JOIN mz_clusters c ON c.id = i.cluster_id
11546+
JOIN mz_internal.mz_show_my_cluster_privileges cp ON cp.name = c.name
11547+
LEFT JOIN mz_internal.mz_comments cts ON cts.id = i.id AND cts.object_sub_id IS NULL
11548+
WHERE op.privilege_type = 'SELECT'
11549+
AND cp.privilege_type = 'USAGE'
11550+
"#,
11551+
access: vec![PUBLIC_SELECT],
11552+
});
11553+
11554+
/// Full data product details with JSON Schema for MCP agents.
11555+
///
11556+
/// Extends `mz_mcp_data_products` with column types, index keys, and column
11557+
/// comments, formatted as a JSON Schema object. Used by the
11558+
/// `get_data_product_details` MCP tool.
11559+
pub static MZ_MCP_DATA_PRODUCT_DETAILS: LazyLock<BuiltinView> = LazyLock::new(|| BuiltinView {
11560+
name: "mz_mcp_data_product_details",
11561+
schema: MZ_INTERNAL_SCHEMA,
11562+
oid: oid::VIEW_MZ_MCP_DATA_PRODUCT_DETAILS_OID,
1151811563
desc: RelationDesc::builder()
1151911564
.with_column("object_name", SqlScalarType::String.nullable(false))
1152011565
.with_column("cluster", SqlScalarType::String.nullable(false))
@@ -14482,6 +14527,7 @@ pub static BUILTINS_STATIC: LazyLock<Vec<Builtin<NameReference>>> = LazyLock::ne
1448214527
Builtin::ContinualTask(&MZ_WALLCLOCK_LAG_HISTORY_CT),
1448314528
Builtin::View(&MZ_INDEX_ADVICE),
1448414529
Builtin::View(&MZ_MCP_DATA_PRODUCTS),
14530+
Builtin::View(&MZ_MCP_DATA_PRODUCT_DETAILS),
1448514531
]);
1448614532

1448714533
builtins.extend(notice::builtins());

src/environmentd/src/http/mcp.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,11 @@ use crate::http::AuthedClient;
4545
use crate::http::sql::{SqlRequest, SqlResponse, SqlResult, execute_request};
4646

4747
// To add a new tool: add entry to tools/list, add handler function, add dispatch case.
48+
// Discovery uses the lightweight view (no JSON schema computation).
4849
const DISCOVERY_QUERY: &str = "SELECT * FROM mz_internal.mz_mcp_data_products";
50+
// Details lookups use the full view with JSON schema; append an escaped string literal for the object name.
51+
const DETAILS_QUERY_PREFIX: &str =
52+
"SELECT * FROM mz_internal.mz_mcp_data_product_details WHERE object_name = ";
4953

5054
/// MCP request errors, mapped to JSON-RPC error codes.
5155
#[derive(Debug, Error)]
@@ -660,10 +664,7 @@ async fn get_data_product_details(
660664
) -> Result<McpResult, McpRequestError> {
661665
debug!(name = %name, "Executing get_data_product_details");
662666

663-
let query = format!(
664-
"SELECT * FROM mz_internal.mz_mcp_data_products WHERE object_name = {}",
665-
escaped_string_literal(name)
666-
);
667+
let query = format!("{}{}", DETAILS_QUERY_PREFIX, escaped_string_literal(name));
667668

668669
let rows = execute_sql(client, &query).await?;
669670

src/pgrepr-consts/src/oid.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -788,3 +788,4 @@ pub const SOURCE_MZ_CATALOG_RAW_OID: u32 = 17067;
788788
pub const LOG_MZ_CLUSTER_PROMETHEUS_METRICS_OID: u32 = 17068;
789789
pub const FUNC_PARSE_CATALOG_ID_OID: u32 = 17069;
790790
pub const FUNC_PARSE_CATALOG_PRIVILEGES_OID: u32 = 17070;
791+
pub const VIEW_MZ_MCP_DATA_PRODUCT_DETAILS_OID: u32 = 17071;

test/sqllogictest/autogenerated/mz_internal.slt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,13 @@ SELECT name, type, comment FROM objects WHERE schema = 'mz_internal' AND object
319319
object_name text Fully␠qualified␠object␠name␠(database.schema.name).
320320
cluster text Cluster␠where␠the␠index␠is␠hosted.
321321
description text Index␠comment␠(used␠as␠data␠product␠description).
322+
323+
query TTT
324+
SELECT name, type, comment FROM objects WHERE schema = 'mz_internal' AND object = 'mz_mcp_data_product_details' ORDER BY position
325+
----
326+
object_name text Fully␠qualified␠object␠name␠(database.schema.name).
327+
cluster text Cluster␠where␠the␠index␠is␠hosted.
328+
description text Index␠comment␠(used␠as␠data␠product␠description).
322329
schema jsonb JSON␠Schema␠describing␠the␠object's␠columns␠and␠types.
323330

324331
query TTT
@@ -751,6 +758,7 @@ mz_materialization_dependencies
751758
mz_materialization_lag
752759
mz_materialized_view_refresh_strategies
753760
mz_materialized_view_refreshes
761+
mz_mcp_data_product_details
754762
mz_mcp_data_products
755763
mz_mysql_source_tables
756764
mz_network_policies

test/sqllogictest/information_schema_tables.slt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,10 @@ mz_materialized_view_refreshes
417417
SOURCE
418418
materialize
419419
mz_internal
420+
mz_mcp_data_product_details
421+
VIEW
422+
materialize
423+
mz_internal
420424
mz_mcp_data_products
421425
VIEW
422426
materialize

test/sqllogictest/mz_catalog_server_index_accounting.slt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ mz_message_batch_counts_received_raw_s2_primary_idx CREATE␠INDEX␠"mz_messag
7979
mz_message_batch_counts_sent_raw_s2_primary_idx CREATE␠INDEX␠"mz_message_batch_counts_sent_raw_s2_primary_idx"␠IN␠CLUSTER␠[s2]␠ON␠"mz_introspection"."mz_message_batch_counts_sent_raw"␠("channel_id",␠"from_worker_id",␠"to_worker_id")
8080
mz_message_counts_received_raw_s2_primary_idx CREATE␠INDEX␠"mz_message_counts_received_raw_s2_primary_idx"␠IN␠CLUSTER␠[s2]␠ON␠"mz_introspection"."mz_message_counts_received_raw"␠("channel_id",␠"from_worker_id",␠"to_worker_id")
8181
mz_message_counts_sent_raw_s2_primary_idx CREATE␠INDEX␠"mz_message_counts_sent_raw_s2_primary_idx"␠IN␠CLUSTER␠[s2]␠ON␠"mz_introspection"."mz_message_counts_sent_raw"␠("channel_id",␠"from_worker_id",␠"to_worker_id")
82-
mz_notices_ind CREATE␠INDEX␠"mz_notices_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s803␠AS␠"mz_internal"."mz_notices"]␠("id")
82+
mz_notices_ind CREATE␠INDEX␠"mz_notices_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s804␠AS␠"mz_internal"."mz_notices"]␠("id")
8383
mz_object_dependencies_ind CREATE␠INDEX␠"mz_object_dependencies_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s465␠AS␠"mz_internal"."mz_object_dependencies"]␠("object_id")
8484
mz_object_history_ind CREATE␠INDEX␠"mz_object_history_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s534␠AS␠"mz_internal"."mz_object_history"]␠("id")
8585
mz_object_lifetimes_ind CREATE␠INDEX␠"mz_object_lifetimes_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s535␠AS␠"mz_internal"."mz_object_lifetimes"]␠("id")

test/sqllogictest/oid.slt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1178,3 +1178,4 @@ SELECT oid, name FROM mz_objects WHERE id LIKE 's%' AND oid < 20000 ORDER BY oid
11781178
17068 mz_cluster_prometheus_metrics
11791179
17069 parse_catalog_id
11801180
17070 parse_catalog_privileges
1181+
17071 mz_mcp_data_product_details

test/testdrive/catalog.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -644,6 +644,7 @@ mz_hydration_statuses ""
644644
mz_index_advice ""
645645
mz_materialization_dependencies ""
646646
mz_materialization_lag ""
647+
mz_mcp_data_product_details ""
647648
mz_mcp_data_products ""
648649
mz_notices ""
649650
mz_notices_redacted ""

0 commit comments

Comments
 (0)