Add aggregate_records DML tool and query-timeout to MCP server#3179
Add aggregate_records DML tool and query-timeout to MCP server#3179
Conversation
Co-authored-by: JerryNixon <1749983+JerryNixon@users.noreply.github.com>
|
@copilot The implementation should include support for $first and $after per the updated spec. Here is a copy (not the original) of that spec in case you cannot navigate to the source issue for this PR. Please ensure it is implemented. What? Allow models to answer: "How many products are there?" and "What is our most expensive product?" Why? These are among the most common information discovery questions, a primary model use case. How? Introduce a new tool: aggregate_records. Schema: {
"type": "object",
"properties": {
"entity": {
"type": "string",
"description": "Entity name with READ permission.",
"required": true
},
"function": {
"type": "string",
"enum": ["count", "avg", "sum", "min", "max"],
"description": "Aggregation function to apply.",
"required": true
},
"field": {
"type": "string",
"description": "Field to aggregate. Use '*' for count.",
"required": true
},
"distinct": {
"type": "boolean",
"description": "Apply DISTINCT before aggregating.",
"default": false
},
"filter": {
"type": "string",
"description": "OData filter applied before aggregating (WHERE). Example: 'unitPrice lt 10'",
"default": ""
},
"groupby": {
"type": "array",
"items": { "type": "string" },
"description": "Fields to group by, e.g., ['category', 'region']. Grouped field values are included in the response.",
"default": []
},
"orderby": {
"type": "string",
"enum": ["asc", "desc"],
"description": "Sort aggregated results by the computed value. Only applies with groupby.",
"default": "desc"
},
"having": {
"type": "object",
"description": "Filter applied after aggregating on the result (HAVING). Operators are AND-ed together.",
"properties": {
"eq": { "type": "number", "description": "Aggregated value equals." },
"neq": { "type": "number", "description": "Aggregated value not equals." },
"gt": { "type": "number", "description": "Aggregated value greater than." },
"gte": { "type": "number", "description": "Aggregated value greater than or equal." },
"lt": { "type": "number", "description": "Aggregated value less than." },
"lte": { "type": "number", "description": "Aggregated value less than or equal." },
"in": {
"type": "array",
"items": { "type": "number" },
"description": "Aggregated value is in the given list."
}
}
},
"first": {
"type": "integer",
"description": "Maximum number of results to return. Used for pagination. Only applies with groupby.",
"minimum": 1,
"default": null
},
"after": {
"type": "string",
"description": "Cursor for pagination. Returns results after this cursor. Only applies with groupby and first.",
"default": null
}
},
"required": ["entity", "function", "field"]
}
Response Alias Convention: The aggregated value in the response is always aliased as {function}_{field}. For count with "*", the alias is count. Examples. Example 1: "How many products are there?" {
"entity": "Product",
"function": "count",
"field": "*"
}SELECT COUNT(*) AS count
FROM Product;Example output:
Example 2: "What is the average price of products under $10?"{
"entity": "Product",
"function": "avg",
"field": "unitPrice",
"filter": "unitPrice lt 10"
}SELECT AVG(unitPrice) AS avg_unitPrice
FROM Product
WHERE unitPrice < 10;Example output:
Example 3: "Which categories have more than 20 products?"{
"entity": "Product",
"function": "count",
"field": "*",
"groupby": ["categoryName"],
"having": {
"gt": 20
}
}SELECT categoryName, COUNT(*) AS count
FROM Product
GROUP BY categoryName
HAVING COUNT(*) > 20;Example output:
Example 4: "For discontinued products, which categories have a total revenue between $500 and $10,000?"{
"entity": "Product",
"function": "sum",
"field": "unitPrice",
"filter": "discontinued eq true",
"groupby": ["categoryName"],
"having": {
"gte": 500,
"lte": 10000
}
}SELECT categoryName, SUM(unitPrice) AS sum_unitPrice
FROM Product
WHERE discontinued = 1
GROUP BY categoryName
HAVING SUM(unitPrice) >= 500
AND SUM(unitPrice) <= 10000;Example output:
Example 5: "How many distinct suppliers do we have?"{
"entity": "Product",
"function": "count",
"field": "supplierId",
"distinct": true
}SELECT COUNT(DISTINCT supplierId) AS count_supplierId
FROM Product;Example output:
Example 6: "Which categories have exactly 5 or 10 products?"{
"entity": "Product",
"function": "count",
"field": "*",
"groupby": ["categoryName"],
"having": {
"in": [5, 10]
}
}SELECT categoryName, COUNT(*) AS count
FROM Product
GROUP BY categoryName
HAVING COUNT(*) IN (5, 10);Example output:
Example 7: "What is the average distinct unit price per category, for categories averaging over $25?"{
"entity": "Product",
"function": "avg",
"field": "unitPrice",
"distinct": true,
"groupby": ["categoryName"],
"having": {
"gt": 25
}
}SELECT categoryName, AVG(DISTINCT unitPrice) AS avg_unitPrice
FROM Product
GROUP BY categoryName
HAVING AVG(DISTINCT unitPrice) > 25;Example output:
Example 8: "Which categories have the most products?"{
"entity": "Product",
"function": "count",
"field": "*",
"groupby": ["categoryName"],
"orderby": "desc"
}SELECT categoryName, COUNT(*) AS count
FROM Product
GROUP BY categoryName
ORDER BY COUNT(*) DESC;Example output:
Example 9: "What are the cheapest categories by average price?"{
"entity": "Product",
"function": "avg",
"field": "unitPrice",
"groupby": ["categoryName"],
"orderby": "asc"
}SELECT categoryName, AVG(unitPrice) AS avg_unitPrice
FROM Product
GROUP BY categoryName
ORDER BY AVG(unitPrice) ASC;Example output:
Example 10: "For categories with over $500 revenue from discontinued products, which has the highest total?"{
"entity": "Product",
"function": "sum",
"field": "unitPrice",
"filter": "discontinued eq true",
"groupby": ["categoryName"],
"having": {
"gt": 500
},
"orderby": "desc"
}Example 11: "Show me the first 5 categories by product count"{
"entity": "Product",
"function": "count",
"field": "*",
"groupby": ["categoryName"],
"orderby": "desc",
"first": 5
}SELECT categoryName, COUNT(*) AS count
FROM Product
GROUP BY categoryName
ORDER BY COUNT(*) DESC
LIMIT 5;Example output:
Response includes cursor: {
"items": [
{ "categoryName": "Confections", "count": 13 },
{ "categoryName": "Beverages", "count": 12 },
{ "categoryName": "Condiments", "count": 12 },
{ "categoryName": "Seafood", "count": 12 },
{ "categoryName": "Dairy", "count": 10 }
],
"endCursor": "eyJjYXRlZ29yeU5hbWUiOiJEYWlyeSJ9",
"hasNextPage": true
}Example 12: "Show me the next 5 categories"{
"entity": "Product",
"function": "count",
"field": "*",
"groupby": ["categoryName"],
"orderby": "desc",
"first": 5,
"after": "eyJjYXRlZ29yeU5hbWUiOiJEYWlyeSJ9"
}SELECT categoryName, COUNT(*) AS count
FROM Product
GROUP BY categoryName
ORDER BY COUNT(*) DESC
LIMIT 5 OFFSET 5;Example output:
Response includes cursor: {
"items": [
{ "categoryName": "Grains/Cereals", "count": 7 },
{ "categoryName": "Meat/Poultry", "count": 6 },
{ "categoryName": "Produce", "count": 5 }
],
"endCursor": "eyJjYXRlZ29yeU5hbWUiOiJQcm9kdWNlIn0=",
"hasNextPage": false
}Example 13: "Show me the top 3 most expensive categories by average price"{
"entity": "Product",
"function": "avg",
"field": "unitPrice",
"groupby": ["categoryName"],
"orderby": "desc",
"first": 3
}SELECT categoryName, AVG(unitPrice) AS avg_unitPrice
FROM Product
GROUP BY categoryName
ORDER BY AVG(unitPrice) DESC
LIMIT 3;Example output:
SELECT categoryName, SUM(unitPrice) AS sum_unitPrice
FROM Product
WHERE discontinued = 1
GROUP BY categoryName
HAVING SUM(unitPrice) > 500
ORDER BY SUM(unitPrice) DESC;Example output:
|
Co-authored-by: JerryNixon <1749983+JerryNixon@users.noreply.github.com>
souvikghosh04
left a comment
There was a problem hiding this comment.
I have yet to complete the review, but I am leaving a note here: my main concern is the performance of the aggregation. We should consider testing this with a large dataset and capturing some insights.
| List<Dictionary<string, object?>> aggregatedResults = PerformAggregation( | ||
| records, function, field, distinct, groupby, havingOps, havingIn, orderby, alias); |
There was a problem hiding this comment.
Currently, the aggregation seems to happen in memory, once all records are fetched. This is fine, but for very large datasets we should consider moving the aggregation logic into the database. This might require some changes in the query executor.
Co-authored-by: JerryNixon <1749983+JerryNixon@users.noreply.github.com>
Co-authored-by: JerryNixon <1749983+JerryNixon@users.noreply.github.com>
|
@JerryNixon The model You can try again without specifying a model (just If you want to contact GitHub about this error, please mention the following identifier so they can better serve you: |
souvikghosh04
left a comment
There was a problem hiding this comment.
Posting additional comments
| if (groupbyMapping.Count > 0) | ||
| { | ||
| string direction = orderby.Equals("asc", StringComparison.OrdinalIgnoreCase) ? "ASC" : "DESC"; |
There was a problem hiding this comment.
How does OrderBy work if groupbyMapping.Count is 0? It seems OrderBy only works in combination with GroupBy.
There was a problem hiding this comment.
@copilot please take a look at this and implement the fix if needed. respond back once done
| if (first.HasValue) | ||
| { |
There was a problem hiding this comment.
Similar to OrderBy, first also seems to be nested inside groupbyMapping.Count > 0 and seems to work only in combination with GroupBy. So, without GroupBy, first will not be applied?
There was a problem hiding this comment.
@copilot please take a look at this and implement the fix if needed. respond back once done
| } | ||
|
|
||
| // Validate query-timeout if provided | ||
| if (runtimeConfig.Runtime.Mcp.QueryTimeout is not null && runtimeConfig.Runtime.Mcp.QueryTimeout < 1) |
There was a problem hiding this comment.
QueryTimeout should have an upper bound to prevent a query from accidentally running for an unreasonably long time.
There was a problem hiding this comment.
@copilot please take a look at this and implement the fix if needed. respond back once done
|
@copilot please go through the review comments and implement the changes.
|
Co-authored-by: souvikghosh04 <210500244+souvikghosh04@users.noreply.github.com>
|
@copilot please try again. please go through the review comments and implement the changes.
|
|
/azp run |
|
Azure Pipelines successfully started running 6 pipeline(s). |
|
/azp run |
|
Azure Pipelines successfully started running 6 pipeline(s). |
|
/azp run |
|
Azure Pipelines successfully started running 6 pipeline(s). |
|
/azp run |
|
Azure Pipelines successfully started running 6 pipeline(s). |
|
/azp run |
|
Azure Pipelines successfully started running 6 pipeline(s). |
Original prompt
This section details on the original issue you should resolve
<issue_title>[Enh]: add aggregate_records DML tool to MCP server</issue_title><issue_description>## What?
Allow models to answer: "How many products are there?" and "What is our most expensive product?"
Why?
These are among the most common information discovery questions, a primary model use case.
How?
Introduce a new tool: aggregate_records that reuses native GraphQL aggregation capabilities in DAB.
Schema
{ "type": "object", "properties": { "entity": { "type": "string", "description": "Entity name with READ permission.", "required": true }, "function": { "type": "string", "enum": ["count", "avg", "sum", "min", "max"], "description": "Aggregation function to apply.", "required": true }, "field": { "type": "string", "description": "Field to aggregate. Use '*' for count.", "required": true }, "distinct": { "type": "boolean", "description": "Apply DISTINCT before aggregating.", "default": false }, "filter": { "type": "string", "description": "OData filter applied before aggregating (WHERE). Example: 'unitPrice lt 10'", "default": "" }, "groupby": { "type": "array", "items": { "type": "string" }, "description": "Fields to group by, e.g., ['category', 'region']. Grouped field values are included in the response.", "default": [] }, "orderby": { "type": "string", "enum": ["asc", "desc"], "description": "Sort aggregated results by the computed value. Only applies with groupby.", "default": "desc" }, "having": { "type": "object", "description": "Filter applied after aggregating on the result (HAVING). Operators are AND-ed together.", "properties": { "eq": { "type": "number", "description": "Aggregated value equals." }, "neq": { "type": "number", "description": "Aggregated value not equals." }, "gt": { "type": "number", "description": "Aggregated value greater than." }, "gte": { "type": "number", "description": "Aggregated value greater than or equal." }, "lt": { "type": "number", "description": "Aggregated value less than." }, "lte": { "type": "number", "description": "Aggregated value less than or equal." }, "in": { "type": "array", "items": { "type": "number" }, "description": "Aggregated value is in the given list." } } } }, "required": ["entity", "function", "field"] }Response Alias Convention
The aggregated value in the response is always aliased as {function}_{field}. For count with "*", the alias is count.
Examples
Q1: "How many products are there?"
{ "entity": "Product", "function": "count", "field": "*" }Example output:
Q2: "What is the average price of products under $10?"
{ "entity": "Product", "function": "avg", "field": "unitPrice", "filter": "unitPrice lt 10" }Example output:
Q3: "Which categories have more than 20 products?"
{ "entity": "Product", "function": "count", "field": "*", "groupby": ["categoryName"], "having": { "gt": 20 } }Example output:
Q4: "For discontinued products, which categories have a total revenue between $500 and $10,000?"
{ "entity": "Product", "function": "sum", "field": "unitPrice", "filter": "discontinued eq true", "groupby": ["categoryName"], "having": { "gte": 500, "lte": 10000 } }Example output:
Q5: "How many distinct suppliers do we have?"
{ "entity": "Product", "function": "count", "field": "supplierId", "distinct": true }Example output:
Q6: "Which categories have exactly 5 or 10 products?"
{ "entity": "Product", "function": "count", "field": "*", "groupby": ["categoryName"], "having": { "in": [5, 10] } }