This document provides practical examples of DuckDB queries used in the loop-duckdb project. All examples are derived from actual queries in scripts/generate-report-parts.
Prerequisites: Before running any queries, ensure the database exists by running
pnpm generate-report. See SKILL.md for details.
- Basic Table Creation
- JSON Processing
- Aggregation Queries
- Window Functions
- Event Analysis
- Working with Recent Data
CREATE OR REPLACE TABLE loop_items AS
SELECT *
FROM './generate-report/generate-report.parquet';Source: 020-create-duckdb-database.sh:9-13
Transform a JSON array of plugins into structured rows:
CREATE OR REPLACE TABLE plugins AS
SELECT
timestamp,
instance,
filename,
unnest(json_transform(
wordpress->'active_plugins',
'[{
"plugin_slug": "VARCHAR",
"version": "VARCHAR",
"auto_update": "BOOLEAN"
}]'
)) AS plugin
FROM loop_items;Source: 020-create-duckdb-database.sh:16-30
Convert a JSON object to a structured type:
CREATE OR REPLACE TABLE themes AS
SELECT
timestamp,
instance,
filename,
json_transform(
wordpress->'active_theme',
'{
"id": "VARCHAR",
"version": "VARCHAR",
"parent_theme_slug": "VARCHAR",
"auto_update": "BOOLEAN"
}'
) AS theme
FROM loop_items;Source: 020-create-duckdb-database.sh:33-48
SELECT
json_extract_string(event.payload, '$.type') AS login_type,
COUNT(*) AS total_logins
FROM events
WHERE event.name = 'login'
GROUP BY login_type;Source: 080-logins.sh:16-28
WITH UnpivotedNBA AS (
SELECT
instance,
nba_data,
unnest(json_keys(nba_data)) AS nba_key
FROM NBAStatus
WHERE nba_data IS NOT NULL
)
SELECT
nba_key,
json_extract_string(nba_data, '$.' || nba_key) AS status
FROM UnpivotedNBA;Source: 110-nbas-dismissed.sh:19-38
Calculate version distribution with percentages:
WITH
php_versions AS (
SELECT
CAST(hosting->'php_version' AS text) AS php_version
FROM loop_items
JOIN recent_loops ON loop_items.filename = recent_loops.filename
),
php_version_counts AS (
SELECT
php_version,
COUNT(*) AS occurrence_count
FROM php_versions
GROUP BY php_version
),
total_count AS (
SELECT
COUNT(DISTINCT instance) AS total_instances
FROM recent_loops
)
SELECT
pvc.php_version,
pvc.occurrence_count,
ROUND(pvc.occurrence_count * 100.0 / tc.total_instances, 2) AS percentage
FROM php_version_counts AS pvc,
total_count AS tc
ORDER BY pvc.occurrence_count DESC;Source: 050-php_versions.sh:7-43
WITH _plugins AS (
SELECT
CASE
WHEN plugin_slug LIKE '01-ext%' THEN 'Extendify License'
ELSE plugin_slug
END AS slug,
instance
FROM plugins
JOIN recent_loops ON plugins.filename = recent_loops.filename
WHERE plugin_slug NOT LIKE 'ionos-%'
)
SELECT
slug,
COUNT(*) AS occurrence_count,
ROUND(COUNT(*) * 100.0 / (SELECT COUNT(DISTINCT instance) FROM recent_loops), 2) AS percentage
FROM _plugins
GROUP BY slug
ORDER BY occurrence_count DESC;Source: 060-most-active-plugins.sh:8-53
Use ROW_NUMBER() to filter to the latest record per instance:
CREATE OR REPLACE VIEW recent_loops AS
SELECT
filename,
instance,
timestamp
FROM (
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY instance
ORDER BY "timestamp" DESC
) AS rn
FROM loop_items
)
WHERE rn = 1;Source: 020-create-duckdb-database.sh:68-86
SELECT
-- SSO Login Count
COUNT(DISTINCT CASE
WHEN event.name = 'login'
AND json_extract_string(event.payload, '$.type') = 'sso'
THEN instance
ELSE NULL
END) AS sso,
-- Manual Login Count
COUNT(DISTINCT CASE
WHEN event.name = 'login'
AND json_extract_string(event.payload, '$.type') = 'default'
THEN instance
ELSE NULL
END) AS manual,
-- All Logins
COUNT(DISTINCT CASE
WHEN event.name = 'login'
THEN instance
ELSE NULL
END) AS both
FROM events
WHERE event.name = 'login';Source: 080-logins.sh:48-73
WITH CustomerLoginTypes AS (
SELECT
instance,
json_extract_string(event.payload, '$.type') AS login_type
FROM events
WHERE event.name = 'login'
GROUP BY instance, login_type
),
CustomerUsageSummary AS (
SELECT
instance,
bool_or(login_type = 'sso') AS used_sso,
bool_or(login_type = 'default') AS used_manual
FROM CustomerLoginTypes
GROUP BY instance
)
SELECT
COUNT(CASE WHEN used_sso = TRUE AND used_manual = FALSE THEN 1 END) AS sso_only,
COUNT(CASE WHEN used_sso = FALSE AND used_manual = TRUE THEN 1 END) AS manual_only,
COUNT(CASE WHEN used_sso = TRUE AND used_manual = TRUE THEN 1 END) AS both
FROM CustomerUsageSummary;Source: 080-logins.sh:88-124
SELECT
COUNT(DISTINCT instance) AS active_instances
FROM events
WHERE
timestamp >= current_timestamp - INTERVAL 7 DAY
AND event.name = 'login';Source: 080-logins.sh:47-72
Most queries join with the recent_loops view to ensure analysis of current state only:
SELECT
CAST(hosting->'core_version' AS text) AS wordpress_version,
COUNT(*) AS occurrence_count
FROM loop_items
JOIN recent_loops ON loop_items.filename = recent_loops.filename
GROUP BY wordpress_version
ORDER BY occurrence_count DESC;Source: 040-wordpress_versions.sh:8-43
SELECT
CAST(hosting->'php_version' AS text) AS php_version,
CAST(hosting->'core_version' AS text) AS wordpress_version
FROM loop_items;SELECT
(COUNT(*) * 100.0 / total)::NUMERIC(5, 2) AS percentage
FROM my_table;Source: 110-nbas-dismissed.sh:43
-
Use CTEs for Readability: Break complex queries into logical steps using
WITHclauses -
Join with recent_loops: Always join with
recent_loopswhen analyzing current state to avoid duplicate instances -
JSON Schema Definitions: When using
json_transform(), always specify the target schema for type safety -
Percentage Calculations: Use
ROUND()and multiply by 100.0 (not 100) to ensure float division -
Window Functions: Use
ROW_NUMBER()withPARTITION BYto deduplicate data based on business logic -
Type Casting: Explicitly cast JSON values to appropriate types (
text,VARCHAR,NUMERIC, etc.) -
NULL Handling: Use
IS NOT NULLchecks when working with optional JSON fields -
Conditional Aggregation: Use
COUNT(CASE WHEN ... END)for multiple aggregations in a single pass
The recommended way to query the database is through the MCP server, which AI assistants can use directly:
// The MCP server provides a query tool
{
"tool": "mcp__duckdb__query",
"parameters": {
"query": "SELECT * FROM loop_items LIMIT 10"
}
}Start the browser-based DuckDB UI:
pnpm start-report-uiWhen creating custom report parts in scripts/generate-report-parts/, use the ionos.loop-duckdb.exec_duckdb function:
# Example from a report part script
readonly SQL="
SELECT * FROM loop_items LIMIT 3;
"
# Query with markdown output
ionos.loop-duckdb.exec_duckdb "$SQL" '-markdown'
# Query with JSON output
ionos.loop-duckdb.exec_duckdb "$SQL" '-json'Output format flags:
-markdown- Great for reports and documentation-json- Useful for programmatic processing- (default) - Standard table output