Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions .github/workflows/ogm-nightly-sync.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Nightly OGM sync: connects to the production host over SSH and runs
# trigger_ogm_nightly_sync.py inside a running ogm-api container.
name: Nightly OGM Sync

on:
# Runs once a day at 07:15 (GitHub Actions evaluates cron schedules in UTC).
schedule:
- cron: "15 7 * * *"
# Also allows the workflow to be started manually.
workflow_dispatch:

# Restrict the workflow token to read-only access to repository contents.
permissions:
contents: read

# All runs share one concurrency group; a newer run does not cancel a run
# that is already in progress.
concurrency:
group: ogm-nightly-sync
cancel-in-progress: false

jobs:
trigger-nightly-sync:
name: Trigger Production OGM Sync
runs-on: ubuntu-latest

steps:
# Load the SSH private key from secrets into an ssh-agent for later steps.
- name: Start SSH agent
uses: webfactory/ssh-agent@v0.9.0
with:
ssh-private-key: ${{ secrets.OGM_KAMAL_SSH_PRIVATE_KEY }}

# Append the host's public keys (hashed, via -H) to known_hosts so the ssh
# call in the next step can verify the host without prompting.
# NOTE(review): the keys are fetched at run time rather than pinned, so a
# spoofed host would be trusted as-is - consider storing the expected host
# key in a secret instead.
- name: Trust production host key
env:
SSH_HOST: ${{ secrets.OGM_KAMAL_SSH_HOST }}
# Falls back to port 22 when the port secret is not set.
SSH_PORT: ${{ secrets.OGM_KAMAL_SSH_PORT || '22' }}
run: |
mkdir -p "$HOME/.ssh"
ssh-keyscan -p "$SSH_PORT" -H "$SSH_HOST" >> "$HOME/.ssh/known_hosts"

# Remote script: pick the first running ogm-api container labelled
# role=worker, fall back to role=web, exit 1 if neither is running, then
# run the trigger script inside the chosen container with docker exec.
# BatchMode=yes makes ssh fail instead of prompting for input.
- name: Refresh OGM repos and enqueue nightly harvest
env:
SSH_HOST: ${{ secrets.OGM_KAMAL_SSH_HOST }}
SSH_PORT: ${{ secrets.OGM_KAMAL_SSH_PORT || '22' }}
SSH_USER: ${{ secrets.OGM_KAMAL_SSH_USER }}
run: |
ssh -o BatchMode=yes -p "$SSH_PORT" "$SSH_USER@$SSH_HOST" '
set -eu

container="$(docker ps \
--filter label=service=ogm-api \
--filter label=role=worker \
--filter status=running \
--format "{{.Names}}" \
| head -n1)"

if [ -z "$container" ]; then
container="$(docker ps \
--filter label=service=ogm-api \
--filter label=role=web \
--filter status=running \
--format "{{.Names}}" \
| head -n1)"
fi

if [ -z "$container" ]; then
echo "No running ogm-api web or worker container found." >&2
exit 1
fi

echo "Using container: $container"
docker exec "$container" python /app/backend/scripts/trigger_ogm_nightly_sync.py
'
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ Then open:

- **Website**: `http://localhost:3000`
- **API docs (for technical staff)**: `http://localhost:8000/api/docs`
- **OGM repository dashboard**: `http://localhost:8000/api/v1/ogm/repos/dashboard`

To stop everything later:

Expand Down Expand Up @@ -149,6 +150,7 @@ All documentation is now in the top-level `docs/` folder:
- **Codebase overview / executive architecture summary**: `docs/backend/codebase_overview.md`
- **Caching**: `docs/backend/caching.md`
- **Search**: `docs/backend/search.md`
- **OpenGeoMetadata harvesting**: `docs/backend/ogm_harvesting.md`
- **Service tiers / API keys / rate limiting**: `docs/backend/service_tiers_runbook.md`
- **Scripts (Python utilities)**: `docs/backend/scripts.md`
- **MCP / Claude Desktop**: `docs/mcp/`
Expand Down
6 changes: 3 additions & 3 deletions backend/app/api/v1/endpoint_modules/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ class UpdateAPIKeyRequest(BaseModel):

class UpdateOGMRepoRequest(BaseModel):
    # Request body accepted by update_ogm_repo. Every field is optional and
    # defaults to None.
    ogm_enabled: Optional[bool] = None
    # Declared exactly once: a second, stale declaration of this field was
    # shadowed by this one and has been dropped.
    ogm_watch_mode: Optional[str] = None  # nightly|weekly|webhook|both|manual
    ogm_notes: Optional[str] = None
    ogm_tags: Optional[dict] = None

Expand Down Expand Up @@ -393,12 +393,12 @@ async def update_ogm_repo(repo_name: str, body: UpdateOGMRepoRequest):
"""Create or update a repo watch entry."""
if body.ogm_watch_mode is not None:
mode = body.ogm_watch_mode.lower().strip()
if mode not in {"weekly", "webhook", "both", "manual"}:
if mode not in {"nightly", "weekly", "webhook", "both", "manual"}:
raise HTTPException(status_code=400, detail="Invalid ogm_watch_mode")
await ogm_repo.upsert_repo(
ogm_repo_name=repo_name,
ogm_enabled=body.ogm_enabled if body.ogm_enabled is not None else True,
ogm_watch_mode=body.ogm_watch_mode or "weekly",
ogm_watch_mode=body.ogm_watch_mode or "nightly",
ogm_notes=body.ogm_notes,
ogm_tags=body.ogm_tags,
)
Expand Down
116 changes: 114 additions & 2 deletions backend/app/api/v1/endpoint_modules/ogm.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,35 @@
from typing import Optional
from __future__ import annotations

from fastapi import APIRouter, Query
from datetime import datetime
from pathlib import Path
from typing import Any, Optional

from fastapi import APIRouter, Query, Request
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates

from app.api.v1.utils import create_response
from app.services.ogm_harvest.repository import OGMHarvestRepository

# Router that the OGM endpoints in this module are registered on.
router = APIRouter()

# Shared repository object used by the handlers to read OGM harvest data.
ogm_repo = OGMHarvestRepository()

# "templates" directory located four levels above this file's directory.
TEMPLATES_DIR = Path(__file__).resolve().parents[4] / "templates"

# Only build the Jinja2 environment when the directory exists; handlers must
# treat ``templates is None`` as "no templates available".
if TEMPLATES_DIR.exists():
    templates = Jinja2Templates(directory=str(TEMPLATES_DIR))
else:
    templates = None


def _format_timestamp(value: Any) -> Optional[str]:
if value in (None, ""):
return None
if isinstance(value, datetime):
return value.strftime("%Y-%m-%d %H:%M UTC")
if isinstance(value, str):
try:
normalized = value.replace("Z", "+00:00")
parsed = datetime.fromisoformat(normalized)
return parsed.strftime("%Y-%m-%d %H:%M UTC")
except ValueError:
return value
return str(value)


@router.get("/ogm/repos")
Expand All @@ -18,6 +41,95 @@ async def list_public_ogm_repos():
return create_response({"repos": repos})


@router.get(
    "/ogm/repos/dashboard",
    include_in_schema=False,
    response_class=HTMLResponse,
)
async def ogm_repo_dashboard(request: Request):
    """Serve an HTML dashboard of the public OGM repository summaries.

    Loads the summaries from ``ogm_repo.list_public_repo_summaries()``, adds
    display-ready timestamp fields and a ``harvest_gap_count`` to each entry,
    and aggregates totals across all repositories.

    Args:
        request: Incoming request; passed to the template context.

    Returns:
        The rendered ``ogm_repo_dashboard.html`` template, or a minimal
        inline HTML table when the templates directory is unavailable
        (``templates is None``). The aggregated ``summary`` is only used by
        the template path.
    """
    # Local imports keep this handler self-contained; the module-level
    # imports do not provide these names.
    from datetime import timezone
    from html import escape

    repos = await ogm_repo.list_public_repo_summaries()

    dashboard_repos = []
    total_harvested = 0
    total_available = 0
    repos_with_aardvark = 0
    enabled_repos = 0
    never_harvested = 0

    for repo in repos:
        # Missing or null counts are treated as zero.
        harvested_count = int(repo.get("harvested_record_count") or 0)
        available_count = int(repo.get("available_record_count") or 0)
        has_aardvark = bool(repo.get("ogm_has_aardvark"))
        enabled = bool(repo.get("ogm_enabled"))
        last_harvest_completed = repo.get("last_crawl_completed_at")

        total_harvested += harvested_count
        total_available += available_count
        repos_with_aardvark += int(has_aardvark)
        enabled_repos += int(enabled)
        # A repo with no completed crawl timestamp counts as never harvested.
        never_harvested += int(not last_harvest_completed)

        dashboard_repos.append(
            {
                **repo,
                "display_last_commit_at": _format_timestamp(repo.get("last_commit_at")),
                "display_last_harvest_at": _format_timestamp(last_harvest_completed),
                "display_last_harvest_started_at": _format_timestamp(
                    repo.get("last_crawl_started_at")
                ),
                # NOTE(review): this is harvested minus available, clamped at
                # zero. If "gap" is meant to be records still waiting to be
                # harvested, the operands look reversed - confirm against the
                # template before changing.
                "harvest_gap_count": max(harvested_count - available_count, 0),
            }
        )

    summary = {
        "repo_count": len(dashboard_repos),
        "enabled_repo_count": enabled_repos,
        "repos_with_aardvark_count": repos_with_aardvark,
        "never_harvested_count": never_harvested,
        "harvested_record_count": total_harvested,
        "available_record_count": total_available,
    }

    if templates is None:
        # This fallback builds HTML by hand, so every interpolated value is
        # escaped: repository names and unparsed timestamp strings come from
        # stored data and must not be able to inject markup.
        rows = "".join(
            (
                "<tr>"
                f"<td>{escape(str(repo.get('ogm_repo_name') or ''))}</td>"
                f"<td>{escape(str(repo.get('display_last_commit_at') or '-'))}</td>"
                f"<td>{escape(str(repo.get('display_last_harvest_at') or '-'))}</td>"
                f"<td>{'yes' if repo.get('ogm_has_aardvark') else 'no'}</td>"
                f"<td>{escape(str(repo.get('harvested_record_count') or 0))}</td>"
                f"<td>{escape(str(repo.get('available_record_count') or 0))}</td>"
                "</tr>"
            )
            for repo in dashboard_repos
        )
        return HTMLResponse(
            (
                "<!doctype html><html><head>"
                "<title>OpenGeoMetadata Repository Dashboard</title>"
                "</head>"
                "<body><h1>OpenGeoMetadata Repository Dashboard</h1>"
                "<p>Templates are unavailable, showing a minimal fallback view.</p>"
                "<table><thead><tr>"
                "<th>Repository</th><th>Last commit</th><th>Last harvest</th>"
                "<th>Aardvark</th><th>Harvested</th><th>Available</th>"
                f"</tr></thead><tbody>{rows}</tbody></table></body></html>"
            )
        )

    return templates.TemplateResponse(
        "ogm_repo_dashboard.html",
        {
            "request": request,
            "title": "OpenGeoMetadata Repository Dashboard",
            "summary": summary,
            "repos": dashboard_repos,
            # Timezone-aware "now": datetime.utcnow() is deprecated and
            # returns a naive value.
            "generated_at": _format_timestamp(datetime.now(timezone.utc)),
        },
    )


@router.get("/ogm/harvest/failures")
async def list_public_ogm_harvest_failures(
repo_name: Optional[str] = Query(None, description="Filter by a single ogm_repo_name"),
Expand Down
Loading
Loading