diff --git a/backend/app/api/errors.py b/backend/app/api/errors.py new file mode 100644 index 0000000..20feeaf --- /dev/null +++ b/backend/app/api/errors.py @@ -0,0 +1,116 @@ +from __future__ import annotations + +import uuid +from typing import Any + +from fastapi import Request +from fastapi.responses import JSONResponse +from pydantic import BaseModel, Field +from starlette.middleware.base import BaseHTTPMiddleware + +REQUEST_ID_HEADER = "X-Request-ID" + + +class APIError(BaseModel): + """Public API error object shared by exception handlers and OpenAPI docs.""" + + status: int = Field(..., ge=400, le=599) + code: str = Field(..., min_length=1) + title: str = Field(..., min_length=1) + detail: str | None = None + request_id: str | None = None + + +class APIErrorResponse(BaseModel): + errors: list[APIError] = Field(..., min_length=1) + + +COMMON_ERROR_RESPONSES: dict[int | str, dict[str, Any]] = { + 500: { + "model": APIErrorResponse, + "description": "Internal server error", + }, +} + + +class RequestIDMiddleware(BaseHTTPMiddleware): + """Attach a stable request ID to every request and response.""" + + async def dispatch(self, request: Request, call_next): + request_id = request.headers.get(REQUEST_ID_HEADER) or uuid.uuid4().hex + request.state.request_id = request_id + + response = await call_next(request) + response.headers[REQUEST_ID_HEADER] = request_id + return response + + +def get_request_id(request: Request | None) -> str | None: + if request is None: + return None + + state = getattr(request, "state", None) + state_request_id = getattr(state, "request_id", None) + if state_request_id: + return str(state_request_id) + + headers = getattr(request, "headers", None) + header_request_id = headers.get(REQUEST_ID_HEADER) if headers is not None else None + return header_request_id or None + + +def build_error_payload( + *, + status_code: int, + code: str, + title: str, + detail: str | None = None, + request_id: str | None = None, +) -> dict[str, Any]: + payload = APIErrorResponse( + errors=[ + APIError( + status=status_code, + code=code, + title=title, + detail=detail, + request_id=request_id, + ) + ] + ) + return payload.model_dump(exclude_none=True) + + +def api_error_response( + *, + status_code: int, + code: str, + title: str, + detail: str | None = None, + request_id: str | None = None, + headers: dict[str, str] | None = None, +) -> JSONResponse: + response = JSONResponse( + status_code=status_code, + content=build_error_payload( + status_code=status_code, + code=code, + title=title, + detail=detail, + request_id=request_id, + ), + headers=headers, + ) + if request_id: + response.headers[REQUEST_ID_HEADER] = request_id + return response + + +def internal_server_error_response(request: Request) -> JSONResponse: + return api_error_response( + status_code=500, + code="internal_server_error", + title="Internal server error", + detail="An unexpected error occurred.", + request_id=get_request_id(request), + ) diff --git a/backend/app/api/v1/endpoint_modules/search.py b/backend/app/api/v1/endpoint_modules/search.py index 1266cba..c4ff528 100644 --- a/backend/app/api/v1/endpoint_modules/search.py +++ b/backend/app/api/v1/endpoint_modules/search.py @@ -9,6 +9,7 @@ from fastapi.responses import JSONResponse from sqlalchemy import select +from app.api.errors import COMMON_ERROR_RESPONSES, api_error_response, get_request_id from app.api.v1.advanced_search_utils import validate_adv_q from app.api.v1.strong_params import FACET_ALLOWED_PARAMS from app.api.v1.utils import ( @@ -58,6 +59,24 @@ # Cache TTL configuration in seconds SEARCH_CACHE_TTL = int(3600) # 1 hour SUGGEST_CACHE_TTL = int(7200) # 2 hours + + +def _search_error_response( + request: Request, + *, + code: str, + title: str = "Search failed", + detail: str = "Search request failed.", +) -> JSONResponse: + return api_error_response( + status_code=500, + code=code, + title=title, + detail=detail, + request_id=get_request_id(request), + ) + + SEARCH_RESULT_CACHE_TTL = int(os.getenv("SEARCH_RESULT_CACHE_TTL", str(SEARCH_CACHE_TTL))) SEARCH_RESULT_CACHE_VERSION = os.getenv("SEARCH_RESULT_CACHE_VERSION", "v1") SEARCH_RESULT_CACHE_NAMESPACE = "search.results" @@ -463,7 +482,11 @@ async def _handle_search(request: Request, params: dict) -> JSONResponse: search_duration_ms = (time.perf_counter() - search_started_at) * 1000 if isinstance(results, dict) and "error" in results: logger.error("Search service returned an internal error", exc_info=False) - return JSONResponse(content={"error": "Elasticsearch search failed"}, status_code=500) + return _search_error_response( + request, + code="elasticsearch_search_failed", + detail="Elasticsearch search failed.", + ) # Step 2: Extract resource IDs and scores result_obj = results if isinstance(results, dict) else {} @@ -722,7 +745,7 @@ async def _handle_search(request: Request, params: dict) -> JSONResponse: return _attach_search_timing_headers(response, timings) -@router.get("/search") +@router.get("/search", responses=COMMON_ERROR_RESPONSES) @cached_endpoint(ttl=SEARCH_CACHE_TTL) async def search( request: Request, @@ -826,10 +849,10 @@ async def search( total_duration, exc_info=True, ) - return JSONResponse(content={"error": "Search request failed"}, status_code=500) + return _search_error_response(request, code="search_request_failed") -@router.post("/search") +@router.post("/search", responses=COMMON_ERROR_RESPONSES) @cached_endpoint(ttl=SEARCH_CACHE_TTL) async def search_post( request: Request, @@ -903,10 +926,10 @@ async def search_post( raise except Exception: logger.error("Search POST request failed", exc_info=True) - return JSONResponse(content={"error": "Search request failed"}, status_code=500) + return _search_error_response(request, code="search_request_failed") -@router.get("/search/facets/{facet_name}") +@router.get("/search/facets/{facet_name}", responses=COMMON_ERROR_RESPONSES) @cached_endpoint(ttl=SEARCH_CACHE_TTL) async def get_facet( facet_name: str, @@ -1043,10 +1066,15 @@ async def get_facet( return JSONResponse(content={"error": "Invalid facet request"}, status_code=400) except Exception: logger.error("Error getting facet values", exc_info=True) - return JSONResponse(content={"error": "Failed to get facet values"}, status_code=500) + return _search_error_response( + request, + code="facet_values_failed", + title="Facet lookup failed", + detail="Failed to get facet values.", + ) -@router.get("/suggest") +@router.get("/suggest", responses=COMMON_ERROR_RESPONSES) @cached_endpoint(ttl=SUGGEST_CACHE_TTL) async def suggest( request: Request, @@ -1069,4 +1097,9 @@ async def suggest( raise except Exception: logger.error("Error getting suggestions", exc_info=True) - return JSONResponse(content={"error": "Failed to get suggestions"}, status_code=500) + return _search_error_response( + request, + code="suggestions_failed", + title="Suggestions failed", + detail="Failed to get suggestions.", + ) diff --git a/backend/app/elasticsearch/search.py b/backend/app/elasticsearch/search.py index eb58424..8691928 100644 --- a/backend/app/elasticsearch/search.py +++ b/backend/app/elasticsearch/search.py @@ -1636,17 +1636,21 @@ async def finalize_response( exc_info=True, ) - # If we get here, propagate a detailed HTTP error + # If we get here, propagate a public-safe HTTP error. The full query, + # index name, and upstream exception remain in logs via exc_info above. error_detail = { "message": "Elasticsearch query failed", - "error": str(es_error), - "query": search_query, - "index": index_name, + "code": "elasticsearch_query_failed", } if hasattr(es_error, "info"): - error_detail["info"] = es_error.info + info = getattr(es_error, "info", {}) or {} + upstream_status = info.get("status") if isinstance(info, dict) else None + if isinstance(upstream_status, int): + error_detail["upstream_status_code"] = upstream_status if hasattr(es_error, "status_code"): - error_detail["status_code"] = es_error.status_code + status_code = es_error.status_code + if isinstance(status_code, int): + error_detail["upstream_status_code"] = status_code raise HTTPException(status_code=500, detail=error_detail) from es_error return await finalize_response( diff --git a/backend/app/main.py b/backend/app/main.py index 2d8d0a8..1a571fa 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -32,6 +32,11 @@ FastAPIInstrumentor = None # Optional: requires appsignal/otel stack from starlette.middleware.base import BaseHTTPMiddleware +from app.api.errors import ( + RequestIDMiddleware, + get_request_id, + internal_server_error_response, +) from app.api.ogc import router as ogc_router from app.api.v1.endpoints import router as public_router from app.elasticsearch import close_elasticsearch, init_elasticsearch @@ -241,6 +246,9 @@ async def dispatch(self, request: Request, call_next): # Add Cloudflare Turnstile browser gate middleware app.add_middleware(TurnstileMiddleware) +# Add request IDs after other middleware so it runs first in the Starlette stack. +app.add_middleware(RequestIDMiddleware) + # Include routers app.include_router(public_router, prefix="/api/v1") app.include_router(ogc_router, prefix="/api/v1/ogc") @@ -292,21 +300,24 @@ async def robots_txt() -> PlainTextResponse: @app.exception_handler(Exception) async def global_exception_handler(request: Request, exc: Exception): """Global exception handler for the application.""" - logger.error(f"Global exception handler caught: {str(exc)}", exc_info=True) + request_id = get_request_id(request) + logger.error( + "Global exception handler caught request_id=%s path=%s", + request_id, + request.url.path, + exc_info=True, + ) if isinstance(exc, HTTPException): - return JSONResponse( + response = JSONResponse( status_code=exc.status_code, content={"detail": exc.detail}, ) + if request_id: + response.headers["X-Request-ID"] = request_id + return response - return JSONResponse( - status_code=500, - content={ - "message": "An unexpected error occurred", - "error": str(exc), - }, - ) + return internal_server_error_response(request) # Frontend is now served by React Router v7 in a separate Docker container diff --git a/backend/db/migrations/create_distribution_tables.py b/backend/db/migrations/create_distribution_tables.py index 0e1a0b3..1ca0698 100644 --- a/backend/db/migrations/create_distribution_tables.py +++ b/backend/db/migrations/create_distribution_tables.py @@ -109,8 +109,8 @@ def create_distribution_tables(): (23, 'tile_json', 'TileJSON', 'https://github.com/mapbox/tilejson-spec', False, '-', 23), (24, 'wcs', 'Web Coverage Service (WCS)', 'http://www.opengis.net/def/serviceType/ogc/wcs', False, '-', 24), (25, 'wfs', 'Web Feature Service (WFS)', 'http://www.opengis.net/def/serviceType/ogc/wfs', False, 'Provides a to download generated vector datasets (GeoJSON, shapefile)', 25), - (26, 'wmts', 'Web Mapping Service (WMS)', 'http://www.opengis.net/def/serviceType/ogc/wms', False, 'Provides a service to visually preview a layer and inspect its features', 26), - (27, 'wms', 'WMTS', 'http://www.opengis.net/def/serviceType/ogc/wmts', False, '-', 27), + (26, 'wms', 'Web Mapping Service (WMS)', 'http://www.opengis.net/def/serviceType/ogc/wms', False, 'Provides a service to visually preview a layer and inspect its features', 26), + (27, 'wmts', 'WMTS', 'http://www.opengis.net/def/serviceType/ogc/wmts', False, '-', 27), (28, 'xyz_tiles', 'XYZ tiles', 'https://wiki.openstreetmap.org/wiki/Slippy_map_tilenames', False, 'Link to an XYZ tile server', 28) ] diff --git a/backend/db/migrations/create_distribution_types_table.py b/backend/db/migrations/create_distribution_types_table.py index d0adb10..769d291 100644 --- a/backend/db/migrations/create_distribution_types_table.py +++ b/backend/db/migrations/create_distribution_types_table.py @@ -90,8 +90,8 @@ def create_distribution_types_table(): (23, 'tile_json', 'TileJSON', 'https://github.com/mapbox/tilejson-spec', False, '-', 23), (24, 'wcs', 'Web Coverage Service (WCS)', 'http://www.opengis.net/def/serviceType/ogc/wcs', False, '-', 24), (25, 'wfs', 'Web Feature Service (WFS)', 'http://www.opengis.net/def/serviceType/ogc/wfs', False, 'Provides a to download generated vector datasets (GeoJSON, shapefile)', 25), - (26, 'wmts', 'Web Mapping Service (WMS)', 'http://www.opengis.net/def/serviceType/ogc/wms', False, 'Provides a service to visually preview a layer and inspect its features', 26), - (27, 'wms', 'WMTS', 'http://www.opengis.net/def/serviceType/ogc/wmts', False, '-', 27), + (26, 'wms', 'Web Mapping Service (WMS)', 'http://www.opengis.net/def/serviceType/ogc/wms', False, 'Provides a service to visually preview a layer and inspect its features', 26), + (27, 'wmts', 'WMTS', 'http://www.opengis.net/def/serviceType/ogc/wmts', False, '-', 27), (28, 'xyz_tiles', 'XYZ tiles', 'https://wiki.openstreetmap.org/wiki/Slippy_map_tilenames', False, 'Link to an XYZ tile server', 28) ] diff --git a/backend/tests/api/test_error_contracts.py b/backend/tests/api/test_error_contracts.py new file mode 100644 index 0000000..2445a0c --- /dev/null +++ b/backend/tests/api/test_error_contracts.py @@ -0,0 +1,155 @@ +import json +from unittest.mock import AsyncMock + +import pytest +from fastapi import FastAPI +from fastapi.testclient import TestClient +from starlette.requests import Request + +from app.api.errors import ( + COMMON_ERROR_RESPONSES, + REQUEST_ID_HEADER, + APIErrorResponse, + RequestIDMiddleware, + internal_server_error_response, +) +from app.api.v1.endpoint_modules import search as search_module +from app.main import app as main_app + + +def test_error_response_contract_serializes_request_id(): + payload = APIErrorResponse( + errors=[ + { + "status": 500, + "code": "internal_server_error", + "title": "Internal server error", + "detail": "An unexpected error occurred.", + "request_id": "req-123", + } + ] + ).model_dump(exclude_none=True) + + assert payload == { + "errors": [ + { + "status": 500, + "code": "internal_server_error", + "title": "Internal server error", + "detail": "An unexpected error occurred.", + "request_id": "req-123", + } + ] + } + + +def test_unhandled_exceptions_use_public_safe_error_envelope(): + app = FastAPI(responses=COMMON_ERROR_RESPONSES) + app.add_middleware(RequestIDMiddleware) + + @app.get("/boom") + async def boom(): + raise RuntimeError("database password secret") + + @app.exception_handler(Exception) + async def global_exception_handler(request, exc): + return internal_server_error_response(request) + + client = TestClient(app, raise_server_exceptions=False) + response = client.get("/boom", headers={REQUEST_ID_HEADER: "req-test"}) + + assert response.status_code == 500 + assert response.headers[REQUEST_ID_HEADER] == "req-test" + assert response.json() == { + "errors": [ + { + "status": 500, + "code": "internal_server_error", + "title": "Internal server error", + "detail": "An unexpected error occurred.", + "request_id": "req-test", + } + ] + } + assert "database password secret" not in response.text + + +@pytest.mark.unit +def test_main_app_adds_request_id_header_to_normal_responses(): + client = TestClient(main_app) + response = client.get("/api/v1", headers={REQUEST_ID_HEADER: "req-main"}) + + assert response.status_code == 200 + assert response.headers[REQUEST_ID_HEADER] == "req-main" + + +def test_openapi_documents_standard_500_error_schema(): + client = TestClient(main_app) + schema = client.get("/api/openapi.json").json() + + assert "APIErrorResponse" in schema["components"]["schemas"] + search_get = schema["paths"]["/api/v1/search"]["get"] + assert search_get["responses"]["500"]["description"] == "Internal server error" + assert search_get["responses"]["500"]["content"]["application/json"]["schema"] == { + "$ref": "#/components/schemas/APIErrorResponse" + } + + +def _build_request(query_string: bytes = b"q=test") -> Request: + return Request( + { + "type": "http", + "http_version": "1.1", + "method": "GET", + "scheme": "http", + "path": "/api/v1/search", + "raw_path": b"/api/v1/search", + "query_string": query_string, + "headers": [(REQUEST_ID_HEADER.lower().encode(), b"req-search")], + "server": ("testserver", 80), + "client": ("testclient", 50000), + "root_path": "", + } + ) + + +@pytest.mark.asyncio +async def test_search_service_errors_use_public_error_contract(monkeypatch): + request = _build_request() + request.state.request_id = "req-search" + + class StubSearchService: + async def search(self, **kwargs): + return {"error": "Elasticsearch backend secret"} + + monkeypatch.setattr(search_module, "SearchService", StubSearchService) + monkeypatch.setattr( + search_module, + "_get_cached_search_response_core", + AsyncMock(return_value=(None, "disabled")), + ) + + response = await search_module._handle_search( + request, + { + "q": "test", + "page": 1, + "per_page": 10, + "request_query_params": "q=test", + }, + ) + + assert response.status_code == 500 + assert response.headers[REQUEST_ID_HEADER] == "req-search" + assert json.loads(response.body) == { + "errors": [ + { + "status": 500, + "code": "elasticsearch_search_failed", + "title": "Search failed", + "detail": "Elasticsearch search failed.", + "request_id": "req-search", + } + ] + } + assert "backend secret" not in response.body.decode() diff --git a/backend/tests/api/v1/test_facet_endpoint.py b/backend/tests/api/v1/test_facet_endpoint.py index 38f9663..b8c39a8 100644 --- a/backend/tests/api/v1/test_facet_endpoint.py +++ b/backend/tests/api/v1/test_facet_endpoint.py @@ -628,7 +628,9 @@ async def test_general_exception( response = await async_client.get("/api/v1/search/facets/schema_provider_s") assert response.status_code == 500 - assert "error" in response.json() + error = response.json()["errors"][0] + assert error["code"] == "facet_values_failed" + assert error["detail"] == "Failed to get facet values." class TestFacetEndpointIntegration: diff --git a/backend/tests/api/v1/test_search_endpoints.py b/backend/tests/api/v1/test_search_endpoints.py index f19c010..eb302ca 100644 --- a/backend/tests/api/v1/test_search_endpoints.py +++ b/backend/tests/api/v1/test_search_endpoints.py @@ -17,7 +17,11 @@ def _check_elasticsearch_error(response): """Check if response is a 500 due to Elasticsearch error and skip if so.""" if response.status_code == 500: error_data = response.json() - error_str = str(error_data.get("error", "")).lower() + error_parts = [str(error_data.get("error", ""))] + for error in error_data.get("errors", []): + if isinstance(error, dict): + error_parts.extend(str(error.get(key, "")) for key in ("code", "title", "detail")) + error_str = " ".join(error_parts).lower() if any(term in error_str for term in ["elasticsearch", "index", "connection", "not found"]): pytest.skip(f"Elasticsearch not available: {error_data.get('error', 'Unknown error')}") diff --git a/backend/tests/elasticsearch/test_search.py b/backend/tests/elasticsearch/test_search.py index 87d5e1e..e363a53 100644 --- a/backend/tests/elasticsearch/test_search.py +++ b/backend/tests/elasticsearch/test_search.py @@ -5,6 +5,7 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest +from fastapi import HTTPException from app.elasticsearch.search import ( BBOX_SPATIAL_BOOST_WEIGHT, @@ -849,18 +850,25 @@ async def search_side_effect(*, query=None, **kwargs): @pytest.mark.asyncio async def test_search_resources_error_handling(self): - """Test search_resources error handling.""" + """Elasticsearch failures should not expose query internals to callers.""" # Mock the Elasticsearch client to raise an exception mock_es = AsyncMock() - mock_es.search.side_effect = Exception("Elasticsearch connection error") + mock_es.search.side_effect = Exception("Elasticsearch connection secret") # Mock the es client with patch("app.elasticsearch.search.es", mock_es): # Call search_resources and expect an exception - with pytest.raises(Exception) as exc_info: + with pytest.raises(HTTPException) as exc_info: await search_resources(query="test", fq=None, skip=0, limit=10, sort=None) - assert "Elasticsearch connection error" in str(exc_info.value) + assert exc_info.value.status_code == 500 + assert exc_info.value.detail == { + "message": "Elasticsearch query failed", + "code": "elasticsearch_query_failed", + } + assert "connection secret" not in str(exc_info.value.detail) + assert "query" not in exc_info.value.detail + assert "index" not in exc_info.value.detail @pytest.mark.asyncio async def test_search_resources_with_suggestions(self): diff --git a/backend/tests/services/test_mcp_service.py b/backend/tests/services/test_mcp_service.py index 89a2566..c958d74 100644 --- a/backend/tests/services/test_mcp_service.py +++ b/backend/tests/services/test_mcp_service.py @@ -107,7 +107,16 @@ async def test_search_resources_tool_surfaces_api_errors(self): "url": "http://localhost:8000/api/v1/search?q=palestine", "content_type": "application/json", "location": None, - "data": {"error": "Elasticsearch search failed"}, + "data": { + "errors": [ + { + "status": 500, + "code": "elasticsearch_search_failed", + "title": "Search failed", + "detail": "Elasticsearch search failed.", + } + ] + }, } with patch.object(service, "_api_request", AsyncMock(return_value=api_payload)): @@ -118,7 +127,7 @@ async def test_search_resources_tool_surfaces_api_errors(self): assert payload["status_code"] == 500 assert payload["error_type"] == "elasticsearch" assert payload["query"] == "palestine" - assert payload["data"]["error"] == "Elasticsearch search failed" + assert payload["data"]["errors"][0]["code"] == "elasticsearch_search_failed" @pytest.mark.asyncio async def test_search_resources_tool_surfaces_connection_errors(self): diff --git a/backend/tests/services/test_search_service.py b/backend/tests/services/test_search_service.py index 1104e8c..4722788 100644 --- a/backend/tests/services/test_search_service.py +++ b/backend/tests/services/test_search_service.py @@ -234,6 +234,12 @@ async def test_search_with_real_data_and_processing(self): try: result = await service.search(q="map", page=1, limit=5) + if "error" in result: + assert result["message"] == "Search operation failed" + assert result["error_type"] in {"connection", "elasticsearch"} + assert "event loop" not in str(result.get("error", "")).lower() + return + # Verify the structure assert "data" in result assert "meta" in result @@ -262,6 +268,8 @@ async def test_search_with_real_data_and_processing(self): assert "resourceProcessing" in result["queryTime"] assert "totalResponseTime" in result["queryTime"] + except AssertionError: + raise except Exception as e: # Handle connection errors gracefully assert (