diff --git a/dandi/bids_validator_deno/_validator.py b/dandi/bids_validator_deno/_validator.py index d97f953db..e811f2c5a 100644 --- a/dandi/bids_validator_deno/_validator.py +++ b/dandi/bids_validator_deno/_validator.py @@ -13,7 +13,7 @@ from pydantic import DirectoryPath, validate_call from dandi.utils import find_parent_directory_containing -from dandi.validate_types import ( +from dandi.validate.types import ( Origin, OriginType, Scope, diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index b0b5012f5..bc86b4982 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -1,6 +1,5 @@ from __future__ import annotations -from collections.abc import Iterable import logging import os import re @@ -11,8 +10,59 @@ from .base import devel_debug_option, devel_option, map_to_click_exceptions from ..utils import pluralize -from ..validate import validate as validate_ -from ..validate_types import Severity, ValidationResult +from ..validate.core import validate as validate_ +from ..validate.types import Severity, ValidationResult + + +def _collect_results( + paths: tuple[str, ...], + schema: str | None, + devel_debug: bool, + allow_any_path: bool, +) -> list[ValidationResult]: + """Run validation and collect all results into a list.""" + # Avoid heavy import by importing within function: + from ..pynwb_utils import ignore_benign_pynwb_warnings + + # Don't log validation warnings, as this command reports them to the user + # anyway: + root = logging.getLogger() + for h in root.handlers: + h.addFilter(lambda r: not getattr(r, "validating", False)) + + if not paths: + paths = (os.curdir,) + # below we are using load_namespaces but it causes HDMF to whine if there + # is no cached name spaces in the file. It is benign but not really useful + # at this point, so we ignore it although ideally there should be a formal + # way to get relevant warnings (not errors) from PyNWB + ignore_benign_pynwb_warnings() + + return list( + validate_( + *paths, + schema_version=schema, + devel_debug=devel_debug, + allow_any_path=allow_any_path, + ) + ) + + +def _filter_results( + results: list[ValidationResult], + min_severity: str, + ignore: str | None, +) -> list[ValidationResult]: + """Filter results by minimum severity and ignore pattern.""" + min_severity_value = Severity[min_severity].value + filtered = [ + r + for r in results + if r.severity is not None and r.severity.value >= min_severity_value + ] + if ignore is not None: + filtered = [r for r in filtered if not re.search(ignore, r.id)] + return filtered @click.command() @@ -102,49 +152,15 @@ def validate( Exits with non-0 exit code if any file is not compliant. """ - # Avoid heavy import by importing within function: - from ..pynwb_utils import ignore_benign_pynwb_warnings - - # Don't log validation warnings, as this command reports them to the user - # anyway: - root = logging.getLogger() - for h in root.handlers: - h.addFilter(lambda r: not getattr(r, "validating", False)) - - if not paths: - paths = (os.curdir,) - # below we are using load_namespaces but it causes HDMF to whine if there - # is no cached name spaces in the file. It is benign but not really useful - # at this point, so we ignore it although ideally there should be a formal - # way to get relevant warnings (not errors) from PyNWB - ignore_benign_pynwb_warnings() - - validator_result = validate_( - *paths, - schema_version=schema, - devel_debug=devel_debug, - allow_any_path=allow_any_path, - ) - - min_severity_value = Severity[min_severity].value - - filtered_results = [ - i - for i in validator_result - if i.severity is not None and i.severity.value >= min_severity_value - ] - - _process_issues(filtered_results, grouping, ignore) + results = _collect_results(paths, schema, devel_debug, allow_any_path) + filtered = _filter_results(results, min_severity, ignore) + _process_issues(filtered, grouping) def _process_issues( - validator_result: Iterable[ValidationResult], + issues: list[ValidationResult], grouping: str, - ignore: str | None = None, ) -> None: - issues = [i for i in validator_result if i.severity is not None] - if ignore is not None: - issues = [i for i in issues if not re.search(ignore, i.id)] purviews = [i.purview for i in issues] if grouping == "none": display_errors( diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index 6694ef3ec..978bd7e91 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -5,7 +5,7 @@ from ..cmd_validate import _process_issues, validate from ...tests.xfail import mark_xfail_windows_python313_posixsubprocess -from ...validate_types import ( +from ...validate.types import ( Origin, OriginType, Scope, diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 15bc616c4..b5be008bf 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -29,7 +29,7 @@ from dandi.metadata.core import get_default_metadata from dandi.misctypes import DUMMY_DANDI_ETAG, Digest, LocalReadableFile, P from dandi.utils import post_upload_size_check, pre_upload_size_check, yaml_load -from dandi.validate_types import ( +from dandi.validate.types import ( ORIGIN_INTERNAL_DANDI, ORIGIN_VALIDATION_DANDI, Origin, diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 7e43163ec..bcda0f6d2 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -16,7 +16,7 @@ from ..consts import ZARR_MIME_TYPE, dandiset_metadata_file from ..metadata.core import add_common_metadata, prepare_metadata from ..misctypes import Digest -from ..validate_types import ( +from ..validate.types import ( ORIGIN_VALIDATION_DANDI_LAYOUT, Scope, Severity, @@ -92,7 +92,7 @@ def _get_metadata(self) -> None: with self._lock: if self._asset_metadata is None: # Import here to avoid circular import - from dandi.validate import validate_bids + from dandi.validate.core import validate_bids # === Validate the dataset using bidsschematools === # This is done to obtain the metadata for each asset in the dataset diff --git a/dandi/files/zarr.py b/dandi/files/zarr.py index efb1efd2d..805ce3a16 100644 --- a/dandi/files/zarr.py +++ b/dandi/files/zarr.py @@ -10,10 +10,10 @@ import json import os import os.path -import urllib.parse from pathlib import Path from time import sleep from typing import Any, Optional +import urllib.parse from dandischema.models import BareAsset, DigestType from pydantic import BaseModel, ConfigDict, ValidationError @@ -47,7 +47,7 @@ ) from .bases import LocalDirectoryAsset -from ..validate_types import ( +from ..validate.types import ( ORIGIN_VALIDATION_DANDI_ZARR, Origin, OriginType, diff --git a/dandi/organize.py b/dandi/organize.py index ac313fe86..4ba0047ba 100644 --- a/dandi/organize.py +++ b/dandi/organize.py @@ -43,7 +43,7 @@ pluralize, yaml_load, ) -from .validate_types import ( +from .validate.types import ( ORIGIN_VALIDATION_DANDI_LAYOUT, Scope, Severity, diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index 7ffe552a8..5010725e4 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -42,7 +42,7 @@ ) from .misctypes import Readable from .utils import get_module_version, is_url -from .validate_types import ( +from .validate.types import ( Origin, OriginType, Scope, diff --git a/dandi/tests/test_bids_validator_deno/test_validator.py b/dandi/tests/test_bids_validator_deno/test_validator.py index d0e812c49..2782c42f2 100644 --- a/dandi/tests/test_bids_validator_deno/test_validator.py +++ b/dandi/tests/test_bids_validator_deno/test_validator.py @@ -27,7 +27,7 @@ ) from dandi.consts import dandiset_metadata_file from dandi.tests.fixtures import BIDS_TESTDATA_SELECTION -from dandi.validate_types import ( +from dandi.validate.types import ( OriginType, Scope, Severity, diff --git a/dandi/upload.py b/dandi/upload.py index 1694aee23..324e31129 100644 --- a/dandi/upload.py +++ b/dandi/upload.py @@ -49,7 +49,7 @@ from .support import pyout as pyouts from .support.pyout import naturalsize from .utils import ensure_datetime, path_is_subpath, pluralize -from .validate_types import Severity +from .validate.types import Severity def _check_dandidownload_paths(dfile: DandiFile) -> None: diff --git a/dandi/validate/__init__.py b/dandi/validate/__init__.py new file mode 100644 index 000000000..659b81765 --- /dev/null +++ b/dandi/validate/__init__.py @@ -0,0 +1,47 @@ +"""Validation of DANDI datasets against schemas and standards. + +This subpackage provides validation functionality for dandisets, including: +- DANDI schema validation +- BIDS standard validation +- File layout and organization validation +- Metadata completeness checking + +Submodules: +- core: Main validation functions (validate, validate_bids) +- types: Data types and models (ValidationResult, Origin, Severity, etc.) +- io: JSONL read/write utilities for validation results + +Note: core is NOT eagerly imported here to avoid circular imports +(core → dandi.files → dandi.validate.types → dandi.validate.__init__). +Import from dandi.validate.core directly for validate/validate_bids. +""" + +from .types import ( + ORIGIN_INTERNAL_DANDI, + ORIGIN_VALIDATION_DANDI, + ORIGIN_VALIDATION_DANDI_LAYOUT, + ORIGIN_VALIDATION_DANDI_ZARR, + Origin, + OriginType, + Scope, + Severity, + Severity_, + Standard, + ValidationResult, + Validator, +) + +__all__ = [ + "ORIGIN_INTERNAL_DANDI", + "ORIGIN_VALIDATION_DANDI", + "ORIGIN_VALIDATION_DANDI_LAYOUT", + "ORIGIN_VALIDATION_DANDI_ZARR", + "Origin", + "OriginType", + "Scope", + "Severity", + "Severity_", + "Standard", + "ValidationResult", + "Validator", +] diff --git a/dandi/validate.py b/dandi/validate/core.py similarity index 97% rename from dandi/validate.py rename to dandi/validate/core.py index c32000dab..f30cf95b5 100644 --- a/dandi/validate.py +++ b/dandi/validate/core.py @@ -13,10 +13,7 @@ import os from pathlib import Path -from .consts import dandiset_metadata_file -from .files import find_dandi_files -from .utils import find_parent_directory_containing -from .validate_types import ( +from .types import ( ORIGIN_VALIDATION_DANDI_LAYOUT, Origin, OriginType, @@ -26,6 +23,9 @@ ValidationResult, Validator, ) +from ..consts import dandiset_metadata_file +from ..files import find_dandi_files +from ..utils import find_parent_directory_containing BIDS_TO_DANDI = { "subject": "subject_id", diff --git a/dandi/validate/tests/__init__.py b/dandi/validate/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/dandi/tests/test_validate.py b/dandi/validate/tests/test_core.py similarity index 97% rename from dandi/tests/test_validate.py rename to dandi/validate/tests/test_core.py index edbf4ae8f..64c32090d 100644 --- a/dandi/tests/test_validate.py +++ b/dandi/validate/tests/test_core.py @@ -4,11 +4,8 @@ import pytest -from .fixtures import BIDS_TESTDATA_SELECTION -from .. import __version__ -from ..consts import dandiset_metadata_file -from ..validate import validate -from ..validate_types import ( +from ..core import validate +from ..types import ( Origin, OriginType, Scope, @@ -17,6 +14,9 @@ ValidationResult, Validator, ) +from ... import __version__ +from ...consts import dandiset_metadata_file +from ...tests.fixtures import BIDS_TESTDATA_SELECTION def test_validate_nwb_error(simple3_nwb: Path) -> None: diff --git a/dandi/tests/test_validate_types.py b/dandi/validate/tests/test_types.py similarity index 99% rename from dandi/tests/test_validate_types.py rename to dandi/validate/tests/test_types.py index eb43e251a..9b3b3abf1 100644 --- a/dandi/tests/test_validate_types.py +++ b/dandi/validate/tests/test_types.py @@ -6,7 +6,7 @@ from pydantic import ValidationError import pytest -from dandi.validate_types import ( +from dandi.validate.types import ( Origin, OriginType, Scope, diff --git a/dandi/validate_types.py b/dandi/validate/types.py similarity index 100% rename from dandi/validate_types.py rename to dandi/validate/types.py