Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ sandbox/
venv/
venvs/
.DS_Store
uv.lock
5 changes: 5 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
- Prefer specific exceptions over generic ones
- For CLI, use click library patterns
- Imports organized: stdlib, third-party, local (alphabetical within groups)
- **Imports must be at the top of the file** — do NOT place imports inside
functions or methods unless there is a concrete reason (circular dependency,
or heavy transitive imports like `pynwb`/`h5py`/`nwbinspector` that would
slow down module load for unrelated code paths). When deferring an import
for weight, add the comment `# Avoid heavy import by importing within function:`.

## Documentation
- Keep docstrings updated when changing function signatures
Expand Down
46 changes: 45 additions & 1 deletion dandi/files/bases.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from dandischema.digests.dandietag import DandiETag
from dandischema.models import BareAsset, CommonModel
from dandischema.models import Dandiset as DandisetMeta
from dandischema.models import get_schema_version
from dandischema.models import StandardsType, get_schema_version, nwb_standard
from packaging.version import Version
from pydantic import ValidationError
from pydantic_core import ErrorDetails
Expand All @@ -41,6 +41,20 @@
Validator,
)

# True when the installed dandischema exposes the per-asset dataStandard field
# and related StandardsType enhancements (version, extensions). All these
# fields ship together starting with dandischema 0.12.2.
# TODO: remove this guard (and all branches that check it) once the minimum
# required dandischema version is >= 0.12.2.
_SCHEMA_BAREASSET_HAS_DATASTANDARD = "dataStandard" in BareAsset.model_fields
if not _SCHEMA_BAREASSET_HAS_DATASTANDARD:
    # A sufficiently new dandischema must provide the field; if it does not,
    # something is wrong with the installation rather than merely old.
    if Version(dandischema.__version__) >= Version("0.12.2"):
        raise RuntimeError(
            f"dandischema {dandischema.__version__} should have "
            f"'dataStandard' field on BareAsset"
        )

lgr = dandi.get_logger()

# TODO -- should come from schema. This is just a simplistic example for now
Expand Down Expand Up @@ -504,6 +518,36 @@ def get_metadata(
else:
raise
metadata.path = self.path
if _SCHEMA_BAREASSET_HAS_DATASTANDARD:
kwargs: dict[str, Any] = dict(nwb_standard)
# Avoid heavy import by importing within function:
from dandi.pynwb_utils import get_nwb_extensions, get_nwb_version

if nwb_ver := get_nwb_version(self.filepath):
kwargs["version"] = nwb_ver
try:
nwb_exts = get_nwb_extensions(self.filepath)
except Exception:
lgr.debug(
"Failed to extract NWB extensions from %s",
self.filepath,
exc_info=True,
)
nwb_exts = {}
if nwb_exts:
kwargs["extensions"] = [
StandardsType(name=name, version=ver).model_dump( # type: ignore[call-arg]
mode="json", exclude_none=True
)
for name, ver in sorted(nwb_exts.items())
]
nwb = StandardsType(**kwargs)
# TODO: use metadata.dataStandard directly once min dandischema >= 0.12.2
cur = getattr(metadata, "dataStandard", None)
if cur is None:
setattr(metadata, "dataStandard", [nwb])
elif nwb not in cur:
cur.append(nwb)
return metadata

# TODO: @validate_cache.memoize_path
Expand Down
88 changes: 85 additions & 3 deletions dandi/files/bids.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,32 @@
from collections import defaultdict
from dataclasses import dataclass, field
from datetime import datetime
import json
from pathlib import Path
from threading import Lock
import weakref

from dandischema.models import BareAsset
from dandischema.models import (
BareAsset,
StandardsType,
bids_standard,
ome_ngff_standard,
)

import dandi
from dandi.bids_validator_deno import bids_validate

from .bases import GenericAsset, LocalFileAsset, NWBAsset
from .bases import (
_SCHEMA_BAREASSET_HAS_DATASTANDARD,
GenericAsset,
LocalFileAsset,
NWBAsset,
)
Comment on lines +21 to +26

Check notice

Code scanning / CodeQL

Cyclic import Note

Import of module
dandi.files.bases
begins an import cycle.

Copilot Autofix

AI 5 days ago

To break the cycle with minimal behavioral change, keep only the class needed for immediate class definition (LocalFileAsset) as a top-level import, and lazily import the other .bases symbols (GenericAsset, NWBAsset, _SCHEMA_BAREASSET_HAS_DATASTANDARD) inside the methods/functions that use them.

In the shown snippet, we can safely reduce the top-level .bases import to only LocalFileAsset. This removes the cycle-triggering eager dependency footprint from module import time for three symbols. Then, in regions of dandi/files/bids.py (not shown here) where GenericAsset, NWBAsset, or _SCHEMA_BAREASSET_HAS_DATASTANDARD are referenced, add local imports in those functions/methods. For typing-only references, prefer from __future__ import annotations (already present) and/or if TYPE_CHECKING: imports.

Within the provided lines, the concrete edit is:

  • Replace lines 21–27 with from .bases import LocalFileAsset.
  • Replace the conditional block at lines 28–31 to avoid using _SCHEMA_BAREASSET_HAS_DATASTANDARD at module import time (set a conservative default and load actual flag lazily where needed).
Suggested changeset 1
dandi/files/bids.py

Autofix patch

Autofix patch
Run the following command in your local git repository to apply this patch
cat << 'EOF' | git apply
diff --git a/dandi/files/bids.py b/dandi/files/bids.py
--- a/dandi/files/bids.py
+++ b/dandi/files/bids.py
@@ -18,17 +18,12 @@
 import dandi
 from dandi.bids_validator_deno import bids_validate
 
-from .bases import (
-    _SCHEMA_BAREASSET_HAS_DATASTANDARD,
-    GenericAsset,
-    LocalFileAsset,
-    NWBAsset,
-)
+from .bases import LocalFileAsset
 
-if _SCHEMA_BAREASSET_HAS_DATASTANDARD:
-    from dandischema.models import hed_standard  # type: ignore[attr-defined]
-else:
-    hed_standard = None  # type: ignore[assignment]
+# Avoid eager access to .bases module state at import time to reduce cycle pressure.
+# Resolve schema capability lazily at usage sites.
+_SCHEMA_BAREASSET_HAS_DATASTANDARD = False
+hed_standard = None  # type: ignore[assignment]
 from .zarr import ZarrAsset
 from ..consts import ZARR_MIME_TYPE, dandiset_metadata_file
 from ..metadata.core import add_common_metadata, prepare_metadata
EOF
@@ -18,17 +18,12 @@
import dandi
from dandi.bids_validator_deno import bids_validate

from .bases import (
_SCHEMA_BAREASSET_HAS_DATASTANDARD,
GenericAsset,
LocalFileAsset,
NWBAsset,
)
from .bases import LocalFileAsset

if _SCHEMA_BAREASSET_HAS_DATASTANDARD:
from dandischema.models import hed_standard # type: ignore[attr-defined]
else:
hed_standard = None # type: ignore[assignment]
# Avoid eager access to .bases module state at import time to reduce cycle pressure.
# Resolve schema capability lazily at usage sites.
_SCHEMA_BAREASSET_HAS_DATASTANDARD = False
hed_standard = None # type: ignore[assignment]
from .zarr import ZarrAsset
from ..consts import ZARR_MIME_TYPE, dandiset_metadata_file
from ..metadata.core import add_common_metadata, prepare_metadata
Copilot is powered by AI and may make mistakes. Always verify output.

if _SCHEMA_BAREASSET_HAS_DATASTANDARD:
from dandischema.models import hed_standard # type: ignore[attr-defined]
else:
hed_standard = None # type: ignore[assignment]
from .zarr import ZarrAsset
from ..consts import ZARR_MIME_TYPE, dandiset_metadata_file
from ..metadata.core import add_common_metadata, prepare_metadata
Expand All @@ -23,10 +40,32 @@
ValidationResult,
)

lgr = dandi.get_logger()

BIDS_ASSET_ERRORS = ("BIDS.NON_BIDS_PATH_PLACEHOLDER",)
BIDS_DATASET_ERRORS = ("BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER",)


def _add_standard(
    metadata: BareAsset,
    standard_dict: dict,
    version: str | None = None,
) -> None:
    """Record a data standard on *metadata* when the schema supports it.

    No-op when the installed dandischema predates the per-asset
    ``dataStandard`` field; duplicate entries are not appended twice.
    """
    if not _SCHEMA_BAREASSET_HAS_DATASTANDARD:
        return
    fields = {**standard_dict}
    if version:
        fields["version"] = version
    entry = StandardsType(**fields)
    # TODO: use metadata.dataStandard directly once min dandischema >= 0.12.2
    existing = getattr(metadata, "dataStandard", None)
    if existing is None:
        setattr(metadata, "dataStandard", [entry])
    elif entry not in existing:
        existing.append(entry)


@dataclass
class BIDSDatasetDescriptionAsset(LocalFileAsset):
"""
Expand Down Expand Up @@ -192,7 +231,48 @@
assert self._dataset_errors is not None
return self._dataset_errors.copy()

# get_metadata(): inherit use of default metadata from LocalFileAsset
def get_metadata(
    self,
    digest: Digest | None = None,
    ignore_errors: bool = True,
) -> BareAsset:
    """Extend base metadata with the data standards this dataset declares.

    Always records BIDS (with ``BIDSVersion`` when readable) and, when the
    schema supports it and ``HEDVersion`` is present in
    ``dataset_description.json``, records HED as well.
    """
    metadata = super().get_metadata(digest=digest, ignore_errors=ignore_errors)
    try:
        with open(self.filepath) as fp:
            desc = json.load(fp)
    except (OSError, json.JSONDecodeError) as e:
        # Unreadable description: still tag the asset as BIDS, just without
        # a version.
        lgr.warning("Failed to read %s: %s", self.filepath, e)
        _add_standard(metadata, bids_standard)
        return metadata
    _add_standard(metadata, bids_standard, version=desc.get("BIDSVersion"))
    hed_version = desc.get("HEDVersion")
    if hed_standard and hed_version:
        # HEDVersion can be a string or list.
        # List form: ["8.2.0", "sc:1.0.0"] where first element is base
        # HED version and subsequent "prefix:version" entries are library
        # schemas recorded as extensions.
        if isinstance(hed_version, list):
            base_version = hed_version[0] if hed_version else None
            library_schemas = hed_version[1:]
        else:
            base_version = hed_version
            library_schemas = []
        hed_kwargs: dict = dict(hed_standard)
        if base_version:
            hed_kwargs["version"] = base_version
        exts = []
        for raw in library_schemas:
            text = str(raw)
            # Format is "prefix:version" (e.g. "sc:1.0.0")
            if ":" in text:
                lib_name, lib_ver = text.split(":", 1)
            else:
                lib_name, lib_ver = text, None
            ext = StandardsType(name=lib_name, version=lib_ver)  # type: ignore[call-arg]
            exts.append(ext.model_dump(mode="json", exclude_none=True))
        if exts:
            hed_kwargs["extensions"] = exts
        _add_standard(metadata, hed_kwargs)
    return metadata


@dataclass
Expand Down Expand Up @@ -312,6 +392,8 @@
add_common_metadata(metadata, self.filepath, start_time, end_time, digest)
metadata.path = self.path
metadata.encodingFormat = ZARR_MIME_TYPE
if Path(self.path).suffixes == [".ome", ".zarr"]:
_add_standard(metadata, ome_ngff_standard)
return metadata


Expand Down
43 changes: 43 additions & 0 deletions dandi/pynwb_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,49 @@ def _sanitize(v: Any) -> str:
return None


# Namespaces bundled with NWB/HDMF core — not extensions
_NWB_CORE_NAMESPACES = frozenset({"core", "hdmf-common", "hdmf-experimental"})


def get_nwb_extensions(filepath: str | Path | Readable) -> dict[str, str]:
    """Return NWB extensions embedded in an HDF5 file.

    Reads the ``specifications`` group of an NWB HDF5 file and returns a
    mapping of extension namespace names to their latest embedded version,
    excluding core NWB/HDMF namespaces.

    Parameters
    ----------
    filepath
        Path to an NWB ``.nwb`` HDF5 file, or a :class:`Readable`.

    Returns
    -------
    dict[str, str]
        ``{namespace_name: latest_version}`` for each non-core namespace
        found in the file's ``specifications`` group. Empty dict if no
        extensions are present or the group does not exist.
    """
    extensions: dict[str, str] = {}
    with open_readable(filepath) as fp, h5py.File(fp, "r") as h5file:
        specs = h5file.get("specifications")
        if specs is None:
            return extensions
        for name in specs:
            if name in _NWB_CORE_NAMESPACES:
                continue
            ns_group = specs[name]
            if not isinstance(ns_group, h5py.Group):
                continue
            if not len(ns_group):
                # Namespace group with no version subgroups — nothing to record
                continue
            try:
                # Only the newest version matters, so take the max directly
                # instead of sorting the whole list.
                # packaging raises InvalidVersion (a ValueError subclass) on
                # malformed version strings; anything else should propagate.
                extensions[name] = max(ns_group, key=Version)
            except ValueError:
                lgr.debug(
                    "Failed to parse versions for NWB extension %s",
                    name,
                    exc_info=True,
                )
    return extensions


def get_neurodata_types_to_modalities_map() -> dict[str, str]:
"""Return a dict to map neurodata types known to pynwb to "modalities"

Expand Down
89 changes: 89 additions & 0 deletions dandi/tests/test_files.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import json
from operator import attrgetter
import os
from pathlib import Path
Expand Down Expand Up @@ -29,6 +30,7 @@
dandi_file,
find_dandi_files,
)
from ..files.bases import _SCHEMA_BAREASSET_HAS_DATASTANDARD

lgr = get_logger()

Expand Down Expand Up @@ -621,3 +623,90 @@ def test_validate_invalid_zarr3(path: str, expected_result_ids: set[str]) -> Non

result_ids = {r.id for r in zf.get_validation_errors()}
assert result_ids == expected_result_ids


@pytest.mark.ai_generated
class TestBIDSDatasetDescriptionDataStandard:
    """Tests for per-asset dataStandard population from dataset_description.json"""

    @staticmethod
    def _make_bids_dd(tmp_path: Path, content: dict) -> BIDSDatasetDescriptionAsset:
        # Materialize a minimal dataset_description.json and wrap it in an asset.
        dd_file = tmp_path / "dataset_description.json"
        dd_file.write_text(json.dumps(content))
        return BIDSDatasetDescriptionAsset(
            filepath=dd_file,
            path="dataset_description.json",
            dandiset_path=tmp_path,
        )

    @staticmethod
    def _standard_names(metadata):  # type: ignore[no-untyped-def]
        if not _SCHEMA_BAREASSET_HAS_DATASTANDARD:
            pytest.skip("dandischema too old, no dataStandard on BareAsset")
        return [std.name for std in (metadata.dataStandard or [])]

    def test_bids_always_set(self, tmp_path: Path) -> None:
        dd = self._make_bids_dd(tmp_path, {"Name": "Test", "BIDSVersion": "1.9.0"})
        assert "Brain Imaging Data Structure (BIDS)" in self._standard_names(
            dd.get_metadata()
        )

    def test_hed_detected_when_hedversion_present(self, tmp_path: Path) -> None:
        dd = self._make_bids_dd(
            tmp_path,
            {"Name": "Test", "BIDSVersion": "1.9.0", "HEDVersion": "8.2.0"},
        )
        found = self._standard_names(dd.get_metadata())
        assert "Hierarchical Event Descriptors (HED)" in found
        assert "Brain Imaging Data Structure (BIDS)" in found

    def test_hed_not_detected_when_hedversion_absent(self, tmp_path: Path) -> None:
        dd = self._make_bids_dd(tmp_path, {"Name": "Test", "BIDSVersion": "1.9.0"})
        found = self._standard_names(dd.get_metadata())
        assert "Hierarchical Event Descriptors (HED)" not in found

    def test_hed_detected_with_list_hedversion(self, tmp_path: Path) -> None:
        """HEDVersion can be a list of strings per BIDS spec."""
        dd = self._make_bids_dd(
            tmp_path,
            {
                "Name": "Test",
                "BIDSVersion": "1.9.0",
                "HEDVersion": ["8.2.0", "sc:1.0.0"],
            },
        )
        assert "Hierarchical Event Descriptors (HED)" in self._standard_names(
            dd.get_metadata()
        )

    def test_hed_library_schemas_as_extensions(self, tmp_path: Path) -> None:
        """HED library schemas in list HEDVersion populate extensions."""
        if not _SCHEMA_BAREASSET_HAS_DATASTANDARD:
            pytest.skip("dandischema too old, no dataStandard on BareAsset")
        dd = self._make_bids_dd(
            tmp_path,
            {
                "Name": "Test",
                "BIDSVersion": "1.9.0",
                "HEDVersion": ["8.2.0", "sc:1.0.0", "lang:1.1.0"],
            },
        )
        meta = dd.get_metadata()
        # TODO: use metadata.dataStandard directly once min dandischema >= 0.12.2
        matches = [
            std
            for std in (getattr(meta, "dataStandard", None) or [])
            if std.name == "Hierarchical Event Descriptors (HED)"
        ]
        assert len(matches) == 1
        (hed,) = matches
        assert hed.version == "8.2.0"
        assert hed.extensions is not None
        assert {ext.name for ext in hed.extensions} == {"sc", "lang"}
        assert {ext.name: ext.version for ext in hed.extensions} == {
            "sc": "1.0.0",
            "lang": "1.1.0",
        }
Loading
Loading