From f4675e57bdc017b92771913e01706894eb43675a Mon Sep 17 00:00:00 2001 From: Alireza Hajebrahimi <6937697+iarata@users.noreply.github.com> Date: Thu, 26 Mar 2026 13:00:44 +0100 Subject: [PATCH] Refactored v0.2.0 --- .github/workflows/ci.yml | 29 + .github/workflows/release.yml | 132 ++ CHANGELOG.md | 32 +- README.md | 214 +-- docs/_static/.gitkeep | 1 + docs/_templates/.gitkeep | 1 + docs/api.rst | 142 ++ docs/architecture.rst | 13 + docs/cli.rst | 13 + docs/conf.py | 24 + docs/index.rst | 11 + docs/overview.rst | 8 + examples/demo.ipynb | 634 ++------ examples/disc.py | 97 +- examples/generated_types.py | 332 ++-- examples/main.py | 80 +- pyproject.toml | 29 +- src/carp/__init__.py | 22 +- src/carp/cli.py | 137 +- src/carp/commandline/__init__.py | 1 + src/carp/commandline/app.py | 56 + src/carp/commandline/common.py | 48 + src/carp/commandline/convert.py | 28 + src/carp/commandline/count.py | 23 + src/carp/commandline/export.py | 54 + src/carp/commandline/participants.py | 22 + src/carp/commandline/schema.py | 22 + src/carp/constants.py | 7 + src/carp/core/__init__.py | 18 + src/carp/core/dependencies.py | 33 + src/carp/core/fields.py | 56 + src/carp/core/files.py | 53 + src/carp/core/models.py | 28 + src/carp/core/naming.py | 20 + src/carp/export/__init__.py | 5 + src/carp/export/service.py | 93 ++ src/carp/frames/__init__.py | 5 + src/carp/frames/service.py | 139 ++ src/carp/participants/__init__.py | 7 + src/carp/participants/directory.py | 152 ++ src/carp/participants/parser.py | 78 + src/carp/participants/service.py | 51 + src/carp/participants/view.py | 105 ++ src/carp/plotting/__init__.py | 11 +- src/carp/plotting/map_viz.py | 416 ----- src/carp/plotting/prepare.py | 81 + src/carp/plotting/render.py | 56 + src/carp/plotting/service.py | 130 ++ src/carp/reader.py | 1417 ----------------- src/carp/records/__init__.py | 5 + src/carp/records/service.py | 81 + src/carp/schema/__init__.py | 5 + src/carp/schema/service.py | 30 + src/carp/study.py | 47 + 
src/carp/types/__init__.py | 5 + src/carp/types/infer.py | 64 + src/carp/types/render.py | 97 ++ src/carp/types/service.py | 28 + tests/conftest.py | 33 + .../multi_phase/phase_a/data-streams.json | 109 ++ .../multi_phase/phase_a/participant-data.json | 64 + .../multi_phase/phase_b/data-streams.json | 133 ++ .../multi_phase/phase_b/participant-data.json | 46 + tests/test_cli.py | 57 + tests/test_core.py | 55 + tests/test_edge_frames_plotting.py | 84 + tests/test_edge_types_cli.py | 103 ++ tests/test_export.py | 24 + tests/test_frames.py | 29 + tests/test_participants.py | 42 + tests/test_real_data.py | 23 + tests/test_records_schema.py | 29 + tests/test_structure.py | 22 + tests/test_types_plotting.py | 44 + uv.lock | 272 ++++ 75 files changed, 3700 insertions(+), 2997 deletions(-) create mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/release.yml create mode 100644 docs/_static/.gitkeep create mode 100644 docs/_templates/.gitkeep create mode 100644 docs/api.rst create mode 100644 docs/architecture.rst create mode 100644 docs/cli.rst create mode 100644 docs/conf.py create mode 100644 docs/index.rst create mode 100644 docs/overview.rst create mode 100644 src/carp/commandline/__init__.py create mode 100644 src/carp/commandline/app.py create mode 100644 src/carp/commandline/common.py create mode 100644 src/carp/commandline/convert.py create mode 100644 src/carp/commandline/count.py create mode 100644 src/carp/commandline/export.py create mode 100644 src/carp/commandline/participants.py create mode 100644 src/carp/commandline/schema.py create mode 100644 src/carp/constants.py create mode 100644 src/carp/core/__init__.py create mode 100644 src/carp/core/dependencies.py create mode 100644 src/carp/core/fields.py create mode 100644 src/carp/core/files.py create mode 100644 src/carp/core/models.py create mode 100644 src/carp/core/naming.py create mode 100644 src/carp/export/__init__.py create mode 100644 src/carp/export/service.py create mode 
100644 src/carp/frames/__init__.py create mode 100644 src/carp/frames/service.py create mode 100644 src/carp/participants/__init__.py create mode 100644 src/carp/participants/directory.py create mode 100644 src/carp/participants/parser.py create mode 100644 src/carp/participants/service.py create mode 100644 src/carp/participants/view.py delete mode 100644 src/carp/plotting/map_viz.py create mode 100644 src/carp/plotting/prepare.py create mode 100644 src/carp/plotting/render.py create mode 100644 src/carp/plotting/service.py delete mode 100644 src/carp/reader.py create mode 100644 src/carp/records/__init__.py create mode 100644 src/carp/records/service.py create mode 100644 src/carp/schema/__init__.py create mode 100644 src/carp/schema/service.py create mode 100644 src/carp/study.py create mode 100644 src/carp/types/__init__.py create mode 100644 src/carp/types/infer.py create mode 100644 src/carp/types/render.py create mode 100644 src/carp/types/service.py create mode 100644 tests/conftest.py create mode 100644 tests/fixtures/multi_phase/phase_a/data-streams.json create mode 100644 tests/fixtures/multi_phase/phase_a/participant-data.json create mode 100644 tests/fixtures/multi_phase/phase_b/data-streams.json create mode 100644 tests/fixtures/multi_phase/phase_b/participant-data.json create mode 100644 tests/test_cli.py create mode 100644 tests/test_core.py create mode 100644 tests/test_edge_frames_plotting.py create mode 100644 tests/test_edge_types_cli.py create mode 100644 tests/test_export.py create mode 100644 tests/test_frames.py create mode 100644 tests/test_participants.py create mode 100644 tests/test_real_data.py create mode 100644 tests/test_records_schema.py create mode 100644 tests/test_structure.py create mode 100644 tests/test_types_plotting.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..5546673 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,29 @@ +name: ci + +on: + push: + branches: + 
- "**" + pull_request: + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.13" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install -e . + python -m pip install pytest pytest-cov mypy ruff sphinx sphinx-rtd-theme pandas pyarrow folium matplotlib + - name: Lint + run: ruff check src examples tests docs + - name: Type check + run: mypy src/carp + - name: Test + run: pytest --cov=src/carp --cov-branch --cov-fail-under=100 + - name: Build docs + run: sphinx-build -b html docs docs/_build/html diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..05eebb6 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,132 @@ +name: release + +on: + push: + tags: + - "**" + +concurrency: + group: release-${{ github.ref }} + cancel-in-progress: false + +jobs: + validate_tag: + runs-on: ubuntu-latest + outputs: + version: ${{ steps.version.outputs.version }} + tag: ${{ steps.version.outputs.tag }} + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.13" + - id: version + name: Validate tag against package version + run: | + version=$(python - <<'PY' + import pathlib + import tomllib + + project = tomllib.loads(pathlib.Path("pyproject.toml").read_text()) + print(project["project"]["version"]) + PY + ) + tag="${GITHUB_REF_NAME}" + if [ "${tag}" != "${version}" ] && [ "${tag}" != "v${version}" ]; then + echo "Tag ${tag} does not match package version ${version}." 
>&2 + exit 1 + fi + echo "version=${version}" >> "${GITHUB_OUTPUT}" + echo "tag=${tag}" >> "${GITHUB_OUTPUT}" + + test: + needs: validate_tag + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.10", "3.11", "3.12", "3.13"] + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install -e . + python -m pip install pytest pytest-cov pandas pyarrow folium matplotlib + - name: Run tests + run: pytest --cov=src/carp --cov-branch --cov-fail-under=100 + + quality: + needs: validate_tag + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.13" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install -e . + python -m pip install pytest pytest-cov mypy ruff sphinx sphinx-rtd-theme pandas pyarrow folium matplotlib + - name: Lint + run: ruff check src examples tests docs + - name: Type check + run: mypy src/carp + - name: Build docs + run: sphinx-build -W -b html docs docs/_build/html + + build: + needs: [test, quality] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.13" + - name: Build distributions + run: | + python -m pip install --upgrade pip + python -m pip install build twine + python -m build + python -m twine check dist/* + - uses: actions/upload-artifact@v4 + with: + name: python-package-distributions + path: dist/ + + publish_pypi: + needs: build + runs-on: ubuntu-latest + environment: + name: pypi + permissions: + id-token: write + steps: + - uses: actions/download-artifact@v4 + with: + name: python-package-distributions + path: dist/ + - uses: pypa/gh-action-pypi-publish@release/v1 + + publish_github: + needs: [validate_tag, publish_pypi] + runs-on: ubuntu-latest + 
permissions: + contents: write + steps: + - uses: actions/download-artifact@v4 + with: + name: python-package-distributions + path: dist/ + - name: Generate checksums + run: shasum -a 256 dist/* > dist/SHA256SUMS.txt + - uses: softprops/action-gh-release@v2 + with: + name: Release ${{ needs.validate_tag.outputs.tag }} + tag_name: ${{ needs.validate_tag.outputs.tag }} + generate_release_notes: true + files: dist/* diff --git a/CHANGELOG.md b/CHANGELOG.md index 681e02e..7932cbe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,13 +1,35 @@ # Changelog -All notable changes to this project will be documented in this file. +## [0.2.0] - 2026-03-26 -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), -and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +### Added + +- New `CarpStudy` public API as the primary entrypoint for CARP study analysis +- Modular service layout under `carp.core`, `participants`, `records`, `schema`, `export`, `frames`, `types`, `plotting`, and `commandline` +- Self-contained pytest suite with committed multi-phase fixtures and optional `sleep-data` smoke coverage +- 100% line and branch coverage enforcement for `src/carp` +- Sphinx documentation site with autodoc and Napoleon support +- GitHub Actions CI for linting, type-checking, tests, and docs builds +- Tag-driven CD workflow that validates version tags, publishes to PyPI, and creates GitHub releases +- Dedicated `test` and `docs` dependency groups + +### Changed + +- Replaced the legacy method-heavy design with a thin `CarpStudy` composition root and focused services +- Kept the `carp` CLI command set stable while rewriting the implementation behind modular handlers +- Switched plotting defaults to `dk.cachet.carp.location` +- Made parquet filenames namespace-aware to avoid same-name type collisions +- Added Google-style docstrings and expanded type annotations across the package +- Refreshed the README, example scripts, 
generated type example, and notebook to use the new API +- Normalized Ruff, MyPy, coverage, and documentation build configuration in `pyproject.toml` + +### Removed -## [Unreleased] +- Legacy `carp.reader` monolith +- Legacy `carp.plotting.map_viz` module +- Old `CarpDataStream`-centric example usage and stale plotting/type-generation references -## [0.1.0] - 2024-12-02 +## [0.1.0] ### Added diff --git a/README.md b/README.md index 87a5fb1..574cfb4 100644 --- a/README.md +++ b/README.md @@ -4,202 +4,76 @@ [![Python versions](https://img.shields.io/pypi/pyversions/carp-analytics-python.svg)](https://pypi.org/project/carp-analytics-python/) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) -A high-performance Python library for processing and analysing data from [CARP](https://carp.computerome.dk/) (Copenhagen Research Platform) studies. - -> [!BETA] -> The CARP package is at the beta phase and the APIs and methods might change more often. 
- -## Features - -- **Schema Discovery**: Automatically scans and infers the schema of the data -- **Data Grouping**: Efficiently groups data by any field (e.g., data type, device ID) into separate files -- **Parquet Export**: Convert JSON data to Parquet for faster subsequent analysis -- **Participant Management**: Link and track participants across multiple study phases -- **Visualization**: Generate location heatmaps and other visualizations -- **Pandas Integration**: Seamlessly work with DataFrames - -## Installation - -```bash -pip install carp-analytics-python -``` - -### With Optional Dependencies - -```bash -# For pandas/parquet support -pip install carp-analytics-python[pandas] - -# For visualization support -pip install carp-analytics-python[viz] - -# For scientific computing (numpy, scipy, scikit-learn) -pip install carp-analytics-python[science] - -# Install everything -pip install carp-analytics-python[all] -``` - -### Development Installation - -```bash -git clone https://github.com/carp-dk/carp-analytics-python.git -cd carp-analytics-python - -# Using uv (recommended) -uv sync - -# Or using pip -pip install -e . -``` +`carp-analytics-python` is a Python library for working with CARP study data. It focuses on streaming JSON records, participant lookup, schema discovery, export, parquet conversion, and optional plotting. 
## Quick Start ```python -from carp import CarpDataStream +from carp import CarpStudy -# Initialize with a data file -data = CarpDataStream("data/study-phase-1/data-streams.json") - -# Scan and print the schema -data.print_schema() - -# Convert to Parquet for faster analysis -data.convert_to_parquet("output_parquet") - -# Load data as a DataFrame -df = data.get_dataframe("dk.cachet.carp.stepcount", "output_parquet") -print(df.head()) +study = CarpStudy("sleep-data/phase-1-1/data-streams.json") +print(study.records.count()) +print(study.participants.summary_rows()[0]) ``` -## Working with Participants - -```python -from carp import CarpDataStream - -# Load data from multiple phases -data = CarpDataStream([ - "data/phase-1/data-streams.json", - "data/phase-2/data-streams.json", -]) - -# Print participant summary -data.print_participants() - -# Access participant data via email -participant = data.participant("user@example.com") - -# Get participant info -print(participant.info()) - -# Get available data types for this participant -participant.print_data_types() - -# Get a DataFrame of step count data -df = participant.dataframe("dk.cachet.carp.stepcount", "output_parquet") -``` +## Main API -## Data Export +`CarpStudy` is the primary entrypoint. 
```python -# Export specific data type to JSON -data.export_to_json("heartbeat_data.json", data_type="dk.cachet.carp.heartbeat") +from carp import CarpStudy -# Group data by data type -data.group_by_field("dataStream.dataType.name", "output_by_type") +study = CarpStudy([ + "sleep-data/phase-1-1/data-streams.json", + "sleep-data/phase-2-1/data-streams.json", +]) -# Group data by participant -data.group_by_participant("output_by_participant") +study.schema.scan() +study.export.export_json("output.json", data_type="dk.cachet.carp.stepcount") +study.frames.convert_to_parquet("output_parquet") +study.participant("alice@example.com").info() ``` -## Visualization +## CLI -```python -# Generate location heatmap for a participant -participant = data.participant("user@example.com") -participant.visualize.location(output_file="user_locations.html") +```bash +carp schema sleep-data/phase-1-1/data-streams.json +carp count sleep-data/phase-1-1/data-streams.json +carp participants sleep-data/phase-1-1/data-streams.json +carp export sleep-data/phase-1-1/data-streams.json -o output.json -t dk.cachet.carp.stepcount +carp group sleep-data/phase-1-1/data-streams.json -o grouped_output +carp convert sleep-data/phase-1-1/data-streams.json -o output_parquet ``` -## Command Line Interface +## Documentation -The package includes a CLI for common operations: +The docs are built with Sphinx, `autodoc`, and `napoleon`. 
```bash -# Show schema of data files -carp schema data/study/data-streams.json - -# Convert JSON to Parquet -carp convert data/study/data-streams.json -o output_parquet - -# Count items in data files -carp count data/study/data-streams.json - -# List participants -carp participants data/study/data-streams.json - -# Export filtered data -carp export data/study/data-streams.json -o output.json -t dk.cachet.carp.stepcount - -# Group data by field -carp group data/study/data-streams.json -f dataStream.dataType.name -o grouped_output +python -m pip install sphinx sphinx-rtd-theme +sphinx-build -b html docs docs/_build/html ``` -## API Reference - -### `CarpDataStream` - -The main class for working with CARP data streams. - -| Method | Description | -|--------|-------------| -| `scan_schema()` | Scan and infer the data schema | -| `print_schema()` | Print the inferred schema as a table | -| `convert_to_parquet(output_dir)` | Convert JSON to Parquet files | -| `get_dataframe(data_type, parquet_dir)` | Load data as a pandas DataFrame | -| `export_to_json(output_path, data_type)` | Export data to JSON file | -| `group_by_field(field_path, output_dir)` | Group data by a specific field | -| `participant(email)` | Access participant data via fluent API | -| `print_participants()` | Print participant summary table | - -### `ParticipantAccessor` - -Fluent API for accessing individual participant data. +## Release Automation -| Method | Description | -|--------|-------------| -| `info()` | Get participant information as a dictionary | -| `print_info()` | Print participant info as a table | -| `all_data(data_type)` | Generator for all participant data | -| `data_types()` | Get all unique data types | -| `dataframe(data_type, parquet_dir)` | Get data as a pandas DataFrame | -| `visualize.location()` | Generate location heatmap | +Pushing a new version tag triggers the release workflow. The tag must match the +package version in `pyproject.toml` as either `0.1.0` or `v0.1.0`. 
-## Requirements +The release workflow reruns tests, linting, type checks, docs builds, and +package builds before it publishes the distributions to PyPI and attaches the +same artifacts to a GitHub release. -- Python 3.10+ -- ijson (for streaming JSON parsing) -- rich (for terminal output) -- tqdm (for progress bars) +PyPI publishing uses GitHub Actions trusted publishing. Configure a trusted +publisher on PyPI for this repository and the `release` workflow, with the +`pypi` environment enabled in GitHub. -Optional: -- pandas, pyarrow (for DataFrame and Parquet support) -- matplotlib, folium (for visualization) -- numpy, scipy, scikit-learn (for scientific computing) +## Examples -## Contributing - -Contributions are welcome! Please feel free to submit a Pull Request. - -1. Fork the repository -2. Create your feature branch (`git checkout -b feature/featA`) -3. Commit your changes (`git commit -m 'Add some featA'`) -4. Push to the branch (`git push origin feature/featA`) -5. Open a Pull Request - -## Licence - -This project is licensed under the MIT Licence - see the [Licence](LICENSE) file for details. +```bash +python examples/main.py sleep-data/phase-1-1/data-streams.json +python examples/disc.py sleep-data/phase-1-1/data-streams.json +``` -## Acknowledgments +## Optional Dependencies -- [CARP - Copenhagen Research Platform](https://carp.dk/) +`pandas` and `pyarrow` enable dataframe and parquet support. `folium` enables plotting. diff --git a/docs/_static/.gitkeep b/docs/_static/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/docs/_static/.gitkeep @@ -0,0 +1 @@ + diff --git a/docs/_templates/.gitkeep b/docs/_templates/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/docs/_templates/.gitkeep @@ -0,0 +1 @@ + diff --git a/docs/api.rst b/docs/api.rst new file mode 100644 index 0000000..45844b3 --- /dev/null +++ b/docs/api.rst @@ -0,0 +1,142 @@ +API Reference +============= + +.. 
automodule:: carp + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: carp.study + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: carp.core.models + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: carp.core.fields + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: carp.core.files + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: carp.core.naming + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: carp.core.dependencies + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: carp.participants.parser + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: carp.participants.directory + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: carp.participants.view + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: carp.participants.service + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: carp.records.service + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: carp.schema.service + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: carp.export.service + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: carp.frames.service + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: carp.types.infer + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: carp.types.render + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: carp.types.service + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: carp.plotting.prepare + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: carp.plotting.render + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: carp.plotting.service + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: carp.commandline.app + :members: + :undoc-members: + :show-inheritance: + +.. 
automodule:: carp.commandline.common + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: carp.commandline.schema + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: carp.commandline.count + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: carp.commandline.participants + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: carp.commandline.export + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: carp.commandline.convert + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/architecture.rst b/docs/architecture.rst new file mode 100644 index 0000000..4500287 --- /dev/null +++ b/docs/architecture.rst @@ -0,0 +1,13 @@ +Architecture +============ + +The package is intentionally split into small services: + +* ``carp.study`` composes the public `CarpStudy` entrypoint. +* ``carp.participants`` handles participant parsing and lookup. +* ``carp.records`` streams and filters JSON records. +* ``carp.schema`` infers measurement schemas. +* ``carp.export`` writes JSON output and grouped files. +* ``carp.frames`` loads pandas dataframes and writes parquet files. +* ``carp.types`` generates dataclasses from sampled records. +* ``carp.plotting`` renders HTML maps for participant data. diff --git a/docs/cli.rst b/docs/cli.rst new file mode 100644 index 0000000..e15895a --- /dev/null +++ b/docs/cli.rst @@ -0,0 +1,13 @@ +CLI +=== + +The command line interface exposes the same core flows as the Python API. + +.. 
code-block:: bash + + carp schema sleep-data/phase-1-1/data-streams.json + carp count sleep-data/phase-1-1/data-streams.json + carp participants sleep-data/phase-1-1/data-streams.json + carp export sleep-data/phase-1-1/data-streams.json -o output.json -t dk.cachet.carp.stepcount + carp group sleep-data/phase-1-1/data-streams.json -o grouped_output + carp convert sleep-data/phase-1-1/data-streams.json -o output_parquet diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..ab221f3 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,24 @@ +"""Sphinx configuration for CARP Analytics.""" + +from __future__ import annotations + +import sys +from pathlib import Path + +project_root = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(project_root / "src")) + +project = "CARP Analytics Python" +author = "CARP Team" +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.napoleon", + "sphinx.ext.viewcode", +] +autodoc_typehints = "description" +napoleon_google_docstring = True +napoleon_numpy_docstring = False +templates_path = ["_templates"] +exclude_patterns = ["_build"] +html_theme = "sphinx_rtd_theme" +html_static_path = ["_static"] diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..e024384 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,11 @@ +CARP Analytics Python +===================== + +.. toctree:: + :maxdepth: 2 + :caption: Contents + + overview + api + cli + architecture diff --git a/docs/overview.rst b/docs/overview.rst new file mode 100644 index 0000000..ce5e00c --- /dev/null +++ b/docs/overview.rst @@ -0,0 +1,8 @@ +Overview +======== + +`carp-analytics-python` is built around :class:`carp.study.CarpStudy`. +It provides services for records, participants, schema discovery, export, +dataframe conversion, type generation, and plotting. + +The package is documented with Google-style docstrings and Sphinx autodoc. 
diff --git a/examples/demo.ipynb b/examples/demo.ipynb index a6bfc40..a8b8e60 100644 --- a/examples/demo.ipynb +++ b/examples/demo.ipynb @@ -1,504 +1,134 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "c33366d9", - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "from pathlib import Path\n", - "\n", - "if \"src\" not in sys.path:\n", - " sys.path.append(str(Path.cwd() / \"src\"))\n", - "\n", - "from sleepiness import SleepinessData\n", - "\n", - "file_paths = [\n", - " \"sleep-data/phase-1-1/data-streams.json\",\n", - " \"sleep-data/phase-2-1/data-streams.json\",\n", - " \"sleep-data/phase-3-1/data-streams.json\"\n", - "]\n", - "# OR\n", - "# file_paths = \"data/phase-1-1/data-streams.json\"\n", - "\n", - "sd = SleepinessData(file_paths)" - ] - }, - { - "cell_type": "markdown", - "id": "52dc794a", - "metadata": {}, - "source": [ - "## Participant Data Integration\n", - "When loading multiple data folders, the library automatically loads `participant-data.json` from each folder and unifies participants across folders (using email/SSN as identifiers)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "18e00b45", - "metadata": {}, - "outputs": [], - "source": [ - "# View all participants across all loaded data folders\n", - "sd.print_participants()" - ] - }, - { - "cell_type": "markdown", - "id": "914c2bed", - "metadata": {}, - "source": [ - "### Data with Participant Info\n", - "Iterate through data items enriched with participant information:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d35b3bd5", - "metadata": {}, - "outputs": [], - "source": [ - "# Get participant info for a specific deployment\n", - "# for item in sd._get_item_generator():\n", - "# deployment_id = item.get('studyDeploymentId')\n", - "# if deployment_id:\n", - "# participant = sd.get_participant(deployment_id)\n", - "# if participant:\n", - "# print(f\"Deployment: {deployment_id[:30]}...\")\n", - "# print(f\" Unified ID: {participant.unified_participant_id}\")\n", - "# print(f\" Email: {participant.email}\")\n", - "# print(f\" Source folder: {participant.source_folder}\")\n", - "# break\n", - "\n", - "# Get participant info\n", - "sd.participant(\"test@example.com\").info()\n", - "sd.participant(\"test@example.com\").print_info()\n", - "\n", - "# Get all data for this participant\n", - "count = 0\n", - "for item in sd.participant(\"test@example.com\").all_data():\n", - " print(item)\n", - " count += 1\n", - " if count >= 5:\n", - " print(\"Limit output for demo\")\n", - " break\n", - "\n", - "# Filter by data type\n", - "for item in sd.participant(\"test@example.com\").all_data(\"dk.cachet.carp.location\"):\n", - " print(item)\n", - "\n", - "# See available fields\n", - "sd.participant(\"test@example.com\").available_fields()\n", - "sd.participant(\"test@example.com\").print_available_fields()\n", - "\n", - "# See data types available\n", - "sd.participant(\"test@example.com\").data_types()\n", - "sd.participant(\"test@example.com\").print_data_types()\n", - "\n", - "# Get count\n", - 
"sd.participant(\"test@example.com\").count()\n", - "\n", - "# Get DataFrame\n", - "df = sd.participant(\"test@example.com\").dataframe(\"dk.cachet.carp.stepcount\")\n", - "\n", - "# Check if exists\n", - "sd.participant(\"test@example.com\").exists" - ] - }, - { - "cell_type": "markdown", - "id": "6145e273", - "metadata": {}, - "source": [ - "### DataFrame with Participant Info\n", - "Get a DataFrame enriched with participant columns:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "507cb8e8", - "metadata": {}, - "outputs": [], - "source": [ - "# Get DataFrame with participant columns\n", - "df = sd.get_dataframe_with_participants(\"dk.cachet.carp.stepcount\")\n", - "if df is not None and not df.empty:\n", - " print(df[['participant_id', 'participant_email', 'participant_folder']].head())" - ] - }, - { - "cell_type": "markdown", - "id": "d158d50b", - "metadata": {}, - "source": [ - "### Visualize Participant Data on Map\n", - "Generate a heatmap aggregating data for a specific participant across all their deployments:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "761607c2", - "metadata": {}, - "outputs": [], - "source": [ - "from sleepiness.plotting import LocationVisualizer\n", - "\n", - "# Create visualizer\n", - "viz = LocationVisualizer(sd)\n", - "\n", - "# Plot heatmap for a specific participant (e.g., P0002 who appears in all 3 phases)\n", - "viz.plot_participant_heatmap(\n", - " unified_participant_id=\"P0002\", # Choose a participant from the summary table\n", - " output_file=\"participant_heatmap.html\",\n", - " location_type=\"dk.cachet.carp.location\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "f9477bd0", - "metadata": {}, - "source": [ - "## 1. Schema Discovery\n", - "Scan the file to understand the structure of the data." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5877bce4", - "metadata": {}, - "outputs": [], - "source": [ - "sd.print_schema()" - ] - }, - { - "cell_type": "markdown", - "id": "77f655a6", - "metadata": {}, - "source": [ - "### Generate Type Definitions\n", - "You can generate a Python module with dataclasses representing the data schema. This allows for type-safe access to the data, including nested JSON objects." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e6bbdbd5", - "metadata": {}, - "outputs": [], - "source": [ - "import importlib\n", - "import sleepiness.reader\n", - "importlib.reload(sleepiness.reader)\n", - "\n", - "# Re-initialize sd to ensure latest code is used\n", - "sd = sleepiness.reader.SleepinessData(file_paths)\n", - "sd.generate_type_definitions(output_file=\"generated_types.py\", sample_size=500)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a2fd20bb", - "metadata": {}, - "outputs": [], - "source": [ - "# Example usage of generated types\n", - "try:\n", - " import generated_types\n", - " import importlib\n", - " importlib.reload(generated_types)\n", - " \n", - " # Read one item and convert\n", - " gen = sd._get_item_generator()\n", - " item = next(gen)\n", - " \n", - " obj = generated_types.SleepinessItem.from_dict(item)\n", - " print(f\"Converted object type: {type(obj)}\")\n", - " if obj.dataStream and obj.dataStream.dataType:\n", - " print(f\"Data Stream: {obj.dataStream.dataType.name}\")\n", - "except ImportError:\n", - " print(\"Could not import generated_types. 
Please restart kernel or check file.\")\n", - "except Exception as e:\n", - " print(f\"Error: {e}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "62ff430f", - "metadata": {}, - "outputs": [], - "source": [ - "item = next(sd._get_item_generator())\n", - "obj = generated_types.SleepinessItem.from_dict(item)\n", - "sd.generate_type_definitions(output_file=\"generated_types.py\", sample_size=500)\n", - "\n", - "item = next(sd._get_item_generator())\n", - "obj = generated_types.SleepinessItem.from_dict(item)\n", - "\n", - "# Type-safe access\n", - "print(obj.dataStream.dataType.name)" - ] - }, - { - "cell_type": "markdown", - "id": "f243a62f", - "metadata": {}, - "source": [ - "## 2. Count Items\n", - "Count the total number of records in the file." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "814969c3", - "metadata": {}, - "outputs": [], - "source": [ - "count = sd.count_items()\n", - "print(f\"Total items: {count}\")" - ] - }, - { - "cell_type": "markdown", - "id": "3f357eba", - "metadata": {}, - "source": [ - "## 3. Grouping Data\n", - "Split the large JSON file into smaller files based on the data type." - ] - }, - { - "cell_type": "markdown", - "id": "0e151d64", - "metadata": {}, - "source": [ - "### Explore Available Fields\n", - "You can scan a sample of the data to list all available fields in dot-notation. This is helpful for deciding which field to group by." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5c3c6be4", - "metadata": {}, - "outputs": [], - "source": [ - "fields = sd.list_all_fields(sample_size=500)\n", - "print(\"Available fields for grouping:\")\n", - "for f in fields:\n", - " print(f\" - {f}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "56ffebb8", - "metadata": {}, - "outputs": [], - "source": [ - "output_groups = \"output_groups\"\n", - "# sd.group_by_field(\"dataStream.studyDeploymentId\", output_groups)\n", - "sd.group_by_email(output_groups)" - ] - }, - { - "cell_type": "markdown", - "id": "3f9f3497", - "metadata": {}, - "source": [ - "## 4. Export to JSON\n", - "Export a specific data type to a separate JSON file." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2e9aa571", - "metadata": {}, - "outputs": [], - "source": [ - "sd.export_to_json(\"heartbeat.json\", data_type=\"dk.cachet.carp.heartbeat\")" - ] - }, - { - "cell_type": "markdown", - "id": "fc1c9eb8", - "metadata": {}, - "source": [ - "## 5. Convert to Parquet\n", - "Convert the data to Parquet format for efficient storage and loading." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9a648c91", - "metadata": {}, - "outputs": [], - "source": [ - "parquet_dir = \"output_parquet\"\n", - "sd.convert_to_parquet(parquet_dir)" - ] - }, - { - "cell_type": "markdown", - "id": "b5f02117", - "metadata": {}, - "source": [ - "## 6. Load DataFrame\n", - "Load data into a pandas DataFrame, utilizing the Parquet files if available." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9d9112e1", - "metadata": {}, - "outputs": [], - "source": [ - "# Load stepcount data\n", - "df = sd.get_dataframe(\"dk.cachet.carp.completedtask\", parquet_dir)\n", - "\n", - "if df is not None:\n", - " print(f\"Loaded {len(df)} records\")\n", - " display(df.head())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7472a455", - "metadata": {}, - "outputs": [], - "source": [ - "# df first row\n", - "df.iloc[313].measurement" - ] - }, - { - "cell_type": "markdown", - "id": "b10095ea", - "metadata": {}, - "source": [ - "## 7. Plotting\n", - "Generate a heatmap of user locations and overlay step count data." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a7d3b527", - "metadata": {}, - "outputs": [], - "source": [ - "from sleepiness.plotting import LocationVisualizer\n", - "\n", - "# Initialize visualizer\n", - "viz = LocationVisualizer(sd)\n", - "\n", - "# Pick a user ID (you can find one from the grouping step or list_all_fields)\n", - "# For demo purposes, let's try to find a valid ID from the loaded dataframe if available, \n", - "# or just use a hardcoded one if you know it.\n", - "study_deployment_id = \"0efd5a7f-6428-48db-8099-8d65a62606b4\" # Example ID\n", - "\n", - "# Generate heatmap\n", - "# Note: Ensure you have 'dk.cachet.carp.geolocation' and 'dk.cachet.carp.stepcount' data available\n", - "# You might need to run convert_to_parquet first if you haven't.\n", - "\n", - "\n", - "viz.plot_user_heatmap(\n", - " study_deployment_id=study_deployment_id,\n", - " location_type=\"dk.cachet.carp.location\", # Adjust type name if different\n", - " step_type=\"dk.cachet.carp.stepcount\", # Adjust type name if different\n", - " output_file=\"user_heatmap.html\"\n", - ")\n", - "\n", - "# Display the map in the notebook\n", - "# from IPython.display import IFrame\n", - "# IFrame(src='user_heatmap.html', width=700, height=600)" - ] - }, - { - 
"cell_type": "markdown", - "id": "63223a42", - "metadata": {}, - "source": [ - "### Plotting with Type-Safe Objects\n", - "You can also convert the data to type-safe objects and pass them directly to the visualizer. This is useful if you want to manipulate the objects before plotting." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "997894b1", - "metadata": {}, - "outputs": [], - "source": [ - "# 1. Get DataFrames\n", - "df_loc = sd.get_dataframe(\"dk.cachet.carp.location\", parquet_dir)\n", - "df_steps = sd.get_dataframe(\"dk.cachet.carp.stepcount\", parquet_dir)\n", - "\n", - "# 2. Filter by User\n", - "# Using the same ID as above\n", - "if df_loc is not None and not df_loc.empty:\n", - " df_loc_user = df_loc[df_loc['studyDeploymentId'] == study_deployment_id]\n", - " df_steps_user = df_steps[df_steps['studyDeploymentId'] == study_deployment_id] if df_steps is not None else pd.DataFrame()\n", - "\n", - " # 3. Convert to Objects\n", - " # Note: generated_types.SleepinessItem.from_dict expects a dictionary structure matching the JSON.\n", - " # If df_loc comes from Parquet, it might have nested columns as dicts (if read correctly) or flat columns.\n", - " # Let's assume it has nested columns or we convert it.\n", - " \n", - " # If the dataframe has nested dicts (e.g. 'measurement' column contains dicts):\n", - " location_items = [generated_types.SleepinessItem.from_dict(row) for row in df_loc_user.to_dict('records')]\n", - " step_items = [generated_types.SleepinessItem.from_dict(row) for row in df_steps_user.to_dict('records')]\n", - " \n", - " print(f\"Converted {len(location_items)} location items and {len(step_items)} step items.\")\n", - "\n", - " # 4. 
Plot\n", - " viz.plot_heatmap_from_items(\n", - " location_items=location_items,\n", - " step_items=step_items,\n", - " output_file=\"user_heatmap_objects.html\"\n", - " )\n", - " \n", - " # Display\n", - " # IFrame(src='user_heatmap_objects.html', width=700, height=600)\n", - "else:\n", - " print(\"No data found to plot.\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "carp-analytics-python (3.13.5)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.5" - } - }, - "nbformat": 4, - "nbformat_minor": 5 + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# CARP Analytics Notebook Example\n", + "\n", + "This notebook shows the current `CarpStudy` API with the bundled `sleep-data` dataset or the committed test fixtures." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "from pathlib import Path\n", + "\n", + "if \"src\" not in sys.path:\n", + " sys.path.append(str(Path.cwd() / \"src\"))\n", + "\n", + "from carp import CarpStudy\n", + "\n", + "\n", + "def default_paths() -> list[Path]:\n", + " sleep_paths = sorted(Path(\"sleep-data\").glob(\"phase-*/data-streams.json\"))\n", + " if sleep_paths:\n", + " return sleep_paths\n", + " return sorted(Path(\"tests/fixtures/multi_phase\").glob(\"*/data-streams.json\"))\n", + "\n", + "\n", + "file_paths = default_paths()\n", + "study = CarpStudy(file_paths)\n", + "file_paths" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(f\"Total records: {study.records.count():,}\")\n", + "print(f\"Data types: {study.records.data_types()}\")\n", + "study.schema.scan()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "participant_rows = study.participants.summary_rows()\n", + "participant_rows[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "example_email = next((row[\"emails\"] for row in participant_rows if row[\"emails\"] != \"N/A\"), None)\n", + "participant = study.participant(example_email) if example_email else None\n", + "participant.info() if participant else None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " step_frame = study.frames.get_dataframe(\"dk.cachet.carp.stepcount\")\n", + " step_frame.head()\n", + "except RuntimeError as exc:\n", + " print(exc)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "generated_path = Path(\"examples/generated_types.py\")\n", + "study.types.generate(generated_path, 
sample_size=25)\n", + "\n", + "import generated_types\n", + "\n", + "first_record = next(study.records.iter_records())\n", + "generated_types.StudyItem.from_dict(first_record)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if participant is not None:\n", + " try:\n", + " participant.plot_location(output_file=\"examples/user_heatmap.html\")\n", + " except RuntimeError as exc:\n", + " print(exc)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/examples/disc.py b/examples/disc.py index 4de8498..0832a0a 100644 --- a/examples/disc.py +++ b/examples/disc.py @@ -1,59 +1,38 @@ -# discover_schema.py -import ijson -from collections import defaultdict -import yaml - -def discover_schema(file_path): - schema = defaultdict(set) - - with open(file_path, 'rb') as f: - parser = ijson.parse(f) - current_path = [] - for prefix, event, value in parser: - current_path = prefix.split('.') - full_path = '.'.join(current_path) - - if event == 'map_key': - current_path.append(value) - continue - elif event in ('start_map', 'start_array'): - pass - elif event in ('end_map', 'end_array'): - if current_path: - current_path.pop() - continue - - # leaf value - if value is None: - type_name = 'null' - elif event == 'string': - type_name = 'string' - elif event in ('number', 'integer'): - type_name = 'number' - elif event == 'boolean': - type_name = 'boolean' - else: - type_name = event - - schema['.'.join(current_path)].add(type_name) - - # Convert to nice nested dict - nested = {} - for path, types in schema.items(): - parts = path.split('.') 
- d = nested - for part in parts[:-1]: - if part not in d: - d[part] = {'_type': 'object', '_children': {}} - elif '_children' not in d[part]: - d[part]['_children'] = {} - d = d[part]['_children'] - key = parts[-1] - d[key] = {'_type': list(types)} if len(types) > 1 else {'_type': list(types)[0]} - - return nested - -if __name__ == '__main__': - import sys - schema = discover_schema(sys.argv[1]) - print(yaml.dump(schema, default_flow_style=False, sort_keys=False)) \ No newline at end of file +"""Compact schema-discovery example for `CarpStudy`.""" + +from __future__ import annotations + +import sys +from pathlib import Path + +from carp import CarpStudy + + +def _default_paths() -> list[Path]: + """Return bundled data-stream files for schema discovery.""" + + sleep_paths = sorted(Path("sleep-data").glob("phase-*/data-streams.json")) + if sleep_paths: + return sleep_paths + return sorted(Path("tests/fixtures/multi_phase").glob("*/data-streams.json")) + + +def main() -> int: + """Load a study and print schema and field examples.""" + + file_paths = [Path(arg) for arg in sys.argv[1:]] or _default_paths() + study = CarpStudy(file_paths, load_participants=False) + print("Observed data types:") + for data_type in study.records.data_types(): + print(f" - {data_type}") + print("\nSchema summary:") + for data_type, fields in study.schema.scan().items(): + print(f" {data_type}: {', '.join(fields)}") + print("\nSample field paths:") + for field in study.records.list_fields(sample_size=3)[:12]: + print(f" - {field}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/examples/generated_types.py b/examples/generated_types.py index afd1dbe..7e8f3bf 100644 --- a/examples/generated_types.py +++ b/examples/generated_types.py @@ -1,250 +1,122 @@ -# Auto-generated type definitions +"""Example generated dataclasses for CARP study records.""" from __future__ import annotations -from dataclasses import dataclass -from typing import List, Optional, Any, 
Dict + import json +from dataclasses import dataclass +from typing import Any -def parse_json_field(value): - if isinstance(value, str): - try: - return json.loads(value) - except: - return value - return value - -@dataclass -class SleepinessItem: - sequenceId: int = None - studyDeploymentId: str = None - deviceRoleName: str = None - measurement: Measurement = None - triggerIds: List[int] = None - syncPoint: SyncPoint = None - dataStream: DataStream = None - @classmethod - def from_dict(cls, obj: Any) -> Any: - if not isinstance(obj, dict): return obj - instance = cls() - val = obj.get('sequenceId') - instance.sequenceId = val - val = obj.get('studyDeploymentId') - instance.studyDeploymentId = val - val = obj.get('deviceRoleName') - instance.deviceRoleName = val - val = obj.get('measurement') - if val is not None: - instance.measurement = Measurement.from_dict(val) - val = obj.get('triggerIds') - instance.triggerIds = val - val = obj.get('syncPoint') - if val is not None: - instance.syncPoint = SyncPoint.from_dict(val) - val = obj.get('dataStream') - if val is not None: - instance.dataStream = DataStream.from_dict(val) - return instance - -@dataclass -class Measurement: - sensorStartTime: int = None - data: Data = None +def parse_json_field(value: Any) -> Any: + """Parse JSON text when a field stores serialized payload data.""" + + if not isinstance(value, str): + return value + try: + return json.loads(value) + except json.JSONDecodeError: + return value + + +@dataclass(slots=True) +class DataType: + """Data-type metadata for one CARP record.""" + + namespace: str | None = None + name: str | None = None @classmethod def from_dict(cls, obj: Any) -> Any: - if not isinstance(obj, dict): return obj - instance = cls() - val = obj.get('sensorStartTime') - instance.sensorStartTime = val - val = obj.get('data') - if val is not None: - instance.data = Data.from_dict(val) - return instance - -@dataclass -class Data: - __type: str = None - period: int = None - deviceType: 
str = None - deviceRoleName: str = None - batteryLevel: int = None - batteryStatus: str = None - screenEvent: str = None - type_: str = None - confidence: int = None - triggerId: int = None - taskName: str = None - destinationDeviceRoleName: str = None - control: str = None - steps: int = None - time: str = None - speed: float = None - isMock: bool = None - heading: float = None - accuracy: float = None - altitude: float = None - latitude: float = None - longitude: float = None - speedAccuracy: float = None - headingAccuracy: float = None - verticalAccuracy: float = None - elapsedRealtimeNanos: int = None - elapsedRealtimeUncertaintyNanos: float = None - date: str = None - sunset: str = None - country: str = None - sunrise: str = None - tempMax: float = None - tempMin: float = None - areaName: str = None - humidity: float = None - pressure: float = None - windSpeed: float = None - cloudiness: float = None - windDegree: float = None - temperature: float = None - weatherMain: str = None - weatherDescription: str = None + """Build a data-type object from a dictionary.""" + + return obj if not isinstance(obj, dict) else cls(obj.get("namespace"), obj.get("name")) + + +@dataclass(slots=True) +class DataStream: + """Stream metadata attached to a CARP record.""" + + studyDeploymentId: str | None = None + deviceRoleName: str | None = None + dataType: DataType | None = None @classmethod def from_dict(cls, obj: Any) -> Any: - if not isinstance(obj, dict): return obj - instance = cls() - val = obj.get('__type') - instance.__type = val - val = obj.get('period') - instance.period = val - val = obj.get('deviceType') - instance.deviceType = val - val = obj.get('deviceRoleName') - instance.deviceRoleName = val - val = obj.get('batteryLevel') - instance.batteryLevel = val - val = obj.get('batteryStatus') - instance.batteryStatus = val - val = obj.get('screenEvent') - instance.screenEvent = val - val = obj.get('type') - instance.type_ = val - val = obj.get('confidence') - 
instance.confidence = val - val = obj.get('triggerId') - instance.triggerId = val - val = obj.get('taskName') - instance.taskName = val - val = obj.get('destinationDeviceRoleName') - instance.destinationDeviceRoleName = val - val = obj.get('control') - instance.control = val - val = obj.get('steps') - instance.steps = val - val = obj.get('time') - instance.time = val - val = obj.get('speed') - instance.speed = val - val = obj.get('isMock') - instance.isMock = val - val = obj.get('heading') - instance.heading = val - val = obj.get('accuracy') - instance.accuracy = val - val = obj.get('altitude') - instance.altitude = val - val = obj.get('latitude') - instance.latitude = val - val = obj.get('longitude') - instance.longitude = val - val = obj.get('speedAccuracy') - instance.speedAccuracy = val - val = obj.get('headingAccuracy') - instance.headingAccuracy = val - val = obj.get('verticalAccuracy') - instance.verticalAccuracy = val - val = obj.get('elapsedRealtimeNanos') - instance.elapsedRealtimeNanos = val - val = obj.get('elapsedRealtimeUncertaintyNanos') - instance.elapsedRealtimeUncertaintyNanos = val - val = obj.get('date') - instance.date = val - val = obj.get('sunset') - instance.sunset = val - val = obj.get('country') - instance.country = val - val = obj.get('sunrise') - instance.sunrise = val - val = obj.get('tempMax') - instance.tempMax = val - val = obj.get('tempMin') - instance.tempMin = val - val = obj.get('areaName') - instance.areaName = val - val = obj.get('humidity') - instance.humidity = val - val = obj.get('pressure') - instance.pressure = val - val = obj.get('windSpeed') - instance.windSpeed = val - val = obj.get('cloudiness') - instance.cloudiness = val - val = obj.get('windDegree') - instance.windDegree = val - val = obj.get('temperature') - instance.temperature = val - val = obj.get('weatherMain') - instance.weatherMain = val - val = obj.get('weatherDescription') - instance.weatherDescription = val - return instance - -@dataclass -class SyncPoint: 
- synchronizedOn: str = None - sensorTimestampAtSyncPoint: int = None - relativeClockSpeed: float = None + """Build stream metadata from a dictionary.""" + + if not isinstance(obj, dict): + return obj + return cls( + studyDeploymentId=obj.get("studyDeploymentId"), + deviceRoleName=obj.get("deviceRoleName"), + dataType=DataType.from_dict(obj.get("dataType")), + ) + + +@dataclass(slots=True) +class MeasurementData: + """Common measurement payload used in the examples.""" + + steps: int | None = None + latitude: float | None = None + longitude: float | None = None + response_json: Any = None @classmethod def from_dict(cls, obj: Any) -> Any: - if not isinstance(obj, dict): return obj - instance = cls() - val = obj.get('synchronizedOn') - instance.synchronizedOn = val - val = obj.get('sensorTimestampAtSyncPoint') - instance.sensorTimestampAtSyncPoint = val - val = obj.get('relativeClockSpeed') - instance.relativeClockSpeed = val - return instance - -@dataclass -class DataStream: - studyDeploymentId: str = None - deviceRoleName: str = None - dataType: DataType = None + """Build a measurement payload from a dictionary.""" + + if not isinstance(obj, dict): + return obj + return cls( + steps=obj.get("steps"), + latitude=obj.get("latitude"), + longitude=obj.get("longitude"), + response_json=parse_json_field(obj.get("response_json")), + ) + + +@dataclass(slots=True) +class Measurement: + """Measurement wrapper for one CARP record.""" + + sensorStartTime: int | None = None + data: MeasurementData | None = None @classmethod def from_dict(cls, obj: Any) -> Any: - if not isinstance(obj, dict): return obj - instance = cls() - val = obj.get('studyDeploymentId') - instance.studyDeploymentId = val - val = obj.get('deviceRoleName') - instance.deviceRoleName = val - val = obj.get('dataType') - if val is not None: - instance.dataType = DataType.from_dict(val) - return instance - -@dataclass -class DataType: - namespace: str = None - name: str = None + """Build a measurement object from 
a dictionary.""" + + if not isinstance(obj, dict): + return obj + return cls( + sensorStartTime=obj.get("sensorStartTime"), + data=MeasurementData.from_dict(obj.get("data")), + ) + + +@dataclass(slots=True) +class StudyItem: + """Example typed CARP record used by the examples notebook.""" + + sequenceId: int | None = None + studyDeploymentId: str | None = None + deviceRoleName: str | None = None + triggerIds: list[Any] | None = None + measurement: Measurement | None = None + dataStream: DataStream | None = None @classmethod def from_dict(cls, obj: Any) -> Any: - if not isinstance(obj, dict): return obj - instance = cls() - val = obj.get('namespace') - instance.namespace = val - val = obj.get('name') - instance.name = val - return instance + """Build a typed study item from a dictionary.""" + + if not isinstance(obj, dict): + return obj + return cls( + sequenceId=obj.get("sequenceId"), + studyDeploymentId=obj.get("studyDeploymentId"), + deviceRoleName=obj.get("deviceRoleName"), + triggerIds=obj.get("triggerIds"), + measurement=Measurement.from_dict(obj.get("measurement")), + dataStream=DataStream.from_dict(obj.get("dataStream")), + ) diff --git a/examples/main.py b/examples/main.py index 24edf10..0ff30ed 100644 --- a/examples/main.py +++ b/examples/main.py @@ -1,41 +1,49 @@ #!/usr/bin/env python3 -""" -Example script demonstrating basic usage of the carp-analytics-python library. 
+"""End-to-end example usage for `CarpStudy`.""" -Run from the project root after installing the package: - python examples/main.py data/study/data-streams.json -""" +from __future__ import annotations -from carp import CarpDataStream import sys +from pathlib import Path -def main(): - file_path = "data/study/data-streams.json" - if len(sys.argv) > 1: - file_path = sys.argv[1] - - print(f"Loading {file_path}...") - data = CarpDataStream(file_path) - - # Scan and print schema - print("Scanning schema...") - data.print_schema() - - # Example: Grouping data by data type - # output_dir = "output_groups" - # print(f"Grouping data into {output_dir}...") - # data.group_by_field("dataStream.dataType.name", output_dir) - - # Convert to Parquet - parquet_dir = "output_parquet" - data.convert_to_parquet(parquet_dir) - - # Load back as DataFrame - df = data.get_dataframe("dk.cachet.carp.stepcount", parquet_dir) - if df is not None: - print(f"Loaded {len(df)} stepcount records.") - print(df.head()) - - -if __name__ == '__main__': - main() \ No newline at end of file +from carp import CarpStudy + + +def _default_paths() -> list[Path]: + """Return bundled study paths for the example.""" + + sleep_paths = sorted(Path("sleep-data").glob("phase-*/data-streams.json")) + if sleep_paths: + return sleep_paths + fixture_root = Path("tests/fixtures/multi_phase") + return sorted(fixture_root.glob("*/data-streams.json")) + + +def main() -> int: + """Run the example against one or more study files.""" + + file_paths = [Path(arg) for arg in sys.argv[1:]] or _default_paths() + study = CarpStudy(file_paths, load_participants=True) + print(f"Loaded {len(file_paths)} study file(s)") + print(f"Total records: {study.records.count():,}") + print(f"Data types: {', '.join(study.records.data_types())}") + rows = study.participants.summary_rows() + print(f"Unified participants: {len(rows)}") + for row in rows[:3]: + print(f" {row['unified_id']}: {row['emails']} ({row['deployments']} deployments)") + 
example_email = next((row["emails"] for row in rows if row["emails"] != "N/A"), None) + if example_email: + participant = study.participant(example_email) + print(f"Example participant: {participant.info()}") + try: + step_frame = study.frames.get_dataframe("dk.cachet.carp.stepcount") + except RuntimeError as exc: + print(f"Skipping dataframe example: {exc}") + else: + print("Step-count preview:") + print(step_frame.head().to_string(index=False)) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/pyproject.toml b/pyproject.toml index 0388c4e..f657906 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,6 +80,17 @@ dev = [ "mypy>=1.5.0", "ruff>=0.1.0", "pre-commit>=3.4.0", + "sphinx>=8.1.3", + "sphinx-rtd-theme>=3.1.0", +] +test = [ + "pandas>=2.0.0", + "pyarrow>=14.0.0", + "matplotlib>=3.7.0", + "folium>=0.14.0", +] +docs = [ + "sphinx>=8.0.0", ] [build-system] @@ -101,7 +112,10 @@ packages = ["src/carp"] [tool.ruff] target-version = "py310" -line-length = 100 +line-length = 140 +extend-exclude = ["examples/demo.ipynb"] + +[tool.ruff.lint] select = [ "E", # pycodestyle errors "W", # pycodestyle warnings @@ -116,7 +130,7 @@ ignore = [ "B008", # do not perform function calls in argument defaults ] -[tool.ruff.isort] +[tool.ruff.lint.isort] known-first-party = ["carp"] [tool.mypy] @@ -126,16 +140,25 @@ warn_unused_configs = true disallow_untyped_defs = true exclude = ["examples/", "tests/"] +[[tool.mypy.overrides]] +module = ["ijson", "pandas", "pyarrow", "pyarrow.*", "folium", "folium.*"] +ignore_missing_imports = true + [tool.pytest.ini_options] testpaths = ["tests"] python_files = ["test_*.py"] -addopts = "-v --tb=short" +addopts = "-v --tb=short --cov=src/carp --cov-branch --cov-report=term-missing" +markers = [ + "real_data: optional tests that use local sleep-data when available", +] [tool.coverage.run] source = ["src/carp"] branch = true [tool.coverage.report] +fail_under = 100 +show_missing = true exclude_lines = [ "pragma: 
no cover", "if TYPE_CHECKING:", diff --git a/src/carp/__init__.py b/src/carp/__init__.py index 40bd9bd..e24f3e6 100644 --- a/src/carp/__init__.py +++ b/src/carp/__init__.py @@ -1,20 +1,6 @@ -""" -CARP Analytics Python - A high-performance library for processing CARP study data. +"""Public package interface for CARP Analytics.""" -This library provides tools for streaming, processing, and analysing large JSON -data streams from CARP (Copenhagen Research Platform) clinical and research studies. -""" +from .study import CarpStudy -from .reader import CarpDataStream, ParticipantManager, ParticipantInfo, ParticipantAccessor - -__version__ = "0.1.0" -__author__ = "Copenhagen Research Platform" -__email__ = "support@carp.dk" - -__all__ = [ - "CarpDataStream", - "ParticipantManager", - "ParticipantInfo", - "ParticipantAccessor", - "__version__", -] +__all__ = ["CarpStudy"] +__version__ = "0.2.0" diff --git a/src/carp/cli.py b/src/carp/cli.py index 586416f..e751160 100644 --- a/src/carp/cli.py +++ b/src/carp/cli.py @@ -1,139 +1,10 @@ -""" -Command-line interface for CARP Analytics Python. 
-""" +"""Command-line entrypoint for CARP Analytics.""" -import argparse -import sys - -from rich.console import Console - -console = Console() +from __future__ import annotations +import sys -def main() -> int: - """Main entry point for the CLI.""" - parser = argparse.ArgumentParser( - prog="carp", - description="CARP Analytics - Process and analyze data from CARP research studies", - ) - parser.add_argument( - "--version", - action="store_true", - help="Show version and exit", - ) - - subparsers = parser.add_subparsers(dest="command", help="Available commands") - - # Schema command - schema_parser = subparsers.add_parser("schema", help="Scan and print data schema") - schema_parser.add_argument("files", nargs="+", help="JSON data files to process") - - # Convert command - convert_parser = subparsers.add_parser("convert", help="Convert JSON to Parquet") - convert_parser.add_argument("files", nargs="+", help="JSON data files to convert") - convert_parser.add_argument( - "-o", "--output", - default="output_parquet", - help="Output directory for Parquet files (default: output_parquet)", - ) - convert_parser.add_argument( - "--batch-size", - type=int, - default=10000, - help="Batch size for conversion (default: 10000)", - ) - - # Count command - count_parser = subparsers.add_parser("count", help="Count items in data files") - count_parser.add_argument("files", nargs="+", help="JSON data files to count") - - # Participants command - participants_parser = subparsers.add_parser( - "participants", - help="List participants from data files", - ) - participants_parser.add_argument("files", nargs="+", help="JSON data files to process") - - # Export command - export_parser = subparsers.add_parser("export", help="Export data to JSON") - export_parser.add_argument("files", nargs="+", help="JSON data files to process") - export_parser.add_argument( - "-o", "--output", - required=True, - help="Output JSON file path", - ) - export_parser.add_argument( - "-t", "--type", - 
dest="data_type", - help="Filter by data type (e.g., dk.cachet.carp.stepcount)", - ) - - # Group command - group_parser = subparsers.add_parser("group", help="Group data by field") - group_parser.add_argument("files", nargs="+", help="JSON data files to process") - group_parser.add_argument( - "-f", "--field", - default="dataStream.dataType.name", - help="Field path to group by (default: dataStream.dataType.name)", - ) - group_parser.add_argument( - "-o", "--output", - default="output_grouped", - help="Output directory (default: output_grouped)", - ) - - args = parser.parse_args() - - if args.version: - from carp import __version__ - console.print(f"carp-analytics-python version {__version__}") - return 0 - - if not args.command: - parser.print_help() - return 0 - - # Import here to avoid slow startup for --help - from carp import CarpDataStream - - try: - if args.command == "schema": - sd = CarpDataStream(args.files, load_participants=False) - sd.print_schema() - - elif args.command == "convert": - sd = CarpDataStream(args.files, load_participants=False) - sd.convert_to_parquet(args.output, batch_size=args.batch_size) - - elif args.command == "count": - sd = CarpDataStream(args.files, load_participants=False) - count = sd.count_items() - console.print(f"[bold green]Total items: {count:,}[/bold green]") - - elif args.command == "participants": - sd = CarpDataStream(args.files, load_participants=True) - sd.print_participants() - - elif args.command == "export": - sd = CarpDataStream(args.files, load_participants=False) - sd.export_to_json(args.output, data_type=args.data_type) - - elif args.command == "group": - sd = CarpDataStream(args.files, load_participants=False) - sd.group_by_field(args.field, args.output) - - except FileNotFoundError as e: - console.print(f"[bold red]Error: {e}[/bold red]") - return 1 - except KeyboardInterrupt: - console.print("\n[yellow]Interrupted.[/yellow]") - return 130 - except Exception as e: - console.print(f"[bold red]Error: 
{e}[/bold red]") - return 1 - - return 0 - +from carp.commandline.app import main if __name__ == "__main__": sys.exit(main()) diff --git a/src/carp/commandline/__init__.py b/src/carp/commandline/__init__.py new file mode 100644 index 0000000..9472d5e --- /dev/null +++ b/src/carp/commandline/__init__.py @@ -0,0 +1 @@ +"""Command-line support for CARP Analytics.""" diff --git a/src/carp/commandline/app.py b/src/carp/commandline/app.py new file mode 100644 index 0000000..90e5a9f --- /dev/null +++ b/src/carp/commandline/app.py @@ -0,0 +1,56 @@ +"""Argument parsing and dispatch for the CARP CLI.""" + +from __future__ import annotations + +import argparse + +from .common import console, print_version +from .convert import register as register_convert +from .count import register as register_count +from .export import register_export, register_group +from .participants import register as register_participants +from .schema import register as register_schema + + +def _build_parser() -> argparse.ArgumentParser: + """Construct the top-level CLI parser.""" + + parser = argparse.ArgumentParser( + prog="carp", + description="CARP Analytics - Process and analyze data from CARP research studies", + ) + parser.add_argument("--version", action="store_true", help="Show version and exit") + subparsers = parser.add_subparsers(dest="command", help="Available commands") + for register in ( + register_schema, + register_convert, + register_count, + register_participants, + register_export, + register_group, + ): + register(subparsers) + return parser + + +def main(argv: list[str] | None = None) -> int: + """Run the CARP command-line interface.""" + + parser = _build_parser() + args = parser.parse_args(argv) + if args.version: + return print_version() + if not args.command: + parser.print_help() + return 0 + try: + return int(args.handler(args)) + except FileNotFoundError as exc: + console.print(f"[bold red]Error: {exc}[/bold red]") + return 1 + except KeyboardInterrupt: + 
console.print("\n[yellow]Interrupted.[/yellow]") + return 130 + except Exception as exc: + console.print(f"[bold red]Error: {exc}[/bold red]") + return 1 diff --git a/src/carp/commandline/common.py b/src/carp/commandline/common.py new file mode 100644 index 0000000..0f558f4 --- /dev/null +++ b/src/carp/commandline/common.py @@ -0,0 +1,48 @@ +"""Shared CLI helpers and presenters.""" + +from __future__ import annotations + +from typing import Any + +from rich.console import Console +from rich.table import Table + +from carp import __version__ +from carp.study import CarpStudy + +console = Console() + + +def build_study(files: Any, load_participants: bool = True) -> CarpStudy: + """Construct a study from CLI arguments.""" + + return CarpStudy(files, load_participants=load_participants) + + +def print_version() -> int: + """Print the package version and return a success status.""" + + console.print(f"carp-analytics-python version {__version__}") + return 0 + + +def print_schema(schema_map: dict[str, list[str]]) -> None: + """Render a schema table.""" + + table = Table(title="Inferred Schema") + table.add_column("Data Type", style="cyan") + table.add_column("Fields", style="magenta") + for data_type, fields in schema_map.items(): + table.add_row(data_type, ", ".join(fields)) + console.print(table) + + +def print_participants(rows: list[dict[str, str]]) -> None: + """Render participant summary rows.""" + + table = Table(title="Participants Summary") + for column in ("unified_id", "deployments", "folders", "emails", "ssns", "names"): + table.add_column(column.replace("_", " ").title()) + for row in rows: + table.add_row(*(row[key] for key in row)) + console.print(table) diff --git a/src/carp/commandline/convert.py b/src/carp/commandline/convert.py new file mode 100644 index 0000000..fede4d2 --- /dev/null +++ b/src/carp/commandline/convert.py @@ -0,0 +1,28 @@ +"""CLI command for parquet conversion.""" + +from __future__ import annotations + +from typing import Any + +from 
.common import build_study, console + + +def register(subparsers: Any) -> None: + """Register the `convert` subcommand.""" + + parser = subparsers.add_parser("convert", help="Convert JSON to Parquet") + parser.add_argument("files", nargs="+", help="JSON data files to convert") + parser.add_argument("-o", "--output", default="output_parquet", help="Output directory") + parser.add_argument("--batch-size", type=int, default=10_000, help="Batch size") + parser.set_defaults(handler=run) + + +def run(args: Any) -> int: + """Execute the `convert` subcommand.""" + + files = build_study(args.files, load_participants=False).frames.convert_to_parquet( + args.output, + batch_size=args.batch_size, + ) + console.print(f"[bold green]Created {len(files)} parquet files.[/bold green]") + return 0 diff --git a/src/carp/commandline/count.py b/src/carp/commandline/count.py new file mode 100644 index 0000000..37eebfa --- /dev/null +++ b/src/carp/commandline/count.py @@ -0,0 +1,23 @@ +"""CLI command for record counting.""" + +from __future__ import annotations + +from typing import Any + +from .common import build_study, console + + +def register(subparsers: Any) -> None: + """Register the `count` subcommand.""" + + parser = subparsers.add_parser("count", help="Count items in data files") + parser.add_argument("files", nargs="+", help="JSON data files to count") + parser.set_defaults(handler=run) + + +def run(args: Any) -> int: + """Execute the `count` subcommand.""" + + count = build_study(args.files, load_participants=False).records.count() + console.print(f"[bold green]Total items: {count:,}[/bold green]") + return 0 diff --git a/src/carp/commandline/export.py b/src/carp/commandline/export.py new file mode 100644 index 0000000..b8be9b9 --- /dev/null +++ b/src/carp/commandline/export.py @@ -0,0 +1,54 @@ +"""CLI commands for exporting study data.""" + +from __future__ import annotations + +from typing import Any + +from .common import build_study, console + + +def 
register_export(subparsers: Any) -> None: + """Register the `export` subcommand.""" + + parser = subparsers.add_parser("export", help="Export data to JSON") + parser.add_argument("files", nargs="+", help="JSON data files to process") + parser.add_argument("-o", "--output", required=True, help="Output JSON file path") + parser.add_argument("-t", "--type", dest="data_type", help="Filter by data type") + parser.set_defaults(handler=run_export) + + +def register_group(subparsers: Any) -> None: + """Register the `group` subcommand.""" + + parser = subparsers.add_parser("group", help="Group data by field") + parser.add_argument("files", nargs="+", help="JSON data files to process") + parser.add_argument( + "-f", + "--field", + default="dataStream.dataType.name", + help="Field path to group by", + ) + parser.add_argument("-o", "--output", default="output_grouped", help="Output directory") + parser.set_defaults(handler=run_group) + + +def run_export(args: Any) -> int: + """Execute the `export` subcommand.""" + + output = build_study(args.files, load_participants=False).export.export_json( + args.output, + args.data_type, + ) + console.print(f"[bold green]Exported data to {output}[/bold green]") + return 0 + + +def run_group(args: Any) -> int: + """Execute the `group` subcommand.""" + + files = build_study(args.files, load_participants=False).export.group_by_field( + args.field, + args.output, + ) + console.print(f"[bold green]Created {len(files)} grouped files.[/bold green]") + return 0 diff --git a/src/carp/commandline/participants.py b/src/carp/commandline/participants.py new file mode 100644 index 0000000..a58ddb5 --- /dev/null +++ b/src/carp/commandline/participants.py @@ -0,0 +1,22 @@ +"""CLI command for participant summaries.""" + +from __future__ import annotations + +from typing import Any + +from .common import build_study, print_participants + + +def register(subparsers: Any) -> None: + """Register the `participants` subcommand.""" + + parser = 
subparsers.add_parser("participants", help="List participants from data files") + parser.add_argument("files", nargs="+", help="JSON data files to process") + parser.set_defaults(handler=run) + + +def run(args: Any) -> int: + """Execute the `participants` subcommand.""" + + print_participants(build_study(args.files).participants.summary_rows()) + return 0 diff --git a/src/carp/commandline/schema.py b/src/carp/commandline/schema.py new file mode 100644 index 0000000..c0d1386 --- /dev/null +++ b/src/carp/commandline/schema.py @@ -0,0 +1,22 @@ +"""CLI command for schema discovery.""" + +from __future__ import annotations + +from typing import Any + +from .common import build_study, print_schema + + +def register(subparsers: Any) -> None: + """Register the `schema` subcommand.""" + + parser = subparsers.add_parser("schema", help="Scan and print data schema") + parser.add_argument("files", nargs="+", help="JSON data files to process") + parser.set_defaults(handler=run) + + +def run(args: Any) -> int: + """Execute the `schema` subcommand.""" + + print_schema(build_study(args.files, load_participants=False).schema.scan()) + return 0 diff --git a/src/carp/constants.py b/src/carp/constants.py new file mode 100644 index 0000000..f7ad041 --- /dev/null +++ b/src/carp/constants.py @@ -0,0 +1,7 @@ +"""Shared constants for CARP Analytics.""" + +DATA_STREAM_FILE = "data-streams.json" +PARTICIPANT_FILE = "participant-data.json" +DEFAULT_LOCATION_TYPE = "dk.cachet.carp.location" +DEFAULT_STEP_TYPE = "dk.cachet.carp.stepcount" +UNKNOWN_VALUE = "unknown" diff --git a/src/carp/core/__init__.py b/src/carp/core/__init__.py new file mode 100644 index 0000000..d99bee6 --- /dev/null +++ b/src/carp/core/__init__.py @@ -0,0 +1,18 @@ +"""Core utilities shared across CARP Analytics services.""" + +from .fields import collect_field_paths, deployment_id_from_record, full_data_type +from .files import JsonArrayWriter, iter_json_array, resolve_paths +from .models import ParticipantInfo +from 
.naming import parquet_stem, sanitize_filename + +__all__ = [ + "JsonArrayWriter", + "ParticipantInfo", + "collect_field_paths", + "deployment_id_from_record", + "full_data_type", + "iter_json_array", + "parquet_stem", + "resolve_paths", + "sanitize_filename", +] diff --git a/src/carp/core/dependencies.py b/src/carp/core/dependencies.py new file mode 100644 index 0000000..767a266 --- /dev/null +++ b/src/carp/core/dependencies.py @@ -0,0 +1,33 @@ +"""Optional dependency helpers.""" + +from __future__ import annotations + +import importlib +import importlib.util +from typing import Any + + +def module_available(module_name: str) -> bool: + """Return whether a module can be imported.""" + + return importlib.util.find_spec(module_name) is not None + + +def import_or_raise(module_name: str, extra_name: str) -> Any: + """Import a dependency or raise a helpful runtime error. + + Args: + module_name: Importable module name. + extra_name: Package extra or install hint shown to the user. + + Returns: + The imported module. + + Raises: + RuntimeError: If the dependency is unavailable. + """ + + try: + return importlib.import_module(module_name) + except ImportError as exc: # pragma: no cover - exercised through callers. + raise RuntimeError(f"{module_name} is required for this feature. 
Install the `{extra_name}` extras.") from exc diff --git a/src/carp/core/fields.py b/src/carp/core/fields.py new file mode 100644 index 0000000..2ae681a --- /dev/null +++ b/src/carp/core/fields.py @@ -0,0 +1,56 @@ +"""Helpers for nested CARP record structures.""" + +from __future__ import annotations + +from typing import Any + +from carp.constants import UNKNOWN_VALUE + + +def get_nested_value(value: Any, path: str, default: Any = None) -> Any: + """Resolve a dot-separated path from nested dictionaries.""" + + current = value + for part in path.split("."): + if not isinstance(current, dict): + return default + current = current.get(part) + if current is None: + return default + return current + + +def collect_field_paths(value: Any, prefix: str = "") -> set[str]: + """Collect dot-separated field paths from nested dictionaries.""" + + paths: set[str] = set() + if isinstance(value, dict): + for key, child in value.items(): + path = f"{prefix}.{key}" if prefix else key + paths.add(path) + paths.update(collect_field_paths(child, path)) + elif isinstance(value, list): + if prefix: + paths.add(f"{prefix}[]") + if value: + paths.update(collect_field_paths(value[0], f"{prefix}[]")) + return paths + + +def full_data_type(item: dict[str, Any]) -> str: + """Return the fully qualified data type for a CARP record.""" + + data_type = get_nested_value(item, "dataStream.dataType", {}) + namespace = data_type.get("namespace", UNKNOWN_VALUE) + name = data_type.get("name", UNKNOWN_VALUE) + return f"{namespace}.{name}" + + +def deployment_id_from_record(item: dict[str, Any]) -> str | None: + """Return the deployment identifier for a CARP record.""" + + top_level = item.get("studyDeploymentId") + if isinstance(top_level, str): + return top_level + nested = get_nested_value(item, "dataStream.studyDeploymentId") + return nested if isinstance(nested, str) else None diff --git a/src/carp/core/files.py b/src/carp/core/files.py new file mode 100644 index 0000000..51ed658 --- /dev/null +++ 
b/src/carp/core/files.py @@ -0,0 +1,53 @@ +"""Filesystem helpers for CARP Analytics.""" + +from __future__ import annotations + +import json +from collections.abc import Iterator, Sequence +from pathlib import Path +from typing import Any + +import ijson + + +def resolve_paths(file_paths: str | Path | Sequence[str | Path]) -> tuple[Path, ...]: + """Validate and normalize data-stream paths.""" + + raw_paths = [file_paths] if isinstance(file_paths, (str, Path)) else list(file_paths) + resolved = tuple(Path(path) for path in raw_paths) + for path in resolved: + if not path.exists(): + raise FileNotFoundError(f"File not found: {path}") + return resolved + + +def iter_json_array(file_path: Path) -> Iterator[dict[str, Any]]: + """Stream JSON array items from disk using `ijson`.""" + + with file_path.open("rb") as handle: + yield from ijson.items(handle, "item", use_float=True) + + +class JsonArrayWriter: + """Incrementally write JSON arrays without buffering the full payload.""" + + def __init__(self, output_path: Path): + self.output_path = output_path + self.output_path.parent.mkdir(parents=True, exist_ok=True) + self._handle = self.output_path.open("w", encoding="utf-8") + self._first_item = True + self._handle.write("[") + + def write(self, item: dict[str, Any]) -> None: + """Append one JSON object to the array.""" + + if not self._first_item: + self._handle.write(",") + json.dump(item, self._handle) + self._first_item = False + + def close(self) -> None: + """Finalize and close the output file.""" + + self._handle.write("]") + self._handle.close() diff --git a/src/carp/core/models.py b/src/carp/core/models.py new file mode 100644 index 0000000..e05b467 --- /dev/null +++ b/src/carp/core/models.py @@ -0,0 +1,28 @@ +"""Domain models shared by multiple subsystems.""" + +from __future__ import annotations + +from dataclasses import asdict, dataclass +from typing import Any + + +@dataclass(slots=True) +class ParticipantInfo: + """Normalized participant metadata for one 
deployment.""" + + study_deployment_id: str + role_name: str = "Participant" + full_name: str | None = None + sex: str | None = None + ssn: str | None = None + user_id: str | None = None + email: str | None = None + consent_signed: bool = False + consent_timestamp: str | None = None + source_folder: str | None = None + unified_participant_id: str | None = None + + def to_dict(self) -> dict[str, Any]: + """Return a JSON-serializable representation of the participant.""" + + return asdict(self) diff --git a/src/carp/core/naming.py b/src/carp/core/naming.py new file mode 100644 index 0000000..f143977 --- /dev/null +++ b/src/carp/core/naming.py @@ -0,0 +1,20 @@ +"""File and identifier naming helpers.""" + +from __future__ import annotations + +from carp.constants import UNKNOWN_VALUE + + +def sanitize_filename(value: str, allowed: str = "-_") -> str: + """Return a filesystem-safe representation of a string.""" + + safe = "".join(char for char in value if char.isalnum() or char in allowed).strip() + return safe or UNKNOWN_VALUE + + +def parquet_stem(data_type: str) -> str: + """Return a namespace-aware parquet stem for a data type.""" + + namespace, _, name = data_type.rpartition(".") + stem = f"{namespace}__{name}" if namespace else data_type + return sanitize_filename(stem, allowed="-_.") diff --git a/src/carp/export/__init__.py b/src/carp/export/__init__.py new file mode 100644 index 0000000..4d036f8 --- /dev/null +++ b/src/carp/export/__init__.py @@ -0,0 +1,5 @@ +"""Data export services.""" + +from .service import ExportService + +__all__ = ["ExportService"] diff --git a/src/carp/export/service.py b/src/carp/export/service.py new file mode 100644 index 0000000..a04ba98 --- /dev/null +++ b/src/carp/export/service.py @@ -0,0 +1,93 @@ +"""JSON export and grouping services.""" + +from __future__ import annotations + +from collections.abc import Callable, Iterable +from pathlib import Path +from typing import Any + +from carp.constants import UNKNOWN_VALUE +from 
carp.core.fields import get_nested_value +from carp.core.files import JsonArrayWriter +from carp.core.naming import sanitize_filename + + +class ExportService: + """Export CARP records to JSON arrays.""" + + def __init__(self, records: Any) -> None: + self._records = records + + def export_json(self, output_path: str | Path, data_type: str | None = None) -> Path: + """Write matching records to a JSON array file.""" + + writer = JsonArrayWriter(Path(output_path)) + try: + for item in self._records.iter_records(data_type): + writer.write(item) + finally: + writer.close() + return Path(output_path) + + def group_by_field(self, field_path: str, output_dir: str | Path) -> list[Path]: + """Group records by a nested field path.""" + + def key_factory(item: dict[str, Any]) -> str: + value = get_nested_value(item, field_path, UNKNOWN_VALUE) + return sanitize_filename(str(value), allowed="-_.@") + + return self._write_groups(Path(output_dir), self._records.iter_records(), key_factory) + + def group_by_participant(self, output_dir: str | Path, data_type: str | None = None) -> list[Path]: + """Group records by unified participant identifier.""" + + def key_factory(item: dict[str, Any]) -> str: + participant = item.get("_participant", {}) + return sanitize_filename( + str(participant.get("unified_participant_id", UNKNOWN_VALUE)), + allowed="-_.@", + ) + + return self._write_groups(Path(output_dir), self._records.iter_with_participants(data_type), key_factory) + + def group_by_identity( + self, + field_name: str, + output_dir: str | Path, + data_type: str | None = None, + ) -> list[Path]: + """Group records by a participant identity field.""" + + def key_factory(item: dict[str, Any]) -> str: + participant = item.get("_participant", {}) + value = participant.get(field_name) or UNKNOWN_VALUE + return sanitize_filename(str(value), allowed="-_.@") + + return self._write_groups( + Path(output_dir), + self._records.iter_with_participants(data_type), + key_factory, + ) + + def 
_write_groups( + self, + output_dir: Path, + items: Iterable[dict[str, Any]], + key_factory: Callable[[dict[str, Any]], str], + ) -> list[Path]: + """Write grouped JSON files and return created paths.""" + + writers: dict[str, JsonArrayWriter] = {} + output_dir.mkdir(parents=True, exist_ok=True) + try: + for item in items: + key = key_factory(item) + writer = writers.get(key) + if writer is None: + writer = JsonArrayWriter(output_dir / f"{key}.json") + writers[key] = writer + writer.write(item) + finally: + for writer in writers.values(): + writer.close() + return sorted(writer.output_path for writer in writers.values()) diff --git a/src/carp/frames/__init__.py b/src/carp/frames/__init__.py new file mode 100644 index 0000000..5353942 --- /dev/null +++ b/src/carp/frames/__init__.py @@ -0,0 +1,5 @@ +"""Dataframe and parquet services.""" + +from .service import FrameService + +__all__ = ["FrameService"] diff --git a/src/carp/frames/service.py b/src/carp/frames/service.py new file mode 100644 index 0000000..4b87173 --- /dev/null +++ b/src/carp/frames/service.py @@ -0,0 +1,139 @@ +"""Dataframe loading and parquet conversion for CARP studies.""" + +from __future__ import annotations + +from collections import defaultdict +from pathlib import Path +from typing import Any + +from carp.core.dependencies import import_or_raise +from carp.core.naming import parquet_stem + + +class FrameService: + """Load CARP data as dataframes or parquet files.""" + + def __init__(self, records: Any, participant_directory: Any) -> None: + self._records = records + self._participants = participant_directory + + def parquet_path(self, data_type: str, output_dir: str | Path) -> Path: + """Return the namespace-aware parquet path for a data type.""" + + return Path(output_dir) / f"{parquet_stem(data_type)}.parquet" + + def get_dataframe(self, data_type: str, parquet_dir: str | Path | None = None) -> Any: + """Return a dataframe for one data type.""" + + pandas = import_or_raise("pandas", 
"pandas") + if parquet_dir: + parquet_path = self.parquet_path(data_type, parquet_dir) + if parquet_path.exists(): + return pandas.read_parquet(parquet_path) + return pandas.DataFrame(list(self._records.iter_records(data_type))) + + def get_dataframe_with_participants( + self, + data_type: str, + parquet_dir: str | Path | None = None, + ) -> Any: + """Return a dataframe enriched with participant metadata.""" + + pandas = import_or_raise("pandas", "pandas") + frame = self.get_dataframe(data_type, parquet_dir) + if frame.empty: + return frame + deployment_ids = self._deployment_series(frame) + participant_rows = deployment_ids.apply(self._participant_row) + return pandas.concat([frame, participant_rows], axis=1) + + def convert_to_parquet( + self, + output_dir: str | Path, + batch_size: int = 10_000, + ) -> list[Path]: + """Convert the study to namespace-aware parquet files.""" + + pyarrow = import_or_raise("pyarrow", "pandas") + parquet = import_or_raise("pyarrow.parquet", "pandas") + output_path = Path(output_dir) + output_path.mkdir(parents=True, exist_ok=True) + buffers: dict[str, list[dict[str, Any]]] = defaultdict(list) + writers: dict[str, Any] = {} + try: + for item in self._records.iter_records(): + data_type = self._records.data_type(item) + buffers[data_type].append(item) + if len(buffers[data_type]) >= batch_size: + self._flush_buffer(pyarrow, parquet, output_path, data_type, buffers, writers) + finally: + for data_type, buffer in buffers.items(): + if buffer: + self._flush_buffer(pyarrow, parquet, output_path, data_type, buffers, writers) + for writer in writers.values(): + writer.close() + return sorted(self.parquet_path(data_type, output_path) for data_type in writers) + + def _participant_row(self, deployment_id: str | None) -> Any: + """Return participant columns for one deployment identifier.""" + + pandas = import_or_raise("pandas", "pandas") + participant = self._participants.get_participant(deployment_id or "") + if not participant: + return 
pandas.Series( + { + "participant_id": None, + "participant_email": None, + "participant_folder": None, + } + ) + return pandas.Series( + { + "participant_id": participant.unified_participant_id, + "participant_email": participant.email, + "participant_folder": participant.source_folder, + } + ) + + def _deployment_series(self, frame: Any) -> Any: + """Return deployment identifiers from a dataframe.""" + + if "studyDeploymentId" in frame.columns: + return frame["studyDeploymentId"] + return frame["dataStream"].apply(lambda value: value.get("studyDeploymentId") if isinstance(value, dict) else None) + + def _flush_buffer( + self, + pyarrow: Any, + parquet: Any, + output_path: Path, + data_type: str, + buffers: dict[str, list[dict[str, Any]]], + writers: dict[str, Any], + ) -> None: + """Flush one buffered parquet batch to disk.""" + + table = pyarrow.Table.from_pylist(buffers[data_type]) + path = self.parquet_path(data_type, output_path) + writer = writers.get(data_type) + if writer is None: + writers[data_type] = parquet.ParquetWriter(path, table.schema) + writer = writers[data_type] + elif not table.schema.equals(writer.schema): + table = self._align_table(pyarrow, table, writer.schema) + writer.write_table(table) + buffers[data_type].clear() + + def _align_table(self, pyarrow: Any, table: Any, schema: Any) -> Any: + """Align a batch to an existing parquet schema.""" + + columns = [] + for field in schema: + if field.name not in table.column_names: + columns.append(pyarrow.nulls(len(table), type=field.type)) + continue + column = table[field.name] + if not column.type.equals(field.type): + column = column.cast(field.type) + columns.append(column) + return pyarrow.Table.from_arrays(columns, schema=schema) diff --git a/src/carp/participants/__init__.py b/src/carp/participants/__init__.py new file mode 100644 index 0000000..7bb3835 --- /dev/null +++ b/src/carp/participants/__init__.py @@ -0,0 +1,7 @@ +"""Participant services and models.""" + +from .directory import 
ParticipantDirectory +from .service import ParticipantService +from .view import ParticipantView + +__all__ = ["ParticipantDirectory", "ParticipantService", "ParticipantView"] diff --git a/src/carp/participants/directory.py b/src/carp/participants/directory.py new file mode 100644 index 0000000..445efa3 --- /dev/null +++ b/src/carp/participants/directory.py @@ -0,0 +1,152 @@ +"""Participant lookup and unification services.""" + +from __future__ import annotations + +from collections import defaultdict +from pathlib import Path + +from carp.constants import PARTICIPANT_FILE +from carp.core.models import ParticipantInfo + +from .parser import load_participant_file + + +def _normalize(value: str | None) -> str | None: + """Normalize string identifiers for matching.""" + + if not value: + return None + clean = value.strip().lower() + return clean or None + + +class ParticipantDirectory: + """Store participant metadata across one or more study phases.""" + + def __init__(self, participants_by_deployment: dict[str, ParticipantInfo] | None = None): + self.participants_by_deployment = participants_by_deployment or {} + self.unified_participants: dict[str, list[ParticipantInfo]] = {} + self._counter = 0 + if self.participants_by_deployment: + self._unify() + + @classmethod + def from_folders(cls, folders: tuple[Path, ...]) -> ParticipantDirectory: + """Build a participant directory from phase folders.""" + + participants: dict[str, ParticipantInfo] = {} + for folder in folders: + file_path = folder / PARTICIPANT_FILE + if file_path.exists(): + participants.update(load_participant_file(file_path)) + return cls(participants) + + def get_participant(self, deployment_id: str) -> ParticipantInfo | None: + """Return one participant by deployment identifier.""" + + return self.participants_by_deployment.get(deployment_id) + + def get_unified_participant(self, unified_id: str) -> list[ParticipantInfo]: + """Return all deployments for one unified participant.""" + + return 
list(self.unified_participants.get(unified_id, [])) + + def find_by_email(self, email: str) -> list[ParticipantInfo]: + """Find all participant deployments matching an email address.""" + + target = _normalize(email) + return [p for p in self.participants_by_deployment.values() if _normalize(p.email) == target] + + def find_by_ssn(self, ssn: str) -> list[ParticipantInfo]: + """Find all participant deployments matching an SSN.""" + + return [p for p in self.participants_by_deployment.values() if p.ssn == ssn] + + def find_by_name(self, name: str) -> list[ParticipantInfo]: + """Find all participant deployments matching a full name.""" + + target = _normalize(name) + return [p for p in self.participants_by_deployment.values() if _normalize(p.full_name) == target] + + def deployment_ids(self, field_name: str, value: str) -> tuple[str, ...]: + """Return deployment identifiers for a participant lookup.""" + + matches = getattr(self, f"find_by_{field_name}")(value) + return tuple(participant.study_deployment_id for participant in matches) + + def summary_rows(self) -> list[dict[str, str]]: + """Return human-readable participant summary rows.""" + + rows: list[dict[str, str]] = [] + for unified_id, participants in self.unified_participants.items(): + folders = sorted({p.source_folder for p in participants if p.source_folder}) + emails = sorted({p.email for p in participants if p.email}) + ssns = sorted({p.ssn for p in participants if p.ssn}) + names = sorted({p.full_name for p in participants if p.full_name}) + rows.append( + { + "unified_id": unified_id, + "deployments": str(len(participants)), + "folders": ", ".join(folders) or "N/A", + "emails": ", ".join(emails) or "N/A", + "ssns": ", ".join(ssns) or "N/A", + "names": ", ".join(names) or "N/A", + } + ) + return rows + + def _register_group(self, participants: list[ParticipantInfo], assigned: set[str]) -> None: + """Register one unified participant group.""" + + unified_id = f"P{self._counter:04d}" + self._counter += 1 
+ for participant in participants: + participant.unified_participant_id = unified_id + assigned.add(participant.study_deployment_id) + self.unified_participants[unified_id] = participants + + def _unify(self) -> None: + """Assign unified participant identifiers across phases.""" + + assigned: set[str] = set() + matchers = ("email", "ssn", "name") + grouped: dict[str, dict[str, list[ParticipantInfo]]] = { + "email": defaultdict(list), + "ssn": defaultdict(list), + "name": defaultdict(list), + } + for participant in self.participants_by_deployment.values(): + if email := _normalize(participant.email): + grouped["email"][email].append(participant) + if participant.ssn: + grouped["ssn"][participant.ssn].append(participant) + if name := _normalize(participant.full_name): + grouped["name"][name].append(participant) + for matcher in matchers: + for participants in grouped[matcher].values(): + pending = [participant for participant in participants if participant.study_deployment_id not in assigned] + if pending: + self._register_group(pending, assigned) + for participant in self.participants_by_deployment.values(): + if participant.study_deployment_id not in assigned: + self._register_group([participant], assigned) + self._propagate() + + def _propagate(self) -> None: + """Share the best known metadata across unified deployments.""" + + for participants in self.unified_participants.values(): + fields = { + "full_name": next((p.full_name for p in participants if p.full_name), None), + "sex": next((p.sex for p in participants if p.sex), None), + "ssn": next((p.ssn for p in participants if p.ssn), None), + "email": next((p.email for p in participants if p.email), None), + "user_id": next((p.user_id for p in participants if p.user_id), None), + "consent_timestamp": next((p.consent_timestamp for p in participants if p.consent_timestamp), None), + } + signed = any(p.consent_signed for p in participants) + for participant in participants: + participant.consent_signed = signed + 
for field_name, value in fields.items(): + if value and not getattr(participant, field_name): + setattr(participant, field_name, value) diff --git a/src/carp/participants/parser.py b/src/carp/participants/parser.py new file mode 100644 index 0000000..654d589 --- /dev/null +++ b/src/carp/participants/parser.py @@ -0,0 +1,78 @@ +"""Parsing helpers for `participant-data.json` files.""" + +from __future__ import annotations + +import json +from pathlib import Path + +from carp.core.models import ParticipantInfo + + +def _coerce_full_name(value: object) -> str | None: + """Normalize CARP full-name payloads.""" + + if isinstance(value, str): + return value or None + if not isinstance(value, dict): + return None + parts = [value.get(key) for key in ("firstName", "middleName", "lastName")] + clean = [part.strip() for part in parts if isinstance(part, str) and part.strip()] + return " ".join(clean) or None + + +def _coerce_ssn(value: object) -> str | None: + """Normalize CARP SSN payloads.""" + + if isinstance(value, str): + return value or None + if isinstance(value, dict): + nested = value.get("socialSecurityNumber") + return str(nested) if nested else None + return None + + +def _apply_consent(participant: ParticipantInfo, value: object) -> None: + """Populate consent-related participant fields.""" + + if not isinstance(value, dict): + return + participant.consent_signed = True + participant.consent_timestamp = value.get("signedTimestamp") + participant.user_id = value.get("userId") + participant.email = value.get("name") + if participant.full_name: + return + consent_payload = value.get("consent") + if not isinstance(consent_payload, str): + return + try: + signature = json.loads(consent_payload).get("signature", {}) + except json.JSONDecodeError: + return + first_name = (signature.get("firstName") or "").strip() + last_name = (signature.get("lastName") or "").strip() + participant.full_name = f"{first_name} {last_name}".strip() or None + + +def 
def load_participant_file(file_path: Path) -> dict[str, ParticipantInfo]:
    """Load participant records from a single phase folder.

    Parses a CARP ``participant-data.json`` file and returns a mapping of
    study deployment id to the participant parsed from its roles. When a
    deployment lists several roles, the last role parsed wins.
    """
    payload = json.loads(file_path.read_text(encoding="utf-8"))
    records: dict[str, ParticipantInfo] = {}
    for entry in payload:
        deployment_id = entry.get("studyDeploymentId")
        if not deployment_id:
            continue
        for role in entry.get("roles", []):
            participant = ParticipantInfo(
                study_deployment_id=deployment_id,
                role_name=role.get("roleName", "Participant"),
                source_folder=file_path.parent.name,
            )
            data = role.get("data", {})
            participant.full_name = _coerce_full_name(
                data.get("dk.carp.webservices.input.full_name")
            )
            participant.sex = data.get("dk.cachet.carp.input.sex")
            participant.ssn = _coerce_ssn(data.get("dk.carp.webservices.input.ssn"))
            _apply_consent(
                participant, data.get("dk.carp.webservices.input.informed_consent")
            )
            records[deployment_id] = participant
    return records
by_name(self, name: str) -> list[Any]: + """Return participant deployments for a full name.""" + + return self._directory.find_by_name(name) + + def deployment_ids(self, field_name: str, value: str) -> tuple[str, ...]: + """Return deployment identifiers for a participant lookup.""" + + return self._directory.deployment_ids(field_name, value) + + def unified(self, unified_id: str) -> list[Any]: + """Return deployments for a unified participant identifier.""" + + return self._directory.get_unified_participant(unified_id) + + def summary_rows(self) -> list[dict[str, str]]: + """Return participant summary rows for presentation layers.""" + + return self._directory.summary_rows() diff --git a/src/carp/participants/view.py b/src/carp/participants/view.py new file mode 100644 index 0000000..383c28c --- /dev/null +++ b/src/carp/participants/view.py @@ -0,0 +1,105 @@ +"""Participant-centric study accessors.""" + +from __future__ import annotations + +from collections.abc import Iterator +from typing import Any + + +class ParticipantView: + """Provide participant-scoped access to study data.""" + + def __init__(self, study: Any, email: str): + self._study = study + self._email = email + + @property + def participants(self) -> list[Any]: + """Return underlying participant models for the view.""" + + return list(self._study.participants.by_email(self._email)) + + @property + def deployment_ids(self) -> tuple[str, ...]: + """Return deployment identifiers for the participant.""" + + return tuple(self._study.participants.deployment_ids("email", self._email)) + + @property + def exists(self) -> bool: + """Return whether the participant exists in the study.""" + + return bool(self.participants) + + def info(self) -> dict[str, Any] | None: + """Return merged participant metadata.""" + + if not self.participants: + return None + base = self.participants[0] + return { + "email": self._email, + "unified_id": base.unified_participant_id, + "full_name": base.full_name, + "ssn": base.ssn, 
+ "sex": base.sex, + "user_id": base.user_id, + "consent_signed": base.consent_signed, + "consent_timestamp": base.consent_timestamp, + "folders": sorted({p.source_folder for p in self.participants if p.source_folder}), + "deployment_ids": sorted(self.deployment_ids), + "num_deployments": len(self.deployment_ids), + } + + def iter_records(self, data_type: str | None = None) -> Iterator[dict[str, Any]]: + """Yield participant records with an optional data-type filter.""" + + yield from self._study.records.iter_records(data_type, self.deployment_ids) + + def available_fields(self, sample_size: int = 100) -> list[str]: + """Return participant-visible field paths.""" + + fields: set[str] = set() + for index, item in enumerate(self.iter_records()): + if index >= sample_size: + break + fields.update(self._study.records.collect_fields(item)) + return sorted(fields) + + def data_types(self) -> list[str]: + """Return unique data types for the participant.""" + + return sorted({self._study.records.data_type(item) for item in self.iter_records()}) + + def count(self, data_type: str | None = None) -> int: + """Return the number of participant records.""" + + return sum(1 for _ in self.iter_records(data_type)) + + def dataframe(self, data_type: str, parquet_dir: str | None = None) -> Any: + """Return a dataframe filtered to the participant.""" + + frame = self._study.frames.get_dataframe(data_type, parquet_dir) + if frame is None or frame.empty: + return frame + deployment_ids = self._study.plots.candidate_series( + frame, + ["studyDeploymentId", "dataStream.studyDeploymentId"], + ) + return frame if deployment_ids is None else frame[deployment_ids.isin(self.deployment_ids)] + + def plot_location( + self, + output_file: str | None = None, + parquet_dir: str | None = None, + include_steps: bool = True, + ) -> str | None: + """Render a location plot for the participant.""" + + result = self._study.plots.participant( + self._email, + output_file=output_file, + 
parquet_dir=parquet_dir, + include_steps=include_steps, + ) + return None if result is None else str(result) diff --git a/src/carp/plotting/__init__.py b/src/carp/plotting/__init__.py index 2a8f3b3..97cd027 100644 --- a/src/carp/plotting/__init__.py +++ b/src/carp/plotting/__init__.py @@ -1,10 +1,5 @@ -""" -Visualization module for CARP Analytics data. +"""Plotting services for CARP studies.""" -This module provides visualization tools for participant location data, -including heatmaps and geographic visualizations. -""" +from .service import PlotService -from .map_viz import LocationVisualizer, ParticipantVisualizer - -__all__ = ["LocationVisualizer", "ParticipantVisualizer"] +__all__ = ["PlotService"] diff --git a/src/carp/plotting/map_viz.py b/src/carp/plotting/map_viz.py deleted file mode 100644 index 3c567d5..0000000 --- a/src/carp/plotting/map_viz.py +++ /dev/null @@ -1,416 +0,0 @@ -import pandas as pd -import folium -from folium.plugins import HeatMap -from typing import Optional, List, Any, Set, TYPE_CHECKING - -if TYPE_CHECKING: - from ..reader import CarpDataStream - -from rich.console import Console - -console = Console() - - -class ParticipantVisualizer: - """ - Fluent API for visualizing participant data. - Usage: sd.participant("email").visualize.location() - """ - - def __init__(self, sleepiness_data: 'CarpDataStream', deployment_ids: Set[str], email: str): - self._sd = sleepiness_data - self._deployment_ids = deployment_ids - self._email = email - - def location( - self, - output_file: Optional[str] = None, - location_type: str = "dk.cachet.carp.geolocation", - step_type: str = "dk.cachet.carp.stepcount", - include_steps: bool = True, - parquet_dir: Optional[str] = "output_parquet" - ) -> Optional[str]: - """ - Generate a location heatmap for this participant. - - Args: - output_file: Output HTML file path. 
Defaults to "{email}_location.html" - location_type: Data type for location data - step_type: Data type for step count data - include_steps: Whether to overlay step count markers - parquet_dir: Directory with parquet files for faster loading - - Returns: - Path to the generated HTML file, or None if no data found - """ - if output_file is None: - # Sanitize email for filename - safe_email = self._email.replace("@", "_at_").replace(".", "_") - output_file = f"{safe_email}_location.html" - - console.print(f"[bold blue]Generating location heatmap for {self._email}...[/bold blue]") - - # Load location data - df_loc = self._sd.get_dataframe(location_type, parquet_dir) - - if df_loc is None or df_loc.empty: - console.print(f"[bold red]No location data found for type {location_type}[/bold red]") - return None - - # Filter by deployment IDs - user_series_loc = self._get_field(df_loc, ['studyDeploymentId', 'dataStream.studyDeploymentId']) - if user_series_loc is not None: - df_loc = df_loc[user_series_loc.isin(self._deployment_ids)] - - if df_loc.empty: - console.print(f"[bold red]No location data found for {self._email}[/bold red]") - return None - - # Load step data if requested - df_steps = pd.DataFrame() - if include_steps: - df_steps_raw = self._sd.get_dataframe(step_type, parquet_dir) - if df_steps_raw is not None and not df_steps_raw.empty: - user_series_steps = self._get_field(df_steps_raw, ['studyDeploymentId', 'dataStream.studyDeploymentId']) - if user_series_steps is not None: - df_steps = df_steps_raw[user_series_steps.isin(self._deployment_ids)] - - # Extract coordinates - df_loc['_lat'] = self._get_field(df_loc, ['measurement.data.latitude', 'latitude']) - df_loc['_lon'] = self._get_field(df_loc, ['measurement.data.longitude', 'longitude']) - df_loc['_time'] = self._get_field(df_loc, ['measurement.sensorStartTime', 'sensorStartTime']) - - if df_loc['_lat'].isnull().all() or df_loc['_lon'].isnull().all(): - console.print("[bold red]Could not find 
latitude/longitude columns in location data[/bold red]") - return None - - # Extract step data - if not df_steps.empty: - df_steps['_steps'] = self._get_field(df_steps, ['measurement.data.steps', 'steps']) - df_steps['_time'] = self._get_field(df_steps, ['measurement.sensorStartTime', 'sensorStartTime']) - - # Render the map - self._render_map(df_loc, df_steps, output_file) - return output_file - - def _get_field(self, df: pd.DataFrame, candidates: List[str]) -> Optional[pd.Series]: - """Extract a series from dataframe using candidate field paths.""" - for path in candidates: - if path in df.columns: - return df[path] - - parts = path.split('.') - if parts[0] in df.columns: - try: - series = df[parts[0]] - for part in parts[1:]: - series = series.apply(lambda x: x.get(part) if isinstance(x, dict) else None) - return series - except Exception: - pass - return None - - def _render_map(self, df_loc: pd.DataFrame, df_steps: pd.DataFrame, output_file: str): - """Render the heatmap to an HTML file.""" - df_loc = df_loc.dropna(subset=['_lat', '_lon']) - - if df_loc.empty: - console.print("[bold red]No valid coordinates found after filtering[/bold red]") - return - - center_lat = df_loc['_lat'].mean() - center_lon = df_loc['_lon'].mean() - - m = folium.Map(location=[center_lat, center_lon], zoom_start=12) - - # Add heatmap layer - heat_data = df_loc[['_lat', '_lon']].values.tolist() - HeatMap(heat_data).add_to(m) - - # Add step markers - if not df_steps.empty and '_steps' in df_steps.columns and '_time' in df_steps.columns: - if '_time' in df_loc.columns: - df_loc_sorted = df_loc.sort_values('_time') - df_steps_sorted = df_steps.sort_values('_time') - - df_loc_sorted['_time'] = df_loc_sorted['_time'].astype('int64') - df_steps_sorted['_time'] = df_steps_sorted['_time'].astype('int64') - - merged = pd.merge_asof( - df_steps_sorted, - df_loc_sorted[['_time', '_lat', '_lon']], - on='_time', - direction='nearest', - tolerance=300_000_000 # 5 minutes in microseconds - ) - - for 
_, row in merged.iterrows(): - if pd.notnull(row['_lat']) and pd.notnull(row['_lon']) and pd.notnull(row['_steps']): - steps = row['_steps'] - if steps > 0: - folium.CircleMarker( - location=[row['_lat'], row['_lon']], - radius=min(max(steps / 10, 3), 20), - popup=f"Steps: {steps}
Time: {row['_time']}", - color="blue", - fill=True, - fill_color="blue" - ).add_to(m) - - m.save(output_file) - console.print(f"[bold green]Heatmap saved to {output_file}[/bold green]") - - -class LocationVisualizer: - def __init__(self, sd: 'CarpDataStream'): - self.sd = sd - - def _get_field(self, df: pd.DataFrame, candidates: List[str]) -> Optional[pd.Series]: - """ - Tries to extract a series from the dataframe using a list of candidate field paths. - Supports dot-notation for nested dict columns. - """ - for path in candidates: - if path in df.columns: - return df[path] - - # Try nested - parts = path.split('.') - if parts[0] in df.columns: - try: - series = df[parts[0]] - for part in parts[1:]: - # Handle None/NaN - series = series.apply(lambda x: x.get(part) if isinstance(x, dict) else None) - return series - except Exception: - pass - return None - - def _render_map(self, df_loc: pd.DataFrame, df_steps: pd.DataFrame, output_file: str): - """ - Internal method to render the map from prepared dataframes. - Expects df_loc to have _lat, _lon, _time columns. - Expects df_steps to have _steps, _time columns. 
- """ - # Drop NaNs in location - df_loc = df_loc.dropna(subset=['_lat', '_lon']) - - if df_loc.empty: - console.print("[bold red]No valid coordinates found after filtering[/bold red]") - return - - # Create Map - center_lat = df_loc['_lat'].mean() - center_lon = df_loc['_lon'].mean() - - m = folium.Map(location=[center_lat, center_lon], zoom_start=12) - - # Add Heatmap - heat_data = df_loc[['_lat', '_lon']].values.tolist() - HeatMap(heat_data).add_to(m) - - # Add Step Markers - if not df_steps.empty: - if '_steps' in df_steps.columns and '_time' in df_steps.columns and '_time' in df_loc.columns: - # Sort by time - df_loc = df_loc.sort_values('_time') - df_steps = df_steps.sort_values('_time') - - # Ensure types match - df_loc['_time'] = df_loc['_time'].astype('int64') - df_steps['_time'] = df_steps['_time'].astype('int64') - - merged = pd.merge_asof( - df_steps, - df_loc[['_time', '_lat', '_lon']], - on='_time', - direction='nearest', - tolerance=300_000_000 # 5 minutes in microseconds - ) - - for idx, row in merged.iterrows(): - if pd.notnull(row['_lat']) and pd.notnull(row['_lon']) and pd.notnull(row['_steps']): - steps = row['_steps'] - if steps > 0: - folium.CircleMarker( - location=[row['_lat'], row['_lon']], - radius=min(max(steps / 10, 3), 20), - popup=f"Steps: {steps}
Time: {row['_time']}", - color="blue", - fill=True, - fill_color="blue" - ).add_to(m) - - # Save - m.save(output_file) - console.print(f"[bold green]Heatmap saved to {output_file}[/bold green]") - - def plot_heatmap_from_items( - self, - location_items: List[Any], - step_items: Optional[List[Any]] = None, - output_file: str = "user_heatmap.html" - ): - """ - Generates a heatmap from a list of type-safe objects (e.g. generated SleepinessItem). - """ - console.print(f"[bold blue]Generating heatmap from {len(location_items)} location items...[/bold blue]") - - # Helper to safely get attributes - def get_attr(obj, path): - parts = path.split('.') - curr = obj - for p in parts: - if curr is None: - return None - curr = getattr(curr, p, None) - return curr - - # Extract Location Data - loc_data = [] - for item in location_items: - lat = get_attr(item, 'measurement.data.latitude') - lon = get_attr(item, 'measurement.data.longitude') - time = get_attr(item, 'measurement.sensorStartTime') - - if lat is not None and lon is not None: - loc_data.append({'_lat': lat, '_lon': lon, '_time': time}) - - df_loc = pd.DataFrame(loc_data) - - if df_loc.empty: - console.print("[bold red]No valid coordinates found in location items[/bold red]") - return - - # Extract Step Data - df_steps = pd.DataFrame() - if step_items: - step_data = [] - for item in step_items: - steps = get_attr(item, 'measurement.data.steps') - time = get_attr(item, 'measurement.sensorStartTime') - if steps is not None: - step_data.append({'_steps': steps, '_time': time}) - df_steps = pd.DataFrame(step_data) - - self._render_map(df_loc, df_steps, output_file) - - def plot_user_heatmap( - self, - study_deployment_id: str, - location_type: str = "dk.cachet.carp.geolocation", - step_type: str = "dk.cachet.carp.stepcount", - parquet_dir: Optional[str] = "output_parquet", - output_file: str = "user_heatmap.html" - ): - """ - Generates a heatmap of user locations and overlays step count data. 
- """ - console.print(f"[bold blue]Generating heatmap for user {study_deployment_id}...[/bold blue]") - - # 1. Load Data - df_loc = self.sd.get_dataframe(location_type, parquet_dir) - df_steps = self.sd.get_dataframe(step_type, parquet_dir) - - if df_loc is None or df_loc.empty: - console.print(f"[bold red]No location data found for type {location_type}[/bold red]") - return - - if df_steps is None: - console.print(f"[yellow]No step data found for type {step_type}. Plotting location only.[/yellow]") - df_steps = pd.DataFrame() - - # 2. Filter by User - user_series_loc = self._get_field(df_loc, ['studyDeploymentId', 'dataStream.studyDeploymentId']) - if user_series_loc is not None: - df_loc = df_loc[user_series_loc == study_deployment_id] - - if df_loc.empty: - console.print(f"[bold red]No location data found for user {study_deployment_id}[/bold red]") - return - - if not df_steps.empty: - user_series_steps = self._get_field(df_steps, ['studyDeploymentId', 'dataStream.studyDeploymentId']) - if user_series_steps is not None: - df_steps = df_steps[user_series_steps == study_deployment_id] - - # 3. Extract Coordinates and Time - df_loc['_lat'] = self._get_field(df_loc, ['measurement.data.latitude', 'latitude']) - df_loc['_lon'] = self._get_field(df_loc, ['measurement.data.longitude', 'longitude']) - df_loc['_time'] = self._get_field(df_loc, ['measurement.sensorStartTime', 'sensorStartTime']) - - if df_loc['_lat'].isnull().all() or df_loc['_lon'].isnull().all(): - console.print("[bold red]Could not find latitude/longitude columns in location data[/bold red]") - return - - # 6. 
Add Step Markers - if not df_steps.empty: - df_steps['_steps'] = self._get_field(df_steps, ['measurement.data.steps', 'steps']) - df_steps['_time'] = self._get_field(df_steps, ['measurement.sensorStartTime', 'sensorStartTime']) - - self._render_map(df_loc, df_steps, output_file) - - def plot_participant_heatmap( - self, - unified_participant_id: str, - location_type: str = "dk.cachet.carp.geolocation", - step_type: str = "dk.cachet.carp.stepcount", - parquet_dir: Optional[str] = "output_parquet", - output_file: str = "participant_heatmap.html" - ): - """ - Generates a heatmap for a specific unified participant across all their deployments. - This aggregates data from all phases/folders for the same participant. - """ - # Get all deployment IDs for this participant - participants = self.sd.participant_manager.get_unified_participant(unified_participant_id) - if not participants: - console.print(f"[bold red]No participant found with ID {unified_participant_id}[/bold red]") - return - - deployment_ids = [p.study_deployment_id for p in participants] - console.print(f"[bold blue]Generating heatmap for participant {unified_participant_id} " - f"({len(deployment_ids)} deployments)...[/bold blue]") - - # 1. Load Data - df_loc = self.sd.get_dataframe(location_type, parquet_dir) - df_steps = self.sd.get_dataframe(step_type, parquet_dir) - - if df_loc is None or df_loc.empty: - console.print(f"[bold red]No location data found for type {location_type}[/bold red]") - return - - if df_steps is None: - console.print(f"[yellow]No step data found for type {step_type}. Plotting location only.[/yellow]") - df_steps = pd.DataFrame() - - # 2. 
Filter by all User deployments - user_series_loc = self._get_field(df_loc, ['studyDeploymentId', 'dataStream.studyDeploymentId']) - if user_series_loc is not None: - df_loc = df_loc[user_series_loc.isin(deployment_ids)] - - if df_loc.empty: - console.print(f"[bold red]No location data found for participant {unified_participant_id}[/bold red]") - return - - if not df_steps.empty: - user_series_steps = self._get_field(df_steps, ['studyDeploymentId', 'dataStream.studyDeploymentId']) - if user_series_steps is not None: - df_steps = df_steps[user_series_steps.isin(deployment_ids)] - - # 3. Extract Coordinates and Time - df_loc['_lat'] = self._get_field(df_loc, ['measurement.data.latitude', 'latitude']) - df_loc['_lon'] = self._get_field(df_loc, ['measurement.data.longitude', 'longitude']) - df_loc['_time'] = self._get_field(df_loc, ['measurement.sensorStartTime', 'sensorStartTime']) - - if df_loc['_lat'].isnull().all() or df_loc['_lon'].isnull().all(): - console.print("[bold red]Could not find latitude/longitude columns in location data[/bold red]") - return - - # 4. 
def _extract_part(value: Any, part: str) -> Any:
    """Return ``value[part]`` when *value* is a dict, else None."""

    if isinstance(value, dict):
        return value.get(part)
    return None


def candidate_series(frame: Any, candidates: Iterable[str]) -> Any:
    """Return the first dataframe series matching a candidate path.

    A candidate may name a column directly, or use dot notation to drill
    into dict-valued columns (e.g. ``measurement.data.latitude``). The
    first candidate whose column (or root column) exists wins; returns
    ``None`` when no candidate matches.
    """
    for path in candidates:
        if path in frame.columns:
            return frame[path]
        root, _, remainder = path.partition(".")
        if root not in frame.columns:
            continue
        series = frame[root]
        for key in remainder.split(".") if remainder else []:
            series = series.apply(_extract_part, args=(key,))
        return series
    return None


def prepare_location_frame(frame: Any) -> Any:
    """Return a copy of *frame* with normalized ``_lat``/``_lon``/``_time`` columns."""

    prepared = frame.copy()
    sources = {
        "_lat": ["measurement.data.latitude", "latitude"],
        "_lon": ["measurement.data.longitude", "longitude"],
        "_time": ["measurement.sensorStartTime", "sensorStartTime"],
    }
    for column, candidates in sources.items():
        prepared[column] = candidate_series(prepared, candidates)
    return prepared
["measurement.sensorStartTime", "sensorStartTime"]) + return steps + + +def frames_from_items(location_items: list[Any], step_items: list[Any] | None = None) -> tuple[Any, Any]: + """Build plotting dataframes from type-safe objects.""" + + pandas = import_or_raise("pandas", "viz") + + def attr_path(value: Any, path: str) -> Any: + current = value + for part in path.split("."): + current = getattr(current, part, None) + if current is None: + return None + return current + + location_rows = [] + for item in location_items: + latitude = attr_path(item, "measurement.data.latitude") + longitude = attr_path(item, "measurement.data.longitude") + timestamp = attr_path(item, "measurement.sensorStartTime") + if latitude is not None and longitude is not None: + location_rows.append({"_lat": latitude, "_lon": longitude, "_time": timestamp}) + step_rows = [] + for item in step_items or []: + steps = attr_path(item, "measurement.data.steps") + timestamp = attr_path(item, "measurement.sensorStartTime") + if steps is not None: + step_rows.append({"_steps": steps, "_time": timestamp}) + return pandas.DataFrame(location_rows), pandas.DataFrame(step_rows) diff --git a/src/carp/plotting/render.py b/src/carp/plotting/render.py new file mode 100644 index 0000000..d1a32c2 --- /dev/null +++ b/src/carp/plotting/render.py @@ -0,0 +1,56 @@ +"""HTML map rendering helpers.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +from carp.core.dependencies import import_or_raise + + +def render_heatmap(location_frame: Any, step_frame: Any, output_path: str | Path) -> str | None: + """Render a heatmap and optional step markers to HTML.""" + + pandas = import_or_raise("pandas", "viz") + folium = import_or_raise("folium", "viz") + heatmap = import_or_raise("folium.plugins", "viz").HeatMap + if {"_lat", "_lon"} - set(location_frame.columns): + return None + location = location_frame.dropna(subset=["_lat", "_lon"]) + if location.empty: + return None + map_view 
= folium.Map(location=[location["_lat"].mean(), location["_lon"].mean()], zoom_start=12) + heatmap(location[["_lat", "_lon"]].values.tolist()).add_to(map_view) + if not step_frame.empty and {"_steps", "_time"} <= set(step_frame.columns): + merged = _merge_steps(pandas, location, step_frame) + for _, row in merged.iterrows(): + if row["_steps"] and pandas.notnull(row["_lat"]) and pandas.notnull(row["_lon"]): + folium.CircleMarker( + location=[row["_lat"], row["_lon"]], + radius=min(max(row["_steps"] / 10, 3), 20), + popup=f"Steps: {row['_steps']}
class PlotService:
    """Render HTML heatmaps from study data or typed objects.

    Collaborators:
        frames: service providing ``get_dataframe(data_type, parquet_dir)``;
            may return ``None`` when no data exists for a type — presumably,
            based on the guards in ``ParticipantView.dataframe`` (confirm
            against the frames service contract).
        participants: service providing ``view(email)`` and ``unified(id)``.
    """

    def __init__(self, frames: Any, participants: Any) -> None:
        self._frames = frames
        self._participants = participants
        # Re-exported so other services can reuse the column lookup helper.
        self.candidate_series = candidate_series

    def participant(
        self,
        email: str,
        output_file: str | None = None,
        location_type: str = DEFAULT_LOCATION_TYPE,
        step_type: str = DEFAULT_STEP_TYPE,
        parquet_dir: str | None = None,
        include_steps: bool = True,
    ) -> str | None:
        """Render a participant heatmap from an email address.

        Returns the output path, or None when the participant is unknown
        or has no matching location data.
        """
        view = self._participants.view(email)
        if not view.exists:
            return None
        default_name = sanitize_filename(email.replace("@", "_at_"), allowed="-_.")
        return self._plot_for_deployments(
            view.deployment_ids,
            output_file or f"{default_name}_location.html",
            location_type,
            step_type,
            parquet_dir,
            include_steps,
        )

    def deployment(
        self,
        deployment_id: str,
        output_file: str = "deployment_heatmap.html",
        location_type: str = DEFAULT_LOCATION_TYPE,
        step_type: str = DEFAULT_STEP_TYPE,
        parquet_dir: str | None = None,
        include_steps: bool = True,
    ) -> str | None:
        """Render a heatmap for a single deployment."""

        return self._plot_for_deployments(
            (deployment_id,),
            output_file,
            location_type,
            step_type,
            parquet_dir,
            include_steps,
        )

    def unified(
        self,
        unified_id: str,
        output_file: str = "participant_heatmap.html",
        location_type: str = DEFAULT_LOCATION_TYPE,
        step_type: str = DEFAULT_STEP_TYPE,
        parquet_dir: str | None = None,
        include_steps: bool = True,
    ) -> str | None:
        """Render a heatmap aggregating all deployments of a unified participant."""

        deployment_ids = tuple(
            participant.study_deployment_id
            for participant in self._participants.unified(unified_id)
        )
        if not deployment_ids:
            return None
        return self._plot_for_deployments(
            deployment_ids,
            output_file,
            location_type,
            step_type,
            parquet_dir,
            include_steps,
        )

    def from_items(
        self,
        location_items: list[Any],
        step_items: list[Any] | None = None,
        output_file: str = "user_heatmap.html",
    ) -> str | None:
        """Render a heatmap from type-safe Python objects."""

        location_frame, step_frame = frames_from_items(location_items, step_items)
        return render_heatmap(location_frame, step_frame, output_file)

    def _plot_for_deployments(
        self,
        deployment_ids: tuple[str, ...],
        output_file: str,
        location_type: str,
        step_type: str,
        parquet_dir: str | None,
        include_steps: bool,
    ) -> str | None:
        """Render a heatmap for a set of deployments.

        Returns the rendered file path, or None when no usable location
        data exists for the deployments.
        """
        location_frame = self._frames.get_dataframe(location_type, parquet_dir)
        # Guard against None before touching .empty: get_dataframe can
        # yield None for absent data types (ParticipantView.dataframe
        # applies the same check).
        if location_frame is None or location_frame.empty:
            return None
        location_ids = candidate_series(
            location_frame, ["studyDeploymentId", "dataStream.studyDeploymentId"]
        )
        if location_ids is None:
            return None
        filtered_location = prepare_location_frame(
            location_frame[location_ids.isin(deployment_ids)]
        )
        if filtered_location.empty:
            return None
        if not include_steps:
            return render_heatmap(filtered_location, filtered_location.iloc[0:0], output_file)
        step_frame = self._frames.get_dataframe(step_type, parquet_dir)
        if step_frame is None:
            # No step data at all: render location only.
            return render_heatmap(filtered_location, filtered_location.iloc[0:0], output_file)
        if step_frame.empty:
            return render_heatmap(filtered_location, step_frame, output_file)
        step_ids = candidate_series(
            step_frame, ["studyDeploymentId", "dataStream.studyDeploymentId"]
        )
        if step_ids is None:
            return render_heatmap(filtered_location, step_frame.iloc[0:0], Path(output_file))
        filtered_steps = prepare_step_frame(step_frame[step_ids.isin(deployment_ids)])
        return render_heatmap(filtered_location, filtered_steps, Path(output_file))
across folders - unified_participant_id: Optional[str] = None - - def to_dict(self) -> Dict[str, Any]: - return { - "study_deployment_id": self.study_deployment_id, - "role_name": self.role_name, - "full_name": self.full_name, - "sex": self.sex, - "ssn": self.ssn, - "user_id": self.user_id, - "email": self.email, - "consent_signed": self.consent_signed, - "consent_timestamp": self.consent_timestamp, - "source_folder": self.source_folder, - "unified_participant_id": self.unified_participant_id, - } - - -class ParticipantManager: - """ - Manages participant data across multiple data folders. - Links participants across folders using SSN or user ID as identifiers. - """ - - def __init__(self): - # studyDeploymentId -> ParticipantInfo - self.participants_by_deployment: Dict[str, ParticipantInfo] = {} - # unified_participant_id -> list of ParticipantInfo (same person across folders) - self.unified_participants: Dict[str, List[ParticipantInfo]] = {} - # For generating unified IDs - self._unified_id_counter = 0 - - def load_participant_data(self, data_folders: List[Path]): - """ - Loads participant data from participant-data.json files in each data folder. 
- """ - console.print( - f"[bold blue]Loading participant data from {len(data_folders)} folders...[/bold blue]" - ) - - for folder in data_folders: - participant_file = folder / "participant-data.json" - if participant_file.exists(): - self._load_single_file(participant_file, folder.name) - else: - console.print(f"[yellow]No participant-data.json found in {folder}[/yellow]") - - # After loading all, unify participants - self._unify_participants() - - console.print( - f"[bold green]Loaded {len(self.participants_by_deployment)} participant deployments, " - f"{len(self.unified_participants)} unique participants[/bold green]" - ) - - def _load_single_file(self, file_path: Path, folder_name: str): - """Load participant data from a single file.""" - try: - with open(file_path, "r") as f: - data = json.load(f) - except (json.JSONDecodeError, IOError) as e: - console.print(f"[red]Error reading {file_path}: {e}[/red]") - return - - for entry in data: - deployment_id = entry.get("studyDeploymentId") - if not deployment_id: - continue - - roles = entry.get("roles", []) - for role in roles: - role_name = role.get("roleName", "Unknown") - role_data = role.get("data", {}) - - # Extract participant info from various fields - participant = ParticipantInfo( - study_deployment_id=deployment_id, - role_name=role_name, - source_folder=folder_name, - ) - - # Extract full name (can be a dict with firstName/lastName or a string) - full_name_data = role_data.get("dk.carp.webservices.input.full_name") - if full_name_data: - if isinstance(full_name_data, dict): - # Combine firstName, middleName, lastName - parts = [] - if full_name_data.get("firstName"): - parts.append(full_name_data["firstName"]) - if full_name_data.get("middleName"): - parts.append(full_name_data["middleName"]) - if full_name_data.get("lastName"): - parts.append(full_name_data["lastName"]) - if parts: - participant.full_name = " ".join(parts) - elif isinstance(full_name_data, str): - participant.full_name = full_name_data 
- - # Extract sex - sex_data = role_data.get("dk.cachet.carp.input.sex") - if sex_data: - participant.sex = sex_data - - # Extract SSN (can be a dict with socialSecurityNumber or a string) - ssn_data = role_data.get("dk.carp.webservices.input.ssn") - if ssn_data: - if isinstance(ssn_data, dict): - ssn_value = ssn_data.get("socialSecurityNumber") - if ssn_value: - participant.ssn = str(ssn_value) - elif isinstance(ssn_data, str): - participant.ssn = ssn_data - - # Extract consent info - consent_data = role_data.get("dk.carp.webservices.input.informed_consent") - if consent_data: - participant.consent_signed = True - if isinstance(consent_data, dict): - participant.consent_timestamp = consent_data.get("signedTimestamp") - participant.user_id = consent_data.get("userId") - participant.email = consent_data.get( - "name" - ) # email is stored in "name" field - - # Extract name from consent signature if not already set - if not participant.full_name: - consent_json_str = consent_data.get("consent") - if consent_json_str and isinstance(consent_json_str, str): - try: - consent_doc = json.loads(consent_json_str) - signature = consent_doc.get("signature", {}) - if isinstance(signature, dict): - first_name = (signature.get("firstName") or "").strip() - last_name = (signature.get("lastName") or "").strip() - if first_name or last_name: - participant.full_name = ( - f"{first_name} {last_name}".strip() - ) - except json.JSONDecodeError: - pass - - self.participants_by_deployment[deployment_id] = participant - - def _unify_participants(self): - """ - Identify the same participant across different folders/deployments. - Uses email as primary identifier (most accurate), falls back to SSN, then full name. 
- """ - # Group by identifier - by_email: Dict[str, List[ParticipantInfo]] = defaultdict(list) - by_ssn: Dict[str, List[ParticipantInfo]] = defaultdict(list) - by_name: Dict[str, List[ParticipantInfo]] = defaultdict(list) - - for p in self.participants_by_deployment.values(): - # Email, SSN, name must be strings for use as dict keys - if p.email and isinstance(p.email, str): - by_email[p.email.lower()].append(p) # normalize email to lowercase - if p.ssn and isinstance(p.ssn, str): - by_ssn[p.ssn].append(p) - if p.full_name and isinstance(p.full_name, str): - by_name[p.full_name.strip().lower()].append(p) # normalize name - - # Assign unified IDs, preferring email grouping (most accurate) - assigned: Set[str] = set() # deployment IDs already assigned - - # First pass: use email (most accurate identifier) - for email, participants in by_email.items(): - unified_id = f"P{self._unified_id_counter:04d}" - self._unified_id_counter += 1 - - for p in participants: - if p.study_deployment_id not in assigned: - p.unified_participant_id = unified_id - assigned.add(p.study_deployment_id) - - self.unified_participants[unified_id] = participants - - # Second pass: use SSN for remaining - for ssn, participants in by_ssn.items(): - unassigned = [p for p in participants if p.study_deployment_id not in assigned] - if not unassigned: - continue - - unified_id = f"P{self._unified_id_counter:04d}" - self._unified_id_counter += 1 - - for p in unassigned: - p.unified_participant_id = unified_id - assigned.add(p.study_deployment_id) - - self.unified_participants[unified_id] = unassigned - - # Third pass: use full name for remaining - for name, participants in by_name.items(): - unassigned = [p for p in participants if p.study_deployment_id not in assigned] - if not unassigned: - continue - - unified_id = f"P{self._unified_id_counter:04d}" - self._unified_id_counter += 1 - - for p in unassigned: - p.unified_participant_id = unified_id - assigned.add(p.study_deployment_id) - - 
self.unified_participants[unified_id] = unassigned - - # Remaining participants get their own unified ID - for p in self.participants_by_deployment.values(): - if p.study_deployment_id not in assigned: - unified_id = f"P{self._unified_id_counter:04d}" - self._unified_id_counter += 1 - p.unified_participant_id = unified_id - self.unified_participants[unified_id] = [p] - - # Propagate name/SSN data across unified participants - # If any deployment has name/SSN, share it with all deployments of same participant - self._propagate_participant_data() - - def _propagate_participant_data(self): - """ - Propagate name, SSN, and other data to all records of the same unified participant. - If one deployment has data that others don't, copy it to all. - """ - for unified_id, participants in self.unified_participants.items(): - # Collect best available data from all records - best_full_name = None - best_ssn = None - best_sex = None - - for p in participants: - if p.full_name and isinstance(p.full_name, str) and not best_full_name: - best_full_name = p.full_name - if p.ssn and isinstance(p.ssn, str) and not best_ssn: - best_ssn = p.ssn - if p.sex and not best_sex: - best_sex = p.sex - - # Apply to all records - for p in participants: - if best_full_name and not p.full_name: - p.full_name = best_full_name - if best_ssn and not p.ssn: - p.ssn = best_ssn - if best_sex and not p.sex: - p.sex = best_sex - - def get_participant(self, study_deployment_id: str) -> Optional[ParticipantInfo]: - """Get participant info by study deployment ID.""" - return self.participants_by_deployment.get(study_deployment_id) - - def get_unified_participant(self, unified_id: str) -> List[ParticipantInfo]: - """Get all deployments for a unified participant.""" - return self.unified_participants.get(unified_id, []) - - def find_by_email(self, email: str) -> List[ParticipantInfo]: - """Find all participant records matching an email address.""" - email_lower = email.lower() - return [ - p - for p in 
self.participants_by_deployment.values() - if p.email and p.email.lower() == email_lower - ] - - def find_by_ssn(self, ssn: str) -> List[ParticipantInfo]: - """Find all participant records matching an SSN.""" - return [p for p in self.participants_by_deployment.values() if p.ssn and p.ssn == ssn] - - def find_by_name(self, name: str) -> List[ParticipantInfo]: - """Find all participant records matching a full name (case-insensitive).""" - name_lower = name.strip().lower() - return [ - p - for p in self.participants_by_deployment.values() - if p.full_name and p.full_name.strip().lower() == name_lower - ] - - def get_deployment_ids_by_email(self, email: str) -> List[str]: - """Get all deployment IDs for a participant by email.""" - return [p.study_deployment_id for p in self.find_by_email(email)] - - def get_deployment_ids_by_ssn(self, ssn: str) -> List[str]: - """Get all deployment IDs for a participant by SSN.""" - return [p.study_deployment_id for p in self.find_by_ssn(ssn)] - - def get_deployment_ids_by_name(self, name: str) -> List[str]: - """Get all deployment IDs for a participant by name.""" - return [p.study_deployment_id for p in self.find_by_name(name)] - - def print_summary(self): - """Print a summary table of participants.""" - table = Table(title="Participants Summary") - table.add_column("Unified ID", style="cyan") - table.add_column("Deployments", style="magenta") - table.add_column("Folders", style="green") - table.add_column("Email", style="yellow") - table.add_column("SSN", style="red") - table.add_column("Full Name", style="white") - - for unified_id, participants in self.unified_participants.items(): - folders = set(p.source_folder for p in participants if p.source_folder) - emails = set(p.email for p in participants if p.email and isinstance(p.email, str)) - ssns = set(p.ssn for p in participants if p.ssn and isinstance(p.ssn, str)) - names = set( - p.full_name for p in participants if p.full_name and isinstance(p.full_name, str) - ) - 
table.add_row( - unified_id, - str(len(participants)), - ", ".join(sorted(folders)), - ", ".join(emails) if emails else "N/A", - ", ".join(ssns) if ssns else "N/A", - ", ".join(names) if names else "N/A", - ) - - console.print(table) - - -class ParticipantAccessor: - """ - Fluent API for accessing participant data. - Usage: sd.participant("email@example.com").info(), .all_data(), .available_fields() - sd.participant("email@example.com").visualize.location() - """ - - def __init__(self, sleepiness_data: "CarpDataStream", email: str): - self._sd = sleepiness_data - self._email = email - self._participants = sleepiness_data.participant_manager.find_by_email(email) - self._deployment_ids = set( - sleepiness_data.participant_manager.get_deployment_ids_by_email(email) - ) - self._visualizer = None - - @property - def exists(self) -> bool: - """Check if participant exists.""" - return len(self._participants) > 0 - - @property - def visualize(self): - """ - Access visualization methods for this participant. - Usage: sd.participant("email").visualize.location() - """ - if self._visualizer is None: - from .plotting import ParticipantVisualizer - - self._visualizer = ParticipantVisualizer(self._sd, self._deployment_ids, self._email) - return self._visualizer - - def info(self) -> Optional[Dict[str, Any]]: - """ - Get participant information as a dictionary. - Returns combined info from all deployments for this participant. 
- """ - if not self._participants: - return None - - # Get first participant as base - base = self._participants[0] - - # Combine info from all records - all_folders = set() - all_deployment_ids = set() - - for p in self._participants: - if p.source_folder: - all_folders.add(p.source_folder) - all_deployment_ids.add(p.study_deployment_id) - - return { - "email": self._email, - "unified_id": base.unified_participant_id, - "full_name": base.full_name, - "ssn": base.ssn, - "sex": base.sex, - "user_id": base.user_id, - "consent_signed": base.consent_signed, - "consent_timestamp": base.consent_timestamp, - "folders": sorted(all_folders), - "deployment_ids": sorted(all_deployment_ids), - "num_deployments": len(all_deployment_ids), - } - - def print_info(self): - """Print participant information in a formatted table.""" - info = self.info() - if not info: - console.print(f"[red]No participant found with email: {self._email}[/red]") - return - - table = Table(title=f"Participant: {self._email}") - table.add_column("Field", style="cyan") - table.add_column("Value", style="white") - - for key, value in info.items(): - if isinstance(value, list): - value = ", ".join(str(v) for v in value) - table.add_row(key, str(value) if value is not None else "N/A") - - console.print(table) - - def all_data(self, data_type: Optional[str] = None) -> Generator[Dict[str, Any], None, None]: - """ - Get all data items for this participant. - Optionally filter by data type (e.g., "dk.cachet.carp.stepcount"). - """ - yield from self._sd._get_data_by_deployment_ids(self._deployment_ids, data_type) - - def available_fields(self, sample_size: int = 100) -> Set[str]: - """ - Discover all available fields in this participant's data. - Scans a sample of records and returns field paths in dot-notation. 
- """ - fields = set() - count = 0 - - for item in self.all_data(): - if count >= sample_size: - break - self._collect_fields(item, "", fields) - count += 1 - - return fields - - def _collect_fields(self, obj: Any, prefix: str, fields: Set[str]): - """Recursively collect field paths.""" - if isinstance(obj, dict): - for key, value in obj.items(): - path = f"{prefix}.{key}" if prefix else key - fields.add(path) - self._collect_fields(value, path, fields) - elif isinstance(obj, list) and obj: - # Sample first item in list - self._collect_fields(obj[0], f"{prefix}[]", fields) - - def print_available_fields(self, sample_size: int = 100): - """Print all available fields in a formatted list.""" - fields = self.available_fields(sample_size) - console.print(f"[bold]Available fields for {self._email}:[/bold]") - for f in sorted(fields): - console.print(f" - {f}") - - def data_types(self) -> Set[str]: - """Get all unique data types for this participant.""" - types = set() - for item in self.all_data(): - data_stream = item.get("dataStream", {}) - data_type = data_stream.get("dataType", {}) - type_name = data_type.get("name") - if type_name: - types.add(type_name) - return types - - def print_data_types(self): - """Print all data types available for this participant.""" - types = self.data_types() - console.print(f"[bold]Data types for {self._email}:[/bold]") - for t in sorted(types): - console.print(f" - {t}") - - def count(self, data_type: Optional[str] = None) -> int: - """Count total data items for this participant.""" - return sum(1 for _ in self.all_data(data_type)) - - def dataframe(self, data_type: str, parquet_dir: Optional[str] = None): - """ - Get a pandas DataFrame of this participant's data for a specific type. - Uses parquet files if available and parquet_dir is specified. - """ - try: - import pandas as pd - except ImportError: - console.print( - "[red]pandas is required for dataframe(). 
Install with: pip install pandas[/red]" - ) - return None - - if parquet_dir: - # Try to load from parquet and filter - df = self._sd.get_dataframe(data_type, parquet_dir) - if df is not None and not df.empty: - return df[df["studyDeploymentId"].isin(self._deployment_ids)] - - # Fall back to streaming - items = list(self.all_data(data_type)) - if not items: - return pd.DataFrame() - return pd.DataFrame(items) - - -class CarpDataStream: - def __init__(self, file_paths: str | Path | List[str | Path], load_participants: bool = True): - if isinstance(file_paths, (str, Path)): - file_paths = [file_paths] - - self.file_paths = [Path(p) for p in file_paths] - for p in self.file_paths: - if not p.exists(): - raise FileNotFoundError(f"File not found: {p}") - - self.schema_cache = {} - self.participant_manager = ParticipantManager() - - # Auto-detect and load participant data from parent folders - if load_participants: - self._auto_load_participants() - - def _auto_load_participants(self): - """ - Automatically detect and load participant data from the data folders - containing the input files. - """ - data_folders = set() - for file_path in self.file_paths: - # Each file is typically in a phase folder like data/phase-1-1/data-streams.json - parent = file_path.parent - if (parent / "participant-data.json").exists(): - data_folders.add(parent) - - if data_folders: - self.participant_manager.load_participant_data(list(data_folders)) - - def load_participants_from_folders(self, folders: List[str | Path]): - """ - Manually load participant data from specific folders. - Useful when files are in a different location than the input data. - """ - folder_paths = [Path(f) for f in folders] - self.participant_manager.load_participant_data(folder_paths) - - def participant(self, email: str) -> ParticipantAccessor: - """ - Access participant data via email using a fluent API. 
- - Usage: - sd.participant("email@example.com").info() - sd.participant("email@example.com").all_data() - sd.participant("email@example.com").available_fields() - sd.participant("email@example.com").data_types() - sd.participant("email@example.com").dataframe("dk.cachet.carp.stepcount") - """ - return ParticipantAccessor(self, email) - - def get_participant(self, study_deployment_id: str) -> Optional[ParticipantInfo]: - """Get participant info by study deployment ID.""" - return self.participant_manager.get_participant(study_deployment_id) - - def find_participant_by_email(self, email: str) -> List[ParticipantInfo]: - """Find all participant records matching an email address.""" - return self.participant_manager.find_by_email(email) - - def find_participant_by_ssn(self, ssn: str) -> List[ParticipantInfo]: - """Find all participant records matching an SSN.""" - return self.participant_manager.find_by_ssn(ssn) - - def find_participant_by_name(self, name: str) -> List[ParticipantInfo]: - """Find all participant records matching a full name.""" - return self.participant_manager.find_by_name(name) - - def get_data_by_email( - self, email: str, data_type: Optional[str] = None - ) -> Generator[Dict[str, Any], None, None]: - """ - Get all data items for a participant identified by email. - Optionally filter by data type. - """ - deployment_ids = set(self.participant_manager.get_deployment_ids_by_email(email)) - yield from self._get_data_by_deployment_ids(deployment_ids, data_type) - - def get_data_by_ssn( - self, ssn: str, data_type: Optional[str] = None - ) -> Generator[Dict[str, Any], None, None]: - """ - Get all data items for a participant identified by SSN. - Optionally filter by data type. 
- """ - deployment_ids = set(self.participant_manager.get_deployment_ids_by_ssn(ssn)) - yield from self._get_data_by_deployment_ids(deployment_ids, data_type) - - def get_data_by_name( - self, name: str, data_type: Optional[str] = None - ) -> Generator[Dict[str, Any], None, None]: - """ - Get all data items for a participant identified by full name. - Optionally filter by data type. - """ - deployment_ids = set(self.participant_manager.get_deployment_ids_by_name(name)) - yield from self._get_data_by_deployment_ids(deployment_ids, data_type) - - def _get_data_by_deployment_ids( - self, deployment_ids: set, data_type: Optional[str] = None - ) -> Generator[Dict[str, Any], None, None]: - """Internal helper to filter data by deployment IDs and optionally by type.""" - if not deployment_ids: - return - - for item in self._get_item_generator(): - item_deployment_id = item.get("studyDeploymentId") - if not item_deployment_id: - item_deployment_id = item.get("dataStream", {}).get("studyDeploymentId") - - if item_deployment_id not in deployment_ids: - continue - - if data_type: - dt = item.get("dataStream", {}).get("dataType", {}) - target_namespace, target_name = data_type.rsplit(".", 1) - if dt.get("name") != target_name or dt.get("namespace") != target_namespace: - continue - - yield item - - def print_participants(self): - """Print a summary of all participants.""" - self.participant_manager.print_summary() - - def _get_item_generator(self) -> Generator[Dict[str, Any], None, None]: - """ - Returns a generator that yields items from the JSON files. - Uses ijson for memory-efficient streaming. - """ - for file_path in self.file_paths: - with open(file_path, "rb") as f: - # Assuming the file is a list of objects. - # 'item' matches objects in a list. 
- # use_float=True ensures numbers are floats, avoiding Decimal schema mismatches in PyArrow - yield from ijson.items(f, "item", use_float=True) - - def _get_item_generator_with_participant( - self, include_participant: bool = False - ) -> Generator[Dict[str, Any], None, None]: - """ - Returns a generator that yields items from the JSON files, - optionally enriched with participant info. - """ - for item in self._get_item_generator(): - if include_participant: - deployment_id = item.get("studyDeploymentId") - if not deployment_id: - deployment_id = item.get("dataStream", {}).get("studyDeploymentId") - - if deployment_id: - participant = self.participant_manager.get_participant(deployment_id) - if participant: - item = item.copy() # Don't mutate original - item["_participant"] = participant.to_dict() - - yield item - - def get_data_with_participants( - self, data_type: Optional[str] = None - ) -> Generator[Dict[str, Any], None, None]: - """ - Yields items enriched with participant information. - If data_type is specified, filters to that type. - """ - gen = self._get_item_generator_with_participant(include_participant=True) - - if data_type: - target_namespace, target_name = data_type.rsplit(".", 1) - for item in gen: - dt = item.get("dataStream", {}).get("dataType", {}) - if dt.get("name") == target_name and dt.get("namespace") == target_namespace: - yield item - else: - yield from gen - - def group_by_participant(self, output_dir: str | Path, data_type: Optional[str] = None): - """ - Groups data by unified participant ID and exports each group to a separate JSON file. - Useful for analyzing individual participant data across all phases. 
- """ - output_dir = Path(output_dir) - output_dir.mkdir(parents=True, exist_ok=True) - - console.print(f"[bold blue]Grouping data by participant into {output_dir}...[/bold blue]") - - files = {} - gen = self.get_data_with_participants(data_type) - - try: - for item in tqdm(gen, desc="Grouping by participant"): - participant_info = item.get("_participant", {}) - unified_id = participant_info.get("unified_participant_id", "unknown") - - if unified_id not in files: - f = open(output_dir / f"{unified_id}.json", "w") - f.write("[") - files[unified_id] = {"handle": f, "first": True} - - f_info = files[unified_id] - if not f_info["first"]: - f_info["handle"].write(",") - json.dump(item, f_info["handle"]) - f_info["first"] = False - - finally: - for f_info in files.values(): - f_info["handle"].write("]") - f_info["handle"].close() - - console.print( - f"[bold green]Grouping complete! Created {len(files)} participant files.[/bold green]" - ) - - def group_by_email(self, output_dir: str | Path, data_type: Optional[str] = None): - """ - Groups data by participant email and exports each group to a separate JSON file. - """ - self._group_by_field_value(output_dir, "email", data_type) - - def group_by_ssn(self, output_dir: str | Path, data_type: Optional[str] = None): - """ - Groups data by participant SSN and exports each group to a separate JSON file. - """ - self._group_by_field_value(output_dir, "ssn", data_type) - - def group_by_name(self, output_dir: str | Path, data_type: Optional[str] = None): - """ - Groups data by participant full name and exports each group to a separate JSON file. 
- """ - self._group_by_field_value(output_dir, "full_name", data_type) - - def _group_by_field_value( - self, output_dir: str | Path, field: str, data_type: Optional[str] = None - ): - """Internal helper to group data by a participant field (email, ssn, or full_name).""" - output_dir = Path(output_dir) - output_dir.mkdir(parents=True, exist_ok=True) - - console.print(f"[bold blue]Grouping data by {field} into {output_dir}...[/bold blue]") - - files = {} - gen = self.get_data_with_participants(data_type) - - try: - for item in tqdm(gen, desc=f"Grouping by {field}"): - participant_info = item.get("_participant", {}) - value = participant_info.get(field, "unknown") - - if not value or not isinstance(value, str): - value = "unknown" - - # Sanitize filename - safe_value = "".join( - c for c in value if c.isalnum() or c in ("-", "_", "@", ".") - ).strip() - if not safe_value: - safe_value = "unknown" - - if safe_value not in files: - f = open(output_dir / f"{safe_value}.json", "w") - f.write("[") - files[safe_value] = {"handle": f, "first": True} - - f_info = files[safe_value] - if not f_info["first"]: - f_info["handle"].write(",") - json.dump(item, f_info["handle"]) - f_info["first"] = False - - finally: - for f_info in files.values(): - f_info["handle"].write("]") - f_info["handle"].close() - - console.print(f"[bold green]Grouping complete! Created {len(files)} files.[/bold green]") - - def get_dataframe_with_participants( - self, data_type: str, parquet_dir: Optional[str | Path] = None - ): - """ - Returns a pandas DataFrame for the specified data type, enriched with participant info. 
- Adds columns: participant_id, participant_email, participant_folder - """ - try: - import pandas as pd - except ImportError: - console.print("[bold red]pandas is required for DataFrame conversion.[/bold red]") - return None - - # Get base dataframe - df = self.get_dataframe(data_type, parquet_dir) - if df is None or df.empty: - return df - - # Add participant columns - def get_participant_info(deployment_id): - p = self.participant_manager.get_participant(deployment_id) - if p: - return pd.Series( - { - "participant_id": p.unified_participant_id, - "participant_email": p.email, - "participant_folder": p.source_folder, - } - ) - return pd.Series( - {"participant_id": None, "participant_email": None, "participant_folder": None} - ) - - # Extract studyDeploymentId from dataStream column if it exists - if "dataStream" in df.columns: - deployment_ids = df["dataStream"].apply( - lambda x: x.get("studyDeploymentId") if isinstance(x, dict) else None - ) - elif "studyDeploymentId" in df.columns: - deployment_ids = df["studyDeploymentId"] - else: - console.print("[yellow]Could not find studyDeploymentId column[/yellow]") - return df - - participant_info = deployment_ids.apply(get_participant_info) - return pd.concat([df, participant_info], axis=1) - - def scan_schema(self) -> Dict[str, Any]: - """ - Scans the entire file to infer the schema of the data. - Returns a dictionary mapping data types to their field structures. - """ - schemas = defaultdict(set) - - # We need to count items for tqdm, but counting requires a pass. - # For very large files, we might just use file size or unknown length. - # Let's try to estimate or just use a simple progress bar. - - console.print(f"[bold blue]Scanning schema for {len(self.file_paths)} files...[/bold blue]") - - # We can use tqdm wrapping the generator, but we don't know total length easily without reading. - # We can use file size as a proxy if we read raw bytes, but ijson handles the reading. - # Let's just use a counter. 
- - count = 0 - with tqdm(desc="Processing items", unit=" items") as pbar: - for item in self._get_item_generator(): - data_type = item.get("dataStream", {}).get("dataType", {}).get("name", "unknown") - namespace = ( - item.get("dataStream", {}).get("dataType", {}).get("namespace", "unknown") - ) - full_type = f"{namespace}.{data_type}" - - measurement_data = item.get("measurement", {}).get("data", {}) - - # Collect keys - for key in measurement_data.keys(): - schemas[full_type].add(key) - - count += 1 - if count % 1000 == 0: - pbar.update(1000) - pbar.update(count % 1000) - - # Convert sets to lists for JSON serialization/display - self.schema_cache = {k: list(v) for k, v in schemas.items()} - return self.schema_cache - - def print_schema(self): - if not self.schema_cache: - self.scan_schema() - - table = Table(title="Inferred Schema") - table.add_column("Data Type", style="cyan") - table.add_column("Fields", style="magenta") - - for dtype, fields in self.schema_cache.items(): - table.add_row(dtype, ", ".join(sorted(fields))) - - console.print(table) - - def get_data_by_type(self, target_type: str) -> Generator[Dict[str, Any], None, None]: - """ - Yields items of a specific data type. - """ - target_namespace, target_name = target_type.rsplit(".", 1) - - for item in self._get_item_generator(): - dt = item.get("dataStream", {}).get("dataType", {}) - if dt.get("name") == target_name and dt.get("namespace") == target_namespace: - yield item - - def export_to_json(self, output_path: str, data_type: Optional[str] = None): - """ - Exports data to a JSON file. Can filter by data type. 
- """ - console.print(f"[bold green]Exporting data to {output_path}...[/bold green]") - - generator = self.get_data_by_type(data_type) if data_type else self._get_item_generator() - - with open(output_path, "w") as f: - f.write("[") - first = True - for item in tqdm(generator, desc="Exporting"): - if not first: - f.write(",") - json.dump(item, f) - first = False - f.write("]") - - console.print("[bold green]Export complete![/bold green]") - - def group_by_field(self, field_path: str, output_dir: str | Path): - """ - Groups data by a specific field and exports each group to a separate JSON file. - field_path is a dot-separated string, e.g., 'dataStream.dataType.name'. - """ - output_dir = Path(output_dir) - output_dir.mkdir(parents=True, exist_ok=True) - - console.print(f"[bold blue]Grouping data by {field_path} into {output_dir}...[/bold blue]") - - # We can't keep all files open if there are too many groups. - # But for things like dataType, there are usually < 20 groups. - # A safe approach for low memory is to read the file once and append to files, - # but opening/closing files for every line is slow. - # A middle ground is to keep a cache of open file handles, closing LRU if too many. 
- - # For simplicity and speed assuming reasonable number of groups (<100): - files = {} - - try: - for item in tqdm(self._get_item_generator(), desc="Grouping"): - # Extract value - value = item - for part in field_path.split("."): - if isinstance(value, dict): - value = value.get(part) - else: - value = None - break - - if value is None: - value = "unknown" - - value = str(value) - # Sanitize filename - safe_value = "".join(c for c in value if c.isalnum() or c in ("-", "_")).strip() - if not safe_value: - safe_value = "unknown" - - if safe_value not in files: - f = open(output_dir / f"{safe_value}.json", "w") - f.write("[") - files[safe_value] = {"handle": f, "first": True} - - f_info = files[safe_value] - if not f_info["first"]: - f_info["handle"].write(",") - json.dump(item, f_info["handle"]) - f_info["first"] = False - - finally: - for f_info in files.values(): - f_info["handle"].write("]") - f_info["handle"].close() - - console.print(f"[bold green]Grouping complete! Created {len(files)} files.[/bold green]") - - def count_items(self) -> int: - """ - Counts the total number of items in the JSON file. - """ - console.print(f"[bold blue]Counting items in {len(self.file_paths)} files...[/bold blue]") - count = 0 - for _ in tqdm(self._get_item_generator(), desc="Counting"): - count += 1 - return count - - def convert_to_parquet(self, output_dir: str | Path, batch_size: int = 10000): - """ - Converts the JSON data to Parquet files, grouped by data type. - Requires pyarrow and pandas. - """ - import importlib.util - - if not importlib.util.find_spec("pyarrow") or not importlib.util.find_spec("pandas"): - console.print( - "[bold red]pyarrow and pandas are required for Parquet conversion. 
Please install them.[/bold red]" - ) - return - - output_dir = Path(output_dir) - output_dir.mkdir(parents=True, exist_ok=True) - - console.print(f"[bold blue]Converting to Parquet in {output_dir}...[/bold blue]") - - writers = {} - buffers = defaultdict(list) - - try: - for item in tqdm(self._get_item_generator(), desc="Converting"): - # Determine type - try: - dtype = item.get("dataStream", {}).get("dataType", {}).get("name", "unknown") - # Sanitize - safe_name = "".join(c for c in dtype if c.isalnum() or c in ("-", "_")).strip() - if not safe_name: - safe_name = "unknown" - except (AttributeError, TypeError): - safe_name = "unknown" - - buffers[safe_name].append(item) - - if len(buffers[safe_name]) >= batch_size: - self._flush_buffer_to_parquet( - safe_name, buffers[safe_name], writers, output_dir - ) - buffers[safe_name].clear() - - finally: - # Flush remaining - for name, buf in buffers.items(): - if buf: - self._flush_buffer_to_parquet(name, buf, writers, output_dir) - - # Close writers - for writer in writers.values(): - writer.close() - - console.print( - f"[bold green]Conversion complete! Created {len(writers)} Parquet files.[/bold green]" - ) - - def _flush_buffer_to_parquet(self, name, buffer, writers, output_dir): - import pyarrow as pa - import pyarrow.parquet as pq - - if not buffer: - return - - try: - # PyArrow's from_pylist is robust but might need explicit schema if types vary. - # We let it infer for now. - table = pa.Table.from_pylist(buffer) - except Exception as e: - console.print(f"[red]Error converting batch for {name}: {e}[/red]") - return - - if name not in writers: - file_path = output_dir / f"{name}.parquet" - # Use the schema from the first batch - writers[name] = pq.ParquetWriter(file_path, table.schema) - - try: - # If the new batch has a different schema (e.g. missing fields or new fields), - # write_table might fail or produce a file with multiple schemas (which is bad). 
- # Ideally we should unify schemas, but that requires reading all data first. - # For now, we assume schema consistency or that PyArrow handles minor diffs. - # If strict schema validation fails, we might need to cast. - - # Check if schema matches writer's schema - if not table.schema.equals(writers[name].schema): - # Try to cast to the writer's schema - # This handles cases where a field is missing (null) or type promotion is needed - try: - table = table.cast(writers[name].schema) - except Exception: - # If casting fails, we might have a problem. - # For now, log and skip or try to write anyway (which might fail) - # console.print(f"[yellow]Schema mismatch for {name}. Attempting cast... {cast_error}[/yellow]") - pass - - writers[name].write_table(table) - except Exception as e: - console.print(f"[red]Error writing batch for {name}: {e}[/red]") - - def get_dataframe(self, data_type: str, parquet_dir: Optional[str | Path] = None): - """ - Returns a pandas DataFrame for the specified data type. - If parquet_dir is provided and contains the corresponding parquet file, it loads from there. - Otherwise, it scans the JSON file (which is slower). - """ - try: - import pandas as pd - except ImportError: - console.print( - "[bold red]pandas is required for DataFrame conversion. Please install it.[/bold red]" - ) - return None - - # Check Parquet first - if parquet_dir: - parquet_dir = Path(parquet_dir) - # data_type might be full namespace "dk.cachet.carp.heartbeat" - # or just "heartbeat" if we simplified names in conversion. - # Our conversion uses simplified names. 
- - simple_name = data_type.split(".")[-1] - parquet_path = parquet_dir / f"{simple_name}.parquet" - - if parquet_path.exists(): - console.print(f"[bold blue]Loading {data_type} from {parquet_path}...[/bold blue]") - return pd.read_parquet(parquet_path) - - # Try full name just in case - safe_full_name = "".join(c for c in data_type if c.isalnum() or c in ("-", "_")).strip() - parquet_path_full = parquet_dir / f"{safe_full_name}.parquet" - if parquet_path_full.exists(): - console.print( - f"[bold blue]Loading {data_type} from {parquet_path_full}...[/bold blue]" - ) - return pd.read_parquet(parquet_path_full) - - # Fallback to JSON scan - console.print( - f"[bold yellow]Parquet file not found. Scanning JSON for {data_type}...[/bold yellow]" - ) - data = list(tqdm(self.get_data_by_type(data_type), desc="Loading to DataFrame")) - return pd.DataFrame(data) - - def list_all_fields(self, sample_size: int = 100) -> List[str]: - """ - Scans a sample of items to find all available dot-separated field paths. - Useful for determining what fields can be used in group_by_field. - """ - console.print( - f"[bold blue]Scanning first {sample_size} items to find field paths...[/bold blue]" - ) - paths = set() - - def _recurse(obj, current_path): - if isinstance(obj, dict): - for k, v in obj.items(): - new_path = f"{current_path}.{k}" if current_path else k - paths.add(new_path) - _recurse(v, new_path) - - count = 0 - for item in self._get_item_generator(): - _recurse(item, "") - count += 1 - if count >= sample_size: - break - - return sorted(list(paths)) - - def generate_type_definitions( - self, output_file: str = "generated_types.py", sample_size: int = 1000 - ): - """ - Generates a Python module with dataclasses representing the data schema. - Detects nested JSON strings and generates types for them as well. 
- """ - console.print(f"[bold blue]Inferring schema from first {sample_size} items...[/bold blue]") - schema = self._infer_full_schema(sample_size) - - console.print("[bold blue]Generating code...[/bold blue]") - code = self._generate_code_from_schema(schema) - - with open(output_file, "w") as f: - f.write(code) - console.print(f"[bold green]Generated type definitions in {output_file}[/bold green]") - - def _infer_full_schema(self, sample_size: int) -> Dict[str, Any]: - root_schema = {"type": "object", "fields": {}} - - def merge(schema, value): - if value is None: - schema["nullable"] = True - return - - if isinstance(value, dict): - if schema.get("type") and schema["type"] != "object": - schema["type"] = "Any" # Conflict - return - schema["type"] = "object" - if "fields" not in schema: - schema["fields"] = {} - - for k, v in value.items(): - if k not in schema["fields"]: - schema["fields"][k] = {} - merge(schema["fields"][k], v) - - elif isinstance(value, list): - if schema.get("type") and schema["type"] != "list": - schema["type"] = "Any" - return - schema["type"] = "list" - if "item_type" not in schema: - schema["item_type"] = {} - - for item in value: - merge(schema["item_type"], item) - - else: - # Primitive - # Check if string is JSON - is_json = False - if isinstance(value, str): - try: - if (value.strip().startswith("{") and value.strip().endswith("}")) or ( - value.strip().startswith("[") and value.strip().endswith("]") - ): - parsed = json.loads(value) - if isinstance(parsed, (dict, list)): - is_json = True - schema["is_json_string"] = True - merge(schema, parsed) - return - except (json.JSONDecodeError, TypeError): - pass - - if not is_json: - py_type = type(value).__name__ - # Map python types to type hints - if py_type == "float": - py_type = "float" - elif py_type == "int": - py_type = "int" - elif py_type == "str": - py_type = "str" - elif py_type == "bool": - py_type = "bool" - - if schema.get("type") == "primitive" and schema.get("python_type") != 
py_type: - # If mixing int and float, upgrade to float - if {schema.get("python_type"), py_type} == {"int", "float"}: - schema["python_type"] = "float" - else: - schema["python_type"] = "Any" - else: - schema["type"] = "primitive" - schema["python_type"] = py_type - - count = 0 - for item in self._get_item_generator(): - merge(root_schema, item) - count += 1 - if count >= sample_size: - break - - return root_schema - - def _generate_code_from_schema(self, schema: Dict[str, Any]) -> str: - classes = {} # name -> definition - - def get_type_name(schema, context_name): - if schema.get("type") == "object": - class_name = "".join(x[:1].upper() + x[1:] for x in context_name.split("_")) - if not class_name: - class_name = "Root" - - # Handle collision - base_name = class_name - counter = 1 - while ( - class_name in classes - and classes[class_name] is not None - and classes[class_name] != schema.get("fields") - ): - # Note: comparing fields is a weak check for equality, but sufficient for now - class_name = f"{base_name}{counter}" - counter += 1 - - if class_name not in classes: - classes[class_name] = None # Placeholder - fields = [] - for k, v in schema.get("fields", {}).items(): - field_type = get_type_name(v, k) - fields.append( - ( - k, - field_type, - v.get("nullable", False), - v.get("is_json_string", False), - ) - ) - classes[class_name] = fields - - return class_name - - elif schema.get("type") == "list": - item_type = get_type_name(schema.get("item_type", {}), context_name + "_item") - return f"List[{item_type}]" - - elif schema.get("type") == "primitive": - t = schema.get("python_type", "Any") - return "Any" if t == "Any" else t - - return "Any" - - get_type_name(schema, "SleepinessItem") - - # Generate Code - lines = [ - "# Auto-generated type definitions", - "", - "from __future__ import annotations", - "from dataclasses import dataclass", - "from typing import List, Optional, Any, Dict", - "import json", - "", - "def parse_json_field(value):", - " if 
isinstance(value, str):", - " try:", - " return json.loads(value)", - " except:", - " return value", - " return value", - "", - ] - - for name, fields in classes.items(): - if fields is None: - continue # Should not happen if recursion finished - - lines.append("@dataclass") - lines.append(f"class {name}:") - if not fields: - lines.append(" pass") - - for fname, ftype, nullable, is_json in fields: - safe_fname = fname - if safe_fname in ( - "from", - "class", - "def", - "return", - "import", - "type", - "global", - "for", - "if", - "else", - "while", - ): - safe_fname = f"{fname}_" - - type_hint = ftype - if nullable: - type_hint = f"Optional[{type_hint}]" - - lines.append(f" {safe_fname}: {type_hint} = None") - - # Add from_dict method - lines.append("") - lines.append(" @classmethod") - lines.append(" def from_dict(cls, obj: Any) -> Any:") - lines.append(" if not isinstance(obj, dict): return obj") - lines.append(" instance = cls()") - for fname, ftype, nullable, is_json in fields: - safe_fname = fname - if safe_fname in ( - "from", - "class", - "def", - "return", - "import", - "type", - "global", - "for", - "if", - "else", - "while", - ): - safe_fname = f"{fname}_" - - base_type = ftype - is_list = False - if ftype.startswith("List[") and ftype.endswith("]"): - base_type = ftype[5:-1] - is_list = True - - is_custom_class = base_type in classes - - lines.append(f" val = obj.get('{fname}')") - if is_json: - lines.append(" if isinstance(val, str): val = parse_json_field(val)") - - if is_custom_class: - if is_list: - lines.append(" if val is not None and isinstance(val, list):") - lines.append( - f" instance.{safe_fname} = [{base_type}.from_dict(x) for x in val]" - ) - else: - lines.append(" if val is not None:") - lines.append( - f" instance.{safe_fname} = {base_type}.from_dict(val)" - ) - else: - lines.append(f" instance.{safe_fname} = val") - - lines.append(" return instance") - lines.append("") - - return "\n".join(lines) diff --git 
a/src/carp/records/__init__.py b/src/carp/records/__init__.py new file mode 100644 index 0000000..639e1be --- /dev/null +++ b/src/carp/records/__init__.py @@ -0,0 +1,5 @@ +"""Record iteration services.""" + +from .service import RecordService + +__all__ = ["RecordService"] diff --git a/src/carp/records/service.py b/src/carp/records/service.py new file mode 100644 index 0000000..8637457 --- /dev/null +++ b/src/carp/records/service.py @@ -0,0 +1,81 @@ +"""CARP record iteration, filtering, and inspection.""" + +from __future__ import annotations + +from collections.abc import Iterable, Iterator +from typing import Any + +from carp.core.fields import collect_field_paths, deployment_id_from_record, full_data_type +from carp.core.files import iter_json_array + + +class RecordService: + """Stream and filter CARP records.""" + + def __init__(self, file_paths: tuple[Any, ...], participant_directory: Any) -> None: + self._file_paths = file_paths + self._participants = participant_directory + + def iter_records( + self, + data_type: str | None = None, + deployment_ids: Iterable[str] | None = None, + ) -> Iterator[dict[str, Any]]: + """Yield records matching optional data-type and deployment filters.""" + + allowed_ids = set(deployment_ids or []) + for file_path in self._file_paths: + for item in iter_json_array(file_path): + if allowed_ids and deployment_id_from_record(item) not in allowed_ids: + continue + if data_type and full_data_type(item) != data_type: + continue + yield item + + def iter_with_participants(self, data_type: str | None = None) -> Iterator[dict[str, Any]]: + """Yield records enriched with participant metadata.""" + + for item in self.iter_records(data_type): + participant = self._participants.get_participant(deployment_id_from_record(item) or "") + if not participant: + yield item + continue + enriched = dict(item) + enriched["_participant"] = participant.to_dict() + yield enriched + + def count( + self, + data_type: str | None = None, + deployment_ids: 
Iterable[str] | None = None, + ) -> int: + """Return the number of matching records.""" + + return sum(1 for _ in self.iter_records(data_type, deployment_ids)) + + def list_fields(self, sample_size: int = 100) -> list[str]: + """Return field paths sampled from the first records.""" + + fields: set[str] = set() + for index, item in enumerate(self.iter_records()): + if index >= sample_size: + break + fields.update(self.collect_fields(item)) + return sorted(fields) + + def data_types(self) -> list[str]: + """Return all observed record data types.""" + + return sorted({self.data_type(item) for item in self.iter_records()}) + + @staticmethod + def collect_fields(item: dict[str, Any]) -> set[str]: + """Collect field paths for one record.""" + + return collect_field_paths(item) + + @staticmethod + def data_type(item: dict[str, Any]) -> str: + """Return the fully qualified data type for one record.""" + + return full_data_type(item) diff --git a/src/carp/schema/__init__.py b/src/carp/schema/__init__.py new file mode 100644 index 0000000..29e6fac --- /dev/null +++ b/src/carp/schema/__init__.py @@ -0,0 +1,5 @@ +"""Schema discovery services.""" + +from .service import SchemaService + +__all__ = ["SchemaService"] diff --git a/src/carp/schema/service.py b/src/carp/schema/service.py new file mode 100644 index 0000000..f3583ab --- /dev/null +++ b/src/carp/schema/service.py @@ -0,0 +1,30 @@ +"""Schema discovery for CARP studies.""" + +from __future__ import annotations + +from collections import defaultdict +from typing import Any + + +class SchemaService: + """Infer lightweight measurement schemas grouped by data type.""" + + def __init__(self, records: Any) -> None: + self._records = records + self._cache: dict[str, list[str]] = {} + + def scan(self) -> dict[str, list[str]]: + """Return inferred measurement keys grouped by data type.""" + + schemas: dict[str, set[str]] = defaultdict(set) + for item in self._records.iter_records(): + measurement = item.get("measurement", 
{}).get("data", {}) + for key in measurement.keys(): + schemas[self._records.data_type(item)].add(key) + self._cache = {key: sorted(values) for key, values in sorted(schemas.items())} + return self._cache + + def cached(self) -> dict[str, list[str]]: + """Return the cached schema, scanning the study if needed.""" + + return self._cache or self.scan() diff --git a/src/carp/study.py b/src/carp/study.py new file mode 100644 index 0000000..88ea2c0 --- /dev/null +++ b/src/carp/study.py @@ -0,0 +1,47 @@ +"""Composition root for the modular CARP Analytics API.""" + +from __future__ import annotations + +from pathlib import Path + +from carp.constants import PARTICIPANT_FILE +from carp.core.files import resolve_paths +from carp.export import ExportService +from carp.frames import FrameService +from carp.participants import ParticipantDirectory, ParticipantService +from carp.plotting import PlotService +from carp.records import RecordService +from carp.schema import SchemaService +from carp.types import TypeDefinitionService + + +def _discover_participant_folders(file_paths: tuple[Path, ...]) -> tuple[Path, ...]: + """Return phase folders that contain participant metadata.""" + + folders = {path.parent for path in file_paths if (path.parent / PARTICIPANT_FILE).exists()} + return tuple(sorted(folders)) + + +class CarpStudy: + """Primary public entrypoint for working with CARP study data.""" + + def __init__( + self, + file_paths: str | Path | tuple[str | Path, ...] 
| list[str | Path], + load_participants: bool = True, + ): + self.file_paths = resolve_paths(file_paths) + participant_folders = _discover_participant_folders(self.file_paths) if load_participants else () + self._directory = ParticipantDirectory.from_folders(participant_folders) + self.records = RecordService(self.file_paths, self._directory) + self.participants = ParticipantService(self, self._directory) + self.schema = SchemaService(self.records) + self.export = ExportService(self.records) + self.frames = FrameService(self.records, self._directory) + self.types = TypeDefinitionService(self.records) + self.plots = PlotService(self.frames, self.participants) + + def participant(self, email: str) -> object: + """Return a participant-scoped view by email.""" + + return self.participants.view(email) diff --git a/src/carp/types/__init__.py b/src/carp/types/__init__.py new file mode 100644 index 0000000..240dfa7 --- /dev/null +++ b/src/carp/types/__init__.py @@ -0,0 +1,5 @@ +"""Type-generation services.""" + +from .service import TypeDefinitionService + +__all__ = ["TypeDefinitionService"] diff --git a/src/carp/types/infer.py b/src/carp/types/infer.py new file mode 100644 index 0000000..f235e7d --- /dev/null +++ b/src/carp/types/infer.py @@ -0,0 +1,64 @@ +"""Schema inference helpers for generated type definitions.""" + +from __future__ import annotations + +import json +from typing import Any + + +def _maybe_json_string(value: object) -> Any | None: + """Parse JSON-like strings when possible.""" + + if not isinstance(value, str): + return None + stripped = value.strip() + if not stripped or stripped[0] not in "[{" or stripped[-1] not in "]}": + return None + try: + parsed = json.loads(stripped) + except json.JSONDecodeError: + return None + return parsed if isinstance(parsed, (dict, list)) else None + + +def merge_schema(schema: dict[str, Any], value: Any) -> None: + """Merge a Python value into an inferred schema.""" + + if value is None: + schema["nullable"] = True + 
return + parsed = _maybe_json_string(value) + if parsed is not None: + schema["is_json_string"] = True + merge_schema(schema, parsed) + return + if isinstance(value, dict): + schema["type"] = "object" + fields = schema.setdefault("fields", {}) + for key, child in value.items(): + merge_schema(fields.setdefault(key, {}), child) + return + if isinstance(value, list): + schema["type"] = "list" + item_type = schema.setdefault("item_type", {}) + for child in value: + merge_schema(item_type, child) + return + python_type = type(value).__name__ + if schema.get("type") == "primitive" and schema.get("python_type") != python_type: + pair = {schema.get("python_type"), python_type} + schema["python_type"] = "float" if pair == {"int", "float"} else "Any" + return + schema["type"] = "primitive" + schema["python_type"] = python_type + + +def infer_schema(records: Any, sample_size: int) -> dict[str, Any]: + """Infer a schema from sampled study records.""" + + root = {"type": "object", "fields": {}} + for index, item in enumerate(records): + if index >= sample_size: + break + merge_schema(root, item) + return root diff --git a/src/carp/types/render.py b/src/carp/types/render.py new file mode 100644 index 0000000..6a3896c --- /dev/null +++ b/src/carp/types/render.py @@ -0,0 +1,97 @@ +"""Code rendering for inferred type definitions.""" + +from __future__ import annotations + +from typing import Any + + +def render_types(schema: dict[str, Any], root_name: str = "StudyItem") -> str: + """Render dataclass code from an inferred schema.""" + + classes: dict[str, list[tuple[str, str, bool, bool]] | None] = {} + + def type_name(node: dict[str, Any], context: str) -> str: + if node.get("type") == "object": + class_name = "".join(part[:1].upper() + part[1:] for part in context.split("_")) or root_name + while class_name in classes: + class_name = f"{class_name}Item" + classes[class_name] = None + fields = [] + for key, value in node.get("fields", {}).items(): + fields.append( + ( + key, + 
type_name(value, key), + value.get("nullable", False), + value.get("is_json_string", False), + ) + ) + classes[class_name] = fields + return class_name + if node.get("type") == "list": + return f"list[{type_name(node.get('item_type', {}), context + '_item')}]" + if node.get("type") == "primitive": + return str(node.get("python_type", "Any")) + return "Any" + + type_name(schema, root_name) + lines = [ + '"""Auto-generated type definitions for CARP data."""', + "", + "from __future__ import annotations", + "", + "import json", + "from dataclasses import dataclass", + "from typing import Any", + "", + "", + "def parse_json_field(value: Any) -> Any:", + ' """Parse JSON-like string fields when possible."""', + "", + " if not isinstance(value, str):", + " return value", + " try:", + " return json.loads(value)", + " except json.JSONDecodeError:", + " return value", + "", + ] + for class_name, fields in classes.items(): + lines.extend(["@dataclass(slots=True)", f"class {class_name}:", f' """Generated dataclass for `{class_name}`."""']) + if not fields: + lines.extend([" pass", ""]) + continue + for name, annotation, nullable, _ in fields: + type_hint = f"{annotation} | None" if nullable else annotation + safe_name = f"{name}_" if name in {"class", "from", "type"} else name + lines.append(f" {safe_name}: {type_hint} = None") + lines.extend( + [ + "", + " @classmethod", + " def from_dict(cls, obj: Any) -> Any:", + ' """Build an instance from a dictionary."""', + "", + " if not isinstance(obj, dict):", + " return obj", + " instance = cls()", + ] + ) + for name, annotation, _, is_json in fields: + safe_name = f"{name}_" if name in {"class", "from", "type"} else name + base_type = annotation.removeprefix("list[").removesuffix("]") + lines.append(f" value = obj.get('{name}')") + if is_json: + lines.append(" value = parse_json_field(value)") + if annotation.startswith("list[") and base_type in classes: + lines.extend( + [ + " if isinstance(value, list):", + f" value = 
[{base_type}.from_dict(item) for item in value]", + ] + ) + elif base_type in classes: + lines.extend([" if value is not None:", f" value = {base_type}.from_dict(value)"]) + lines.append(f" instance.{safe_name} = value") + lines.extend([" return instance", ""]) + return "\n".join(lines) diff --git a/src/carp/types/service.py b/src/carp/types/service.py new file mode 100644 index 0000000..a274261 --- /dev/null +++ b/src/carp/types/service.py @@ -0,0 +1,28 @@ +"""Type-definition generation services.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +from .infer import infer_schema +from .render import render_types + + +class TypeDefinitionService: + """Generate typed Python models from sampled CARP records.""" + + def __init__(self, records: Any) -> None: + self._records = records + + def generate( + self, + output_file: str | Path = "generated_types.py", + sample_size: int = 1_000, + ) -> Path: + """Generate a Python module containing inferred dataclasses.""" + + schema = infer_schema(self._records.iter_records(), sample_size) + output_path = Path(output_file) + output_path.write_text(render_types(schema), encoding="utf-8") + return output_path diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..49c7d1b --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,33 @@ +"""Shared pytest fixtures for CARP Analytics.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from carp import CarpStudy + + +@pytest.fixture() +def fixture_root() -> Path: + """Return the self-contained multi-phase fixture root.""" + + return Path(__file__).parent / "fixtures" / "multi_phase" + + +@pytest.fixture() +def study_paths(fixture_root: Path) -> list[Path]: + """Return the default synthetic study file paths.""" + + return [ + fixture_root / "phase_a" / "data-streams.json", + fixture_root / "phase_b" / "data-streams.json", + ] + + +@pytest.fixture() +def study(study_paths: list[Path]) 
-> CarpStudy: + """Return a study backed by self-contained fixtures.""" + + return CarpStudy(study_paths) diff --git a/tests/fixtures/multi_phase/phase_a/data-streams.json b/tests/fixtures/multi_phase/phase_a/data-streams.json new file mode 100644 index 0000000..111918d --- /dev/null +++ b/tests/fixtures/multi_phase/phase_a/data-streams.json @@ -0,0 +1,109 @@ +[ + { + "studyDeploymentId": "deploy-email-a", + "dataStream": { + "studyDeploymentId": "deploy-email-a", + "dataType": { + "namespace": "dk.cachet.carp", + "name": "stepcount" + }, + "deviceRoleName": "Phone" + }, + "measurement": { + "sensorStartTime": 1000, + "data": { + "steps": 100 + } + }, + "sequenceId": 1, + "syncPoint": 1, + "triggerIds": [], + "deviceRoleName": "Phone" + }, + { + "studyDeploymentId": "deploy-email-a", + "dataStream": { + "studyDeploymentId": "deploy-email-a", + "dataType": { + "namespace": "dk.cachet.carp", + "name": "location" + }, + "deviceRoleName": "Phone" + }, + "measurement": { + "sensorStartTime": 1000, + "data": { + "latitude": 55.1, + "longitude": 12.1 + } + }, + "sequenceId": 2, + "syncPoint": 1, + "triggerIds": [], + "deviceRoleName": "Phone" + }, + { + "studyDeploymentId": "deploy-ssn-a", + "dataStream": { + "studyDeploymentId": "deploy-ssn-a", + "dataType": { + "namespace": "dk.cachet.carp", + "name": "stepcount" + }, + "deviceRoleName": "Phone" + }, + "measurement": { + "sensorStartTime": 2000, + "data": { + "steps": 50 + } + }, + "sequenceId": 3, + "syncPoint": 1, + "triggerIds": [], + "deviceRoleName": "Phone" + }, + { + "dataStream": { + "studyDeploymentId": "deploy-name-a", + "dataType": { + "namespace": "dk.cachet.carp", + "name": "survey" + }, + "deviceRoleName": "Phone" + }, + "measurement": { + "sensorStartTime": 3000, + "data": { + "response_json": "{\"score\": 3, \"tags\": [\"rested\", \"calm\"]}" + } + }, + "sequenceId": 4, + "syncPoint": 1, + "triggerIds": [ + "survey" + ], + "deviceRoleName": "Phone" + }, + { + "studyDeploymentId": "deploy-unknown-a", + 
"dataStream": { + "studyDeploymentId": "deploy-unknown-a", + "dataType": { + "namespace": "com.acme", + "name": "stepcount" + }, + "deviceRoleName": "Watch" + }, + "measurement": { + "sensorStartTime": 4000, + "data": { + "steps": 9 + } + }, + "sequenceId": 5, + "syncPoint": 1, + "triggerIds": [], + "deviceRoleName": "Watch" + } +] diff --git a/tests/fixtures/multi_phase/phase_a/participant-data.json b/tests/fixtures/multi_phase/phase_a/participant-data.json new file mode 100644 index 0000000..3f4d24d --- /dev/null +++ b/tests/fixtures/multi_phase/phase_a/participant-data.json @@ -0,0 +1,64 @@ +[ + { + "studyDeploymentId": "deploy-email-a", + "roles": [ + { + "roleName": "Participant", + "data": { + "dk.carp.webservices.input.full_name": { + "firstName": "Alice", + "lastName": "Example" + }, + "dk.carp.webservices.input.informed_consent": { + "signedTimestamp": "2024-01-01T00:00:00Z", + "userId": "user-email-a", + "name": "alice@example.com", + "consent": "{\"signature\": {\"firstName\": \"Alice\", \"lastName\": \"Example\"}}" + } + } + } + ], + "common": {} + }, + { + "studyDeploymentId": "deploy-ssn-a", + "roles": [ + { + "roleName": "Participant", + "data": { + "dk.carp.webservices.input.full_name": "Bob Example", + "dk.carp.webservices.input.ssn": { + "socialSecurityNumber": "1111" + }, + "dk.cachet.carp.input.sex": "male" + } + } + ], + "common": {} + }, + { + "studyDeploymentId": "deploy-name-a", + "roles": [ + { + "roleName": "Participant", + "data": { + "dk.carp.webservices.input.full_name": { + "firstName": "Charlie", + "lastName": "Example" + } + } + } + ], + "common": {} + }, + { + "studyDeploymentId": "deploy-unknown-a", + "roles": [ + { + "roleName": "Participant", + "data": {} + } + ], + "common": {} + } +] diff --git a/tests/fixtures/multi_phase/phase_b/data-streams.json b/tests/fixtures/multi_phase/phase_b/data-streams.json new file mode 100644 index 0000000..167d50d --- /dev/null +++ b/tests/fixtures/multi_phase/phase_b/data-streams.json @@ -0,0 
+1,133 @@ +[ + { + "studyDeploymentId": "deploy-email-b", + "dataStream": { + "studyDeploymentId": "deploy-email-b", + "dataType": { + "namespace": "dk.cachet.carp", + "name": "stepcount" + }, + "deviceRoleName": "Phone" + }, + "measurement": { + "sensorStartTime": 5000, + "data": { + "steps": 150, + "cadence": 90 + } + }, + "sequenceId": 6, + "syncPoint": 1, + "triggerIds": [], + "deviceRoleName": "Phone" + }, + { + "studyDeploymentId": "deploy-email-b", + "dataStream": { + "studyDeploymentId": "deploy-email-b", + "dataType": { + "namespace": "dk.cachet.carp", + "name": "location" + }, + "deviceRoleName": "Phone" + }, + "measurement": { + "sensorStartTime": 5000, + "data": { + "latitude": 55.2, + "longitude": 12.2 + } + }, + "sequenceId": 7, + "syncPoint": 1, + "triggerIds": [], + "deviceRoleName": "Phone" + }, + { + "studyDeploymentId": "deploy-ssn-b", + "dataStream": { + "studyDeploymentId": "deploy-ssn-b", + "dataType": { + "namespace": "dk.cachet.carp", + "name": "stepcount" + }, + "deviceRoleName": "Phone" + }, + "measurement": { + "sensorStartTime": 6000, + "data": { + "steps": 70 + } + }, + "sequenceId": 8, + "syncPoint": 1, + "triggerIds": [], + "deviceRoleName": "Phone" + }, + { + "studyDeploymentId": "deploy-name-b", + "dataStream": { + "studyDeploymentId": "deploy-name-b", + "dataType": { + "namespace": "dk.cachet.carp", + "name": "survey" + }, + "deviceRoleName": "Phone" + }, + "measurement": { + "sensorStartTime": 7000, + "data": { + "response_json": "{\"score\": 5}" + } + }, + "sequenceId": 9, + "syncPoint": 1, + "triggerIds": [ + "survey" + ], + "deviceRoleName": "Phone" + }, + { + "studyDeploymentId": "deploy-name-b", + "dataStream": { + "studyDeploymentId": "deploy-name-b", + "dataType": { + "namespace": "dk.cachet.carp", + "name": "location" + }, + "deviceRoleName": "Phone" + }, + "measurement": { + "sensorStartTime": 7100, + "data": { + "latitude": 56.0, + "longitude": 13.0 + } + }, + "sequenceId": 10, + "syncPoint": 1, + "triggerIds": [], + 
"deviceRoleName": "Phone" + }, + { + "studyDeploymentId": "deploy-orphan", + "dataStream": { + "studyDeploymentId": "deploy-orphan", + "dataType": { + "namespace": "dk.cachet.carp", + "name": "weather" + }, + "deviceRoleName": "Phone" + }, + "measurement": { + "sensorStartTime": 8000, + "data": { + "temperature": 21 + } + }, + "sequenceId": 11, + "syncPoint": 1, + "triggerIds": [], + "deviceRoleName": "Phone" + } +] diff --git a/tests/fixtures/multi_phase/phase_b/participant-data.json b/tests/fixtures/multi_phase/phase_b/participant-data.json new file mode 100644 index 0000000..5cd0b16 --- /dev/null +++ b/tests/fixtures/multi_phase/phase_b/participant-data.json @@ -0,0 +1,46 @@ +[ + { + "studyDeploymentId": "deploy-email-b", + "roles": [ + { + "roleName": "Participant", + "data": { + "dk.carp.webservices.input.informed_consent": { + "signedTimestamp": "2024-01-02T00:00:00Z", + "userId": "user-email-b", + "name": "alice@example.com", + "consent": "{\"signature\": {\"firstName\": \"Alice\", \"lastName\": \"Example\"}}" + } + } + } + ], + "common": {} + }, + { + "studyDeploymentId": "deploy-ssn-b", + "roles": [ + { + "roleName": "Participant", + "data": { + "dk.carp.webservices.input.full_name": "Robert Example", + "dk.carp.webservices.input.ssn": { + "socialSecurityNumber": "1111" + } + } + } + ], + "common": {} + }, + { + "studyDeploymentId": "deploy-name-b", + "roles": [ + { + "roleName": "Participant", + "data": { + "dk.carp.webservices.input.full_name": "Charlie Example" + } + } + ], + "common": {} + } +] diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..5896922 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,57 @@ +"""Tests for CLI wiring and command execution.""" + +from __future__ import annotations + +from argparse import Namespace + +from carp.commandline import app as cli_app + + +def test_cli_commands_and_help(capsys, study_paths, tmp_path) -> None: + """Exercise the public CLI commands.""" + + assert 
cli_app.main(["--version"]) == 0 + assert cli_app.main([]) == 0 + assert cli_app.main(["schema", *map(str, study_paths)]) == 0 + assert cli_app.main(["count", *map(str, study_paths)]) == 0 + assert cli_app.main(["participants", *map(str, study_paths)]) == 0 + assert cli_app.main( + ["export", *map(str, study_paths), "-o", str(tmp_path / "export.json"), "-t", "dk.cachet.carp.location"] + ) == 0 + assert cli_app.main(["group", *map(str, study_paths), "-o", str(tmp_path / "grouped")]) == 0 + captured = capsys.readouterr().out + assert "carp-analytics-python version" in captured + assert "Total items" in captured + + +def test_cli_convert_and_error_paths(monkeypatch, capsys, study_paths, tmp_path) -> None: + """Exercise CLI conversion and exception-handling branches.""" + + assert cli_app.main(["convert", *map(str, study_paths), "-o", str(tmp_path / "parquet"), "--batch-size", "1"]) == 0 + assert cli_app.main(["count", "missing.json"]) == 1 + + class FakeParser: + """Minimal fake parser for exception tests.""" + + def parse_args(self, _argv): + return Namespace(version=False, command="test", handler=lambda _args: (_ for _ in ()).throw(KeyboardInterrupt())) + + def print_help(self): + return None + + monkeypatch.setattr(cli_app, "_build_parser", lambda: FakeParser()) + assert cli_app.main(["ignored"]) == 130 + monkeypatch.setattr( + cli_app, + "_build_parser", + lambda: type( + "BrokenParser", + (), + { + "parse_args": lambda self, _argv: Namespace(version=False, command="x", handler=lambda _args: (_ for _ in ()).throw(ValueError("boom"))), + "print_help": lambda self: None, + }, + )(), + ) + assert cli_app.main(["ignored"]) == 1 + assert "Error: boom" in capsys.readouterr().out diff --git a/tests/test_core.py b/tests/test_core.py new file mode 100644 index 0000000..110303a --- /dev/null +++ b/tests/test_core.py @@ -0,0 +1,55 @@ +"""Tests for shared CARP helpers.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from 
carp.core.dependencies import import_or_raise, module_available +from carp.core.fields import collect_field_paths, deployment_id_from_record, get_nested_value +from carp.core.files import JsonArrayWriter, iter_json_array, resolve_paths +from carp.core.naming import parquet_stem, sanitize_filename +from carp.participants.directory import ParticipantDirectory +from carp.participants.parser import load_participant_file + + +def test_core_helpers_cover_nested_values_and_paths(study_paths: list[Path]) -> None: + """Exercise shared path and field helpers.""" + + record = next(iter_json_array(study_paths[0])) + assert resolve_paths(study_paths) == tuple(study_paths) + assert get_nested_value(record, "measurement.data.steps") == 100 + assert get_nested_value(record, "missing.value", "fallback") == "fallback" + assert deployment_id_from_record(record) == "deploy-email-a" + assert "measurement.data.steps" in collect_field_paths(record) + assert sanitize_filename("alice@example.com", allowed="-_.@") == "alice@example.com" + assert parquet_stem("dk.cachet.carp.stepcount") == "dk.cachet.carp__stepcount" + + +def test_json_array_writer_and_module_helpers(tmp_path: Path) -> None: + """Exercise JSON writing and optional dependency errors.""" + + output_path = tmp_path / "output.json" + writer = JsonArrayWriter(output_path) + writer.write({"value": 1}) + writer.write({"value": 2}) + writer.close() + assert output_path.read_text(encoding="utf-8") == '[{"value": 1},{"value": 2}]' + assert module_available("json") is True + with pytest.raises(RuntimeError): + import_or_raise("module_that_does_not_exist_for_tests", "test") + + +def test_participant_loader_handles_invalid_consent(tmp_path: Path) -> None: + """Exercise parser branches for invalid consent payloads and missing folders.""" + + participant_file = tmp_path / "participant-data.json" + participant_file.write_text( + 
'[{"studyDeploymentId":"x","roles":[{"roleName":"Participant","data":{"dk.carp.webservices.input.informed_consent":"broken"}}]}]', + encoding="utf-8", + ) + loaded = load_participant_file(participant_file) + assert loaded["x"].consent_signed is False + empty_directory = ParticipantDirectory.from_folders((tmp_path / "missing",)) + assert empty_directory.summary_rows() == [] diff --git a/tests/test_edge_frames_plotting.py b/tests/test_edge_frames_plotting.py new file mode 100644 index 0000000..f8eb23e --- /dev/null +++ b/tests/test_edge_frames_plotting.py @@ -0,0 +1,84 @@ +"""Additional edge-case coverage for frames and plotting.""" + +from __future__ import annotations + +from types import SimpleNamespace + +from carp.core.dependencies import import_or_raise +from carp.core.fields import collect_field_paths +from carp.plotting.prepare import candidate_series, frames_from_items, prepare_location_frame, prepare_step_frame +from carp.plotting.render import _merge_steps, render_heatmap + + +def test_frame_service_edge_branches(study, tmp_path) -> None: + """Exercise dataframe and parquet helper branches.""" + + pandas = import_or_raise("pandas", "test") + pyarrow = import_or_raise("pyarrow", "test") + assert study.records.list_fields(sample_size=0) == [] + assert collect_field_paths([]) == set() + assert "items[]" in collect_field_paths({"items": []}) + assert study.frames.get_dataframe_with_participants("missing.type").empty + nested = pandas.DataFrame({"dataStream": [{"studyDeploymentId": "nested-id"}]}) + assert study.frames._deployment_series(nested).tolist() == ["nested-id"] + assert study.frames._participant_row("deploy-email-a")["participant_email"] == "alice@example.com" + aligned = study.frames._align_table( + pyarrow, + pyarrow.Table.from_pylist([{"steps": 1}]), + pyarrow.schema([("steps", pyarrow.float64()), ("cadence", pyarrow.int64())]), + ) + assert aligned.column_names == ["steps", "cadence"] + assert aligned["steps"][0].as_py() == 1.0 + assert 
aligned["cadence"][0].as_py() is None + assert study.participant("alice@example.com").dataframe("missing.type").empty + assert study.participant("alice@example.com").available_fields(sample_size=0) == [] + assert study.frames.convert_to_parquet(tmp_path / "flush", batch_size=50) + assert study.frames.get_dataframe("missing.type", tmp_path / "flush").empty + + +def test_plotting_helpers_and_edge_paths(study, tmp_path, monkeypatch) -> None: + """Exercise helper functions and low-probability plotting branches.""" + + pandas = import_or_raise("pandas", "test") + location_items = [ + SimpleNamespace( + measurement=SimpleNamespace( + data=SimpleNamespace(latitude=1.0, longitude=2.0), + sensorStartTime=10, + ) + ), + SimpleNamespace(measurement=None), + ] + step_items = [ + SimpleNamespace(measurement=SimpleNamespace(data=SimpleNamespace(steps=3), sensorStartTime=10)), + SimpleNamespace(measurement=SimpleNamespace(data=SimpleNamespace(steps=None), sensorStartTime=11)), + ] + location_frame, step_frame = frames_from_items(location_items, step_items) + assert not location_frame.empty and not step_frame.empty + assert candidate_series(pandas.DataFrame({"value": [1]}), ["missing", "value"]).tolist() == [1] + assert candidate_series(pandas.DataFrame({"nested": [{"a": {"b": 1}}]}), ["nested.a.b"]).tolist() == [1] + assert candidate_series(pandas.DataFrame({"value": [1]}), ["missing.path"]) is None + assert list(prepare_location_frame(study.frames.get_dataframe("dk.cachet.carp.location"))["_lat"]) == [55.1, 55.2, 56.0] + assert list(prepare_step_frame(study.frames.get_dataframe("dk.cachet.carp.stepcount"))["_steps"]) == [100, 50, 150, 70] + assert render_heatmap(location_frame.iloc[0:0], step_frame, tmp_path / "empty.html") is None + assert render_heatmap(location_frame, pandas.DataFrame({"_steps": [0], "_time": [10], "_lat": [1.0], "_lon": [2.0]}), tmp_path / "zero.html") is not None + assert _merge_steps(pandas, location_frame, pandas.DataFrame({"_steps": [1]})).empty + assert 
study.plots.unified("missing") is None + assert study.plots.deployment("missing", output_file=str(tmp_path / "missing.html")) is None + assert study.plots.deployment("deploy-email-a", location_type="missing.type", output_file=str(tmp_path / "noloc.html")) is None + assert study.plots.deployment("deploy-email-a", step_type="missing.type", output_file=str(tmp_path / "nosteps.html")) is not None + assert study.plots.from_items(location_items, step_items, output_file=str(tmp_path / "objects.html")) is not None + monkeypatch.setattr(study.plots, "candidate_series", lambda *_args, **_kwargs: None) + assert study.participant("alice@example.com").dataframe("dk.cachet.carp.stepcount").shape[0] == 4 + + calls = {"count": 0} + + def staged_series(*_args, **_kwargs): + calls["count"] += 1 + frame = _args[0] + if calls["count"] == 1: + return pandas.Series(["deploy-email-a"] * len(frame), index=frame.index) + return None + + monkeypatch.setattr("carp.plotting.service.candidate_series", staged_series) + assert study.plots.deployment("deploy-email-a", output_file=str(tmp_path / "staged.html")) is not None diff --git a/tests/test_edge_types_cli.py b/tests/test_edge_types_cli.py new file mode 100644 index 0000000..34690ab --- /dev/null +++ b/tests/test_edge_types_cli.py @@ -0,0 +1,103 @@ +"""Additional edge-case coverage for CLI and type-generation helpers.""" + +from __future__ import annotations + +import runpy +from pathlib import Path + +from carp import CarpStudy +from carp.core.fields import get_nested_value +from carp.participants.parser import load_participant_file +from carp.types.infer import _maybe_json_string, infer_schema, merge_schema +from carp.types.render import render_types + + +def test_cli_module_entrypoint(monkeypatch) -> None: + """Execute the module-level CLI entrypoint.""" + + exit_codes = [] + monkeypatch.setattr("carp.commandline.app.main", lambda: 7) + monkeypatch.setattr("sys.exit", lambda code: exit_codes.append(code)) + runpy.run_module("carp.cli", 
run_name="__main__") + assert exit_codes == [7] + + +def test_parser_and_schema_edge_branches(study_paths: list[Path], tmp_path: Path) -> None: + """Exercise parser branches not covered by the default fixture.""" + + assert get_nested_value({"a": 1}, "a.b", "fallback") == "fallback" + assert CarpStudy(study_paths).schema.cached()["dk.cachet.carp.location"] == ["latitude", "longitude"] + + participant_file = tmp_path / "participant-data.json" + participant_file.write_text( + """ + [ + {"roles": [{"data": {}}]}, + { + "studyDeploymentId": "string-ssn", + "roles": [ + { + "roleName": "Participant", + "data": { + "dk.carp.webservices.input.ssn": "2222", + "dk.carp.webservices.input.informed_consent": { + "name": "eve@example.com", + "consent": "{broken json}", + "note": 1 + } + } + } + ] + }, + { + "studyDeploymentId": "non-string-consent", + "roles": [ + { + "roleName": "Participant", + "data": { + "dk.carp.webservices.input.informed_consent": { + "name": "nonstr@example.com", + "consent": 1 + } + } + } + ] + } + ] + """, + encoding="utf-8", + ) + loaded = load_participant_file(participant_file) + assert loaded["string-ssn"].ssn == "2222" + assert loaded["string-ssn"].email == "eve@example.com" + assert loaded["string-ssn"].full_name is None + assert loaded["non-string-consent"].email == "nonstr@example.com" + + +def test_type_inference_and_rendering_edge_branches() -> None: + """Exercise edge branches in schema inference and code rendering.""" + + assert _maybe_json_string("plain text") is None + assert _maybe_json_string("{broken}") is None + schema = {} + merge_schema(schema, None) + merge_schema(schema, {"value": [1, 2.0]}) + assert schema["nullable"] is True + assert infer_schema(iter([{"a": 1}, {"a": 2}]), sample_size=0)["fields"] == {} + + rendered = render_types( + { + "type": "object", + "fields": { + "child": {"type": "object", "fields": {}}, + "other": {"type": "object", "fields": {"child": {"type": "object", "fields": {"value": {"type": "primitive", 
"python_type": "int"}}}}}, + "matching": {"type": "object", "fields": {"child": {"type": "object", "fields": {}}}}, + "items": {"type": "list", "item_type": {"type": "object", "fields": {"from": {"type": "primitive", "python_type": "str"}}}}, + "mystery": {}, + }, + } + ) + assert "class Child:" in rendered + assert "class ChildItem:" in rendered + assert "from_: str = None" in rendered + assert "mystery: Any = None" in rendered diff --git a/tests/test_export.py b/tests/test_export.py new file mode 100644 index 0000000..8f618f1 --- /dev/null +++ b/tests/test_export.py @@ -0,0 +1,24 @@ +"""Tests for JSON export and grouping flows.""" + +from __future__ import annotations + +import json + + +def test_export_json_and_group_by_field(study, tmp_path) -> None: + """Exercise JSON export and field-based grouping.""" + + export_path = study.export.export_json(tmp_path / "records.json", "dk.cachet.carp.location") + payload = json.loads(export_path.read_text(encoding="utf-8")) + assert len(payload) == 3 + grouped = study.export.group_by_field("dataStream.dataType.namespace", tmp_path / "grouped") + assert {path.name for path in grouped} == {"com.acme.json", "dk.cachet.carp.json"} + + +def test_group_by_participant_and_identity(study, tmp_path) -> None: + """Exercise participant-aware grouping flows.""" + + participant_files = study.export.group_by_participant(tmp_path / "participants") + identity_files = study.export.group_by_identity("email", tmp_path / "emails") + assert len(participant_files) == 5 + assert {path.name for path in identity_files} == {"alice@example.com.json", "unknown.json"} diff --git a/tests/test_frames.py b/tests/test_frames.py new file mode 100644 index 0000000..5198f84 --- /dev/null +++ b/tests/test_frames.py @@ -0,0 +1,29 @@ +"""Tests for dataframe and parquet services.""" + +from __future__ import annotations + + +def test_dataframe_loading_and_participant_columns(study) -> None: + """Exercise dataframe loading from JSON and participant enrichment.""" 
+ + frame = study.frames.get_dataframe("dk.cachet.carp.stepcount") + assert frame.shape[0] == 4 + enriched = study.frames.get_dataframe_with_participants("dk.cachet.carp.weather") + assert enriched.loc[0, "participant_id"] is None + assert study.frames.parquet_path("dk.cachet.carp.stepcount", "out").name == "dk.cachet.carp__stepcount.parquet" + + +def test_parquet_conversion_and_reload(study, tmp_path) -> None: + """Exercise namespace-aware parquet conversion and reload.""" + + output_dir = tmp_path / "parquet" + created = study.frames.convert_to_parquet(output_dir, batch_size=1) + assert {path.name for path in created} == { + "com.acme__stepcount.parquet", + "dk.cachet.carp__location.parquet", + "dk.cachet.carp__stepcount.parquet", + "dk.cachet.carp__survey.parquet", + "dk.cachet.carp__weather.parquet", + } + frame = study.frames.get_dataframe("dk.cachet.carp.stepcount", output_dir) + assert set(frame.columns) >= {"studyDeploymentId", "measurement"} diff --git a/tests/test_participants.py b/tests/test_participants.py new file mode 100644 index 0000000..c2e5db2 --- /dev/null +++ b/tests/test_participants.py @@ -0,0 +1,42 @@ +"""Tests for participant lookup and unified views.""" + +from __future__ import annotations + +from carp.participants.view import ParticipantView + + +def test_participant_lookups_and_summary(study) -> None: + """Exercise participant lookup methods and summary rows.""" + + assert len(study.participants.by_email("alice@example.com")) == 2 + assert len(study.participants.by_ssn("1111")) == 2 + assert len(study.participants.by_name("Charlie Example")) == 2 + summary_rows = study.participants.summary_rows() + assert len(summary_rows) == 4 + assert any(row["emails"] == "alice@example.com" for row in summary_rows) + + +def test_participant_view_info_fields_and_dataframe(study, tmp_path) -> None: + """Exercise the participant-scoped view object.""" + + participant = study.participant("alice@example.com") + assert isinstance(participant, 
ParticipantView) + info = participant.info() + assert info is not None + assert info["num_deployments"] == 2 + assert participant.count() == 4 + assert participant.data_types() == ["dk.cachet.carp.location", "dk.cachet.carp.stepcount"] + assert "measurement.data.latitude" in participant.available_fields() + assert "measurement.data.steps" in participant.available_fields() + assert participant.dataframe("dk.cachet.carp.stepcount").shape[0] == 2 + assert participant.plot_location(output_file=str(tmp_path / "participant.html")) is not None + + +def test_missing_participant_view_and_unified_lookup(study) -> None: + """Exercise missing participants and unified participant lookups.""" + + missing = study.participant("nobody@example.com") + assert missing.exists is False + assert missing.info() is None + unified_id = study.participant("alice@example.com").info()["unified_id"] + assert len(study.participants.unified(unified_id)) == 2 diff --git a/tests/test_real_data.py b/tests/test_real_data.py new file mode 100644 index 0000000..82945e3 --- /dev/null +++ b/tests/test_real_data.py @@ -0,0 +1,23 @@ +"""Optional real-data integration tests.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from carp import CarpStudy + +SLEEP_DATA_ROOT = Path(__file__).resolve().parents[1] / "sleep-data" + + +@pytest.mark.skipif(not SLEEP_DATA_ROOT.exists(), reason="sleep-data is not available") +def test_real_data_smoke() -> None: + """Exercise stable invariants on local real study data.""" + + file_paths = sorted(SLEEP_DATA_ROOT.glob("phase-*/data-streams.json")) + study = CarpStudy(file_paths) + assert study.records.count() > 0 + assert len(study.records.data_types()) >= 3 + assert len(study.schema.scan()) >= 3 + assert len(study.participants.summary_rows()) >= 1 diff --git a/tests/test_records_schema.py b/tests/test_records_schema.py new file mode 100644 index 0000000..55bbd13 --- /dev/null +++ b/tests/test_records_schema.py @@ -0,0 +1,29 @@ 
+"""Tests for record iteration and schema discovery.""" + +from __future__ import annotations + + +def test_record_filters_and_participant_enrichment(study) -> None: + """Exercise record filtering and participant enrichment.""" + + assert study.records.count() == 11 + assert study.records.count("dk.cachet.carp.stepcount") == 4 + filtered = list(study.records.iter_records(deployment_ids=("deploy-email-a",))) + assert len(filtered) == 2 + enriched = list(study.records.iter_with_participants("dk.cachet.carp.stepcount")) + assert all("_participant" in item for item in enriched) + + +def test_record_field_listing_data_types_and_schema_cache(study) -> None: + """Exercise schema discovery and deployment-id fallback paths.""" + + data_types = study.records.data_types() + assert "com.acme.stepcount" in data_types + assert "dk.cachet.carp.survey" in data_types + assert "triggerIds[]" in study.records.list_fields() + survey = list(study.records.iter_records("dk.cachet.carp.survey")) + assert len(survey) == 2 + assert study.records.count(deployment_ids=("deploy-name-a", "deploy-name-b")) == 3 + schema = study.schema.scan() + assert schema["dk.cachet.carp.stepcount"] == ["cadence", "steps"] + assert study.schema.cached() == schema diff --git a/tests/test_structure.py b/tests/test_structure.py new file mode 100644 index 0000000..8eab46b --- /dev/null +++ b/tests/test_structure.py @@ -0,0 +1,22 @@ +"""Structural repository tests.""" + +from __future__ import annotations + +from pathlib import Path + + +def test_python_files_stay_under_two_hundred_lines() -> None: + """Enforce the 200-line limit for Python source and test files.""" + + root = Path(__file__).resolve().parents[1] + python_files = [ + path + for path in root.rglob("*.py") + if all(part not in {".venv.nosync", "dist", "__pycache__"} for part in path.parts) + ] + offenders = [] + for path in python_files: + line_count = len(path.read_text(encoding="utf-8").splitlines()) + if line_count > 200: + 
offenders.append((path.relative_to(root), line_count)) + assert offenders == [] diff --git a/tests/test_types_plotting.py b/tests/test_types_plotting.py new file mode 100644 index 0000000..b25deb7 --- /dev/null +++ b/tests/test_types_plotting.py @@ -0,0 +1,44 @@ +"""Tests for generated types and plotting services.""" + +from __future__ import annotations + +import importlib.util +import sys + + +def test_generate_type_definitions(study, tmp_path) -> None: + """Exercise generated type definitions for JSON-string payloads.""" + + output_path = study.types.generate(tmp_path / "generated_types.py", sample_size=11) + code = output_path.read_text(encoding="utf-8") + assert "parse_json_field" in code + assert "class StudyItem" in code + spec = importlib.util.spec_from_file_location("generated_types", output_path) + module = importlib.util.module_from_spec(spec) + assert spec.loader is not None + sys.modules[spec.name] = module + spec.loader.exec_module(module) + payload = {"measurement": {"data": {"response_json": '{"score": 1}'}}} + instance = module.StudyItem.from_dict(payload) + assert instance.measurement.data.response_json.score == 1 + + +def test_plot_service_outputs_html(study, tmp_path) -> None: + """Exercise participant, deployment, unified, and item-based plots.""" + + participant_path = study.plots.participant("alice@example.com", output_file=str(tmp_path / "alice.html")) + assert participant_path is not None + assert "leaflet" in (tmp_path / "alice.html").read_text(encoding="utf-8").lower() + unified_id = study.participant("alice@example.com").info()["unified_id"] + assert study.plots.unified(unified_id, output_file=str(tmp_path / "unified.html")) is not None + assert study.plots.deployment("deploy-email-a", output_file=str(tmp_path / "solo.html"), include_steps=False) is not None + location_items = [] + assert study.plots.from_items(location_items, output_file=str(tmp_path / "none.html")) is None + + +def test_plot_service_handles_missing_filters(study, 
monkeypatch, tmp_path) -> None: + """Exercise plot branches for missing participants and missing columns.""" + + assert study.plots.participant("missing@example.com") is None + monkeypatch.setattr("carp.plotting.service.candidate_series", lambda *_args, **_kwargs: None) + assert study.plots.deployment("deploy-email-a", output_file=str(tmp_path / "missing.html")) is None diff --git a/uv.lock b/uv.lock index f4509f9..78780d6 100644 --- a/uv.lock +++ b/uv.lock @@ -7,6 +7,24 @@ resolution-markers = [ "python_full_version < '3.11'", ] +[[package]] +name = "alabaster" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a6/f8/d9c74d0daf3f742840fd818d69cfae176fa332022fd44e3469487d5a9420/alabaster-1.0.0.tar.gz", hash = "sha256:c00dca57bca26fa62a6d7d0a9fcce65f3e026e9bfe33e9c538fd3fbb2144fd9e", size = 24210, upload-time = "2024-07-26T18:15:03.762Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/b3/6b4067be973ae96ba0d615946e314c5ae35f9f993eca561b356540bb0c2b/alabaster-1.0.0-py3-none-any.whl", hash = "sha256:fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b", size = 13929, upload-time = "2024-07-26T18:15:02.05Z" }, +] + +[[package]] +name = "babel" +version = "2.18.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7d/b2/51899539b6ceeeb420d40ed3cd4b7a40519404f9baf3d4ac99dc413a834b/babel-2.18.0.tar.gz", hash = "sha256:b80b99a14bd085fcacfa15c9165f651fbb3406e66cc603abf11c5750937c992d", size = 9959554, upload-time = "2026-02-01T12:30:56.078Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/77/f5/21d2de20e8b8b0408f0681956ca2c69f1320a3848ac50e6e7f39c6159675/babel-2.18.0-py3-none-any.whl", hash = "sha256:e2b422b277c2b9a9630c1d7903c2a00d0830c409c59ac8cae9081c92f1aeba35", size = 10196845, upload-time = "2026-02-01T12:30:53.445Z" }, +] + [[package]] name = "branca" version = "0.8.2" @@ -65,6 +83,21 @@ 
dev = [ { name = "pytest" }, { name = "pytest-cov" }, { name = "ruff" }, + { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "sphinx", version = "9.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.11.*'" }, + { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, + { name = "sphinx-rtd-theme" }, +] +docs = [ + { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "sphinx", version = "9.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.11.*'" }, + { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, +] +test = [ + { name = "folium" }, + { name = "matplotlib" }, + { name = "pandas" }, + { name = "pyarrow" }, ] [package.metadata] @@ -91,6 +124,15 @@ dev = [ { name = "pytest", specifier = ">=7.4.0" }, { name = "pytest-cov", specifier = ">=4.1.0" }, { name = "ruff", specifier = ">=0.1.0" }, + { name = "sphinx", specifier = ">=8.1.3" }, + { name = "sphinx-rtd-theme", specifier = ">=3.1.0" }, +] +docs = [{ name = "sphinx", specifier = ">=8.0.0" }] +test = [ + { name = "folium", specifier = ">=0.14.0" }, + { name = "matplotlib", specifier = ">=3.7.0" }, + { name = "pandas", specifier = ">=2.0.0" }, + { name = "pyarrow", specifier = ">=14.0.0" }, ] [[package]] @@ -487,6 +529,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" }, ] +[[package]] +name = "docutils" +version = "0.21.2" +source = { registry = 
"https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] +sdist = { url = "https://files.pythonhosted.org/packages/ae/ed/aefcc8cd0ba62a0560c3c18c33925362d46c6075480bfa4df87b28e169a9/docutils-0.21.2.tar.gz", hash = "sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f", size = 2204444, upload-time = "2024-04-23T18:57:18.24Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2", size = 587408, upload-time = "2024-04-23T18:57:14.835Z" }, +] + +[[package]] +name = "docutils" +version = "0.22.4" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/ae/b6/03bb70946330e88ffec97aefd3ea75ba575cb2e762061e0e62a213befee8/docutils-0.22.4.tar.gz", hash = "sha256:4db53b1fde9abecbb74d91230d32ab626d94f6badfc575d6db9194a49df29968", size = 2291750, upload-time = "2025-12-18T19:00:26.443Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/02/10/5da547df7a391dcde17f59520a231527b8571e6f46fc8efb02ccb370ab12/docutils-0.22.4-py3-none-any.whl", hash = "sha256:d0013f540772d1420576855455d050a2180186c91c15779301ac2ccb3eeb68de", size = 633196, upload-time = "2025-12-18T19:00:18.077Z" }, +] + [[package]] name = "exceptiongroup" version = "1.3.1" @@ -691,6 +758,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/f2/53b6e9bdd2a91202066764eaa74b572ba4dede0fe47a5a26f4de34b7541a/ijson-3.4.0.post0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:a0fedf09c0f6ffa2a99e7e7fd9c5f3caf74e655c1ee015a0797383e99382ebc3", size = 54657, upload-time = "2025-10-10T05:29:24.482Z" }, ] +[[package]] +name = "imagesize" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } 
+sdist = { url = "https://files.pythonhosted.org/packages/6c/e6/7bf14eeb8f8b7251141944835abd42eb20a658d89084b7e1f3e5fe394090/imagesize-2.0.0.tar.gz", hash = "sha256:8e8358c4a05c304f1fccf7ff96f036e7243a189e9e42e90851993c558cfe9ee3", size = 1773045, upload-time = "2026-03-03T14:18:29.941Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5f/53/fb7122b71361a0d121b669dcf3d31244ef75badbbb724af388948de543e2/imagesize-2.0.0-py2.py3-none-any.whl", hash = "sha256:5667c5bbb57ab3f1fa4bc366f4fbc971db3d5ed011fd2715fd8001f782718d96", size = 9441, upload-time = "2026-03-03T14:18:27.892Z" }, +] + [[package]] name = "iniconfig" version = "2.3.0" @@ -1790,6 +1866,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/25/7a/b0178788f8dc6cafce37a212c99565fa1fe7872c70c6c9c1e1a372d9d88f/rich-14.2.0-py3-none-any.whl", hash = "sha256:76bc51fe2e57d2b1be1f96c524b890b816e334ab4c1e45888799bfaab0021edd", size = 243393, upload-time = "2025-10-09T14:16:51.245Z" }, ] +[[package]] +name = "roman-numerals" +version = "4.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ae/f9/41dc953bbeb056c17d5f7a519f50fdf010bd0553be2d630bc69d1e022703/roman_numerals-4.1.0.tar.gz", hash = "sha256:1af8b147eb1405d5839e78aeb93131690495fe9da5c91856cb33ad55a7f1e5b2", size = 9077, upload-time = "2025-12-17T18:25:34.381Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/54/6f679c435d28e0a568d8e8a7c0a93a09010818634c3c3907fc98d8983770/roman_numerals-4.1.0-py3-none-any.whl", hash = "sha256:647ba99caddc2cc1e55a51e4360689115551bf4476d90e8162cf8c345fe233c7", size = 7676, upload-time = "2025-12-17T18:25:33.098Z" }, +] + [[package]] name = "ruff" version = "0.14.7" @@ -2005,6 +2090,193 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = 
"sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] +[[package]] +name = "snowballstemmer" +version = "3.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/75/a7/9810d872919697c9d01295633f5d574fb416d47e535f258272ca1f01f447/snowballstemmer-3.0.1.tar.gz", hash = "sha256:6d5eeeec8e9f84d4d56b847692bacf79bc2c8e90c7f80ca4444ff8b6f2e52895", size = 105575, upload-time = "2025-05-09T16:34:51.843Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/78/3565d011c61f5a43488987ee32b6f3f656e7f107ac2782dd57bdd7d91d9a/snowballstemmer-3.0.1-py3-none-any.whl", hash = "sha256:6cd7b3897da8d6c9ffb968a6781fa6532dce9c3618a4b127d920dab764a19064", size = 103274, upload-time = "2025-05-09T16:34:50.371Z" }, +] + +[[package]] +name = "sphinx" +version = "8.1.3" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] +dependencies = [ + { name = "alabaster", marker = "python_full_version < '3.11'" }, + { name = "babel", marker = "python_full_version < '3.11'" }, + { name = "colorama", marker = "python_full_version < '3.11' and sys_platform == 'win32'" }, + { name = "docutils", version = "0.21.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "imagesize", marker = "python_full_version < '3.11'" }, + { name = "jinja2", marker = "python_full_version < '3.11'" }, + { name = "packaging", marker = "python_full_version < '3.11'" }, + { name = "pygments", marker = "python_full_version < '3.11'" }, + { name = "requests", marker = "python_full_version < '3.11'" }, + { name = "snowballstemmer", marker = "python_full_version < '3.11'" }, + { name = "sphinxcontrib-applehelp", marker = "python_full_version < '3.11'" }, + { name = "sphinxcontrib-devhelp", marker = "python_full_version < '3.11'" }, + { name = "sphinxcontrib-htmlhelp", 
marker = "python_full_version < '3.11'" }, + { name = "sphinxcontrib-jsmath", marker = "python_full_version < '3.11'" }, + { name = "sphinxcontrib-qthelp", marker = "python_full_version < '3.11'" }, + { name = "sphinxcontrib-serializinghtml", marker = "python_full_version < '3.11'" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/be0b61178fe2cdcb67e2a92fc9ebb488e3c51c4f74a36a7824c0adf23425/sphinx-8.1.3.tar.gz", hash = "sha256:43c1911eecb0d3e161ad78611bc905d1ad0e523e4ddc202a58a821773dc4c927", size = 8184611, upload-time = "2024-10-13T20:27:13.93Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/26/60/1ddff83a56d33aaf6f10ec8ce84b4c007d9368b21008876fceda7e7381ef/sphinx-8.1.3-py3-none-any.whl", hash = "sha256:09719015511837b76bf6e03e42eb7595ac8c2e41eeb9c29c5b755c6b677992a2", size = 3487125, upload-time = "2024-10-13T20:27:10.448Z" }, +] + +[[package]] +name = "sphinx" +version = "9.0.4" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version == '3.11.*'", +] +dependencies = [ + { name = "alabaster", marker = "python_full_version == '3.11.*'" }, + { name = "babel", marker = "python_full_version == '3.11.*'" }, + { name = "colorama", marker = "python_full_version == '3.11.*' and sys_platform == 'win32'" }, + { name = "docutils", version = "0.22.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.11.*'" }, + { name = "imagesize", marker = "python_full_version == '3.11.*'" }, + { name = "jinja2", marker = "python_full_version == '3.11.*'" }, + { name = "packaging", marker = "python_full_version == '3.11.*'" }, + { name = "pygments", marker = "python_full_version == '3.11.*'" }, + { name = "requests", marker = "python_full_version == '3.11.*'" }, + { name = "roman-numerals", marker = "python_full_version == '3.11.*'" }, + { name = "snowballstemmer", marker = "python_full_version == '3.11.*'" }, 
+ { name = "sphinxcontrib-applehelp", marker = "python_full_version == '3.11.*'" }, + { name = "sphinxcontrib-devhelp", marker = "python_full_version == '3.11.*'" }, + { name = "sphinxcontrib-htmlhelp", marker = "python_full_version == '3.11.*'" }, + { name = "sphinxcontrib-jsmath", marker = "python_full_version == '3.11.*'" }, + { name = "sphinxcontrib-qthelp", marker = "python_full_version == '3.11.*'" }, + { name = "sphinxcontrib-serializinghtml", marker = "python_full_version == '3.11.*'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/42/50/a8c6ccc36d5eacdfd7913ddccd15a9cee03ecafc5ee2bc40e1f168d85022/sphinx-9.0.4.tar.gz", hash = "sha256:594ef59d042972abbc581d8baa577404abe4e6c3b04ef61bd7fc2acbd51f3fa3", size = 8710502, upload-time = "2025-12-04T07:45:27.343Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/3f/4bbd76424c393caead2e1eb89777f575dee5c8653e2d4b6afd7a564f5974/sphinx-9.0.4-py3-none-any.whl", hash = "sha256:5bebc595a5e943ea248b99c13814c1c5e10b3ece718976824ffa7959ff95fffb", size = 3917713, upload-time = "2025-12-04T07:45:24.944Z" }, +] + +[[package]] +name = "sphinx" +version = "9.1.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", +] +dependencies = [ + { name = "alabaster", marker = "python_full_version >= '3.12'" }, + { name = "babel", marker = "python_full_version >= '3.12'" }, + { name = "colorama", marker = "python_full_version >= '3.12' and sys_platform == 'win32'" }, + { name = "docutils", version = "0.22.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, + { name = "imagesize", marker = "python_full_version >= '3.12'" }, + { name = "jinja2", marker = "python_full_version >= '3.12'" }, + { name = "packaging", marker = "python_full_version >= '3.12'" }, + { name = "pygments", marker = "python_full_version >= '3.12'" }, + { name = "requests", marker = "python_full_version >= '3.12'" }, + { name = 
"roman-numerals", marker = "python_full_version >= '3.12'" }, + { name = "snowballstemmer", marker = "python_full_version >= '3.12'" }, + { name = "sphinxcontrib-applehelp", marker = "python_full_version >= '3.12'" }, + { name = "sphinxcontrib-devhelp", marker = "python_full_version >= '3.12'" }, + { name = "sphinxcontrib-htmlhelp", marker = "python_full_version >= '3.12'" }, + { name = "sphinxcontrib-jsmath", marker = "python_full_version >= '3.12'" }, + { name = "sphinxcontrib-qthelp", marker = "python_full_version >= '3.12'" }, + { name = "sphinxcontrib-serializinghtml", marker = "python_full_version >= '3.12'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cd/bd/f08eb0f4eed5c83f1ba2a3bd18f7745a2b1525fad70660a1c00224ec468a/sphinx-9.1.0.tar.gz", hash = "sha256:7741722357dd75f8190766926071fed3bdc211c74dd2d7d4df5404da95930ddb", size = 8718324, upload-time = "2025-12-31T15:09:27.646Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/73/f7/b1884cb3188ab181fc81fa00c266699dab600f927a964df02ec3d5d1916a/sphinx-9.1.0-py3-none-any.whl", hash = "sha256:c84fdd4e782504495fe4f2c0b3413d6c2bf388589bb352d439b2a3bb99991978", size = 3921742, upload-time = "2025-12-31T15:09:25.561Z" }, +] + +[[package]] +name = "sphinx-rtd-theme" +version = "3.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "docutils", version = "0.21.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "docutils", version = "0.22.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "sphinx", version = "9.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.11.*'" }, + { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, marker = 
"python_full_version >= '3.12'" }, + { name = "sphinxcontrib-jquery" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/84/68/a1bfbf38c0f7bccc9b10bbf76b94606f64acb1552ae394f0b8285bfaea25/sphinx_rtd_theme-3.1.0.tar.gz", hash = "sha256:b44276f2c276e909239a4f6c955aa667aaafeb78597923b1c60babc76db78e4c", size = 7620915, upload-time = "2026-01-12T16:03:31.17Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/87/c7/b5c8015d823bfda1a346adb2c634a2101d50bb75d421eb6dcb31acd25ebc/sphinx_rtd_theme-3.1.0-py2.py3-none-any.whl", hash = "sha256:1785824ae8e6632060490f67cf3a72d404a85d2d9fc26bce3619944de5682b89", size = 7655617, upload-time = "2026-01-12T16:03:28.101Z" }, +] + +[[package]] +name = "sphinxcontrib-applehelp" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ba/6e/b837e84a1a704953c62ef8776d45c3e8d759876b4a84fe14eba2859106fe/sphinxcontrib_applehelp-2.0.0.tar.gz", hash = "sha256:2f29ef331735ce958efa4734873f084941970894c6090408b079c61b2e1c06d1", size = 20053, upload-time = "2024-07-29T01:09:00.465Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/85/9ebeae2f76e9e77b952f4b274c27238156eae7979c5421fba91a28f4970d/sphinxcontrib_applehelp-2.0.0-py3-none-any.whl", hash = "sha256:4cd3f0ec4ac5dd9c17ec65e9ab272c9b867ea77425228e68ecf08d6b28ddbdb5", size = 119300, upload-time = "2024-07-29T01:08:58.99Z" }, +] + +[[package]] +name = "sphinxcontrib-devhelp" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f6/d2/5beee64d3e4e747f316bae86b55943f51e82bb86ecd325883ef65741e7da/sphinxcontrib_devhelp-2.0.0.tar.gz", hash = "sha256:411f5d96d445d1d73bb5d52133377b4248ec79db5c793ce7dbe59e074b4dd1ad", size = 12967, upload-time = "2024-07-29T01:09:23.417Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/35/7a/987e583882f985fe4d7323774889ec58049171828b58c2217e7f79cdf44e/sphinxcontrib_devhelp-2.0.0-py3-none-any.whl", hash = "sha256:aefb8b83854e4b0998877524d1029fd3e6879210422ee3780459e28a1f03a8a2", size = 82530, upload-time = "2024-07-29T01:09:21.945Z" }, +] + +[[package]] +name = "sphinxcontrib-htmlhelp" +version = "2.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/43/93/983afd9aa001e5201eab16b5a444ed5b9b0a7a010541e0ddfbbfd0b2470c/sphinxcontrib_htmlhelp-2.1.0.tar.gz", hash = "sha256:c9e2916ace8aad64cc13a0d233ee22317f2b9025b9cf3295249fa985cc7082e9", size = 22617, upload-time = "2024-07-29T01:09:37.889Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0a/7b/18a8c0bcec9182c05a0b3ec2a776bba4ead82750a55ff798e8d406dae604/sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl", hash = "sha256:166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8", size = 98705, upload-time = "2024-07-29T01:09:36.407Z" }, +] + +[[package]] +name = "sphinxcontrib-jquery" +version = "4.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "sphinx", version = "9.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.11.*'" }, + { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/de/f3/aa67467e051df70a6330fe7770894b3e4f09436dea6881ae0b4f3d87cad8/sphinxcontrib-jquery-4.1.tar.gz", hash = "sha256:1620739f04e36a2c779f1a131a2dfd49b2fd07351bf1968ced074365933abc7a", size = 122331, upload-time = "2023-03-14T15:01:01.944Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/76/85/749bd22d1a68db7291c89e2ebca53f4306c3f205853cf31e9de279034c3c/sphinxcontrib_jquery-4.1-py2.py3-none-any.whl", hash = "sha256:f936030d7d0147dd026a4f2b5a57343d233f1fc7b363f68b3d4f1cb0993878ae", size = 121104, upload-time = "2023-03-14T15:01:00.356Z" }, +] + +[[package]] +name = "sphinxcontrib-jsmath" +version = "1.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/e8/9ed3830aeed71f17c026a07a5097edcf44b692850ef215b161b8ad875729/sphinxcontrib-jsmath-1.0.1.tar.gz", hash = "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8", size = 5787, upload-time = "2019-01-21T16:10:16.347Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c2/42/4c8646762ee83602e3fb3fbe774c2fac12f317deb0b5dbeeedd2d3ba4b77/sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178", size = 5071, upload-time = "2019-01-21T16:10:14.333Z" }, +] + +[[package]] +name = "sphinxcontrib-qthelp" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/68/bc/9104308fc285eb3e0b31b67688235db556cd5b0ef31d96f30e45f2e51cae/sphinxcontrib_qthelp-2.0.0.tar.gz", hash = "sha256:4fe7d0ac8fc171045be623aba3e2a8f613f8682731f9153bb2e40ece16b9bbab", size = 17165, upload-time = "2024-07-29T01:09:56.435Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/27/83/859ecdd180cacc13b1f7e857abf8582a64552ea7a061057a6c716e790fce/sphinxcontrib_qthelp-2.0.0-py3-none-any.whl", hash = "sha256:b18a828cdba941ccd6ee8445dbe72ffa3ef8cbe7505d8cd1fa0d42d3f2d5f3eb", size = 88743, upload-time = "2024-07-29T01:09:54.885Z" }, +] + +[[package]] +name = "sphinxcontrib-serializinghtml" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/3b/44/6716b257b0aa6bfd51a1b31665d1c205fb12cb5ad56de752dfa15657de2f/sphinxcontrib_serializinghtml-2.0.0.tar.gz", hash = "sha256:e9d912827f872c029017a53f0ef2180b327c3f7fd23c87229f7a8e8b70031d4d", size = 16080, upload-time = "2024-07-29T01:10:09.332Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/52/a7/d2782e4e3f77c8450f727ba74a8f12756d5ba823d81b941f1b04da9d033a/sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl", hash = "sha256:6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331", size = 92072, upload-time = "2024-07-29T01:10:08.203Z" }, +] + [[package]] name = "threadpoolctl" version = "3.6.0"