nf-core · priyalT · Mar 13, 2026 · Mar 13, 2026 · Mar 13, 2026 · Mar 13, 2026
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
@@ -11,7 +11,7 @@ jobs:
   pre-commit:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
+      - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
 
       - name: Set up Python 3.14
         uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6
@@ -28,7 +28,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Check out pipeline code
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
+        uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
 
       - name: Install Nextflow
         uses: nf-core/setup-nextflow@v2
@@ -71,7 +71,7 @@ jobs:
 
       - name: Upload linting log file artifact
         if: ${{ always() }}
-        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
+        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5
         with:
           name: linting-logs
           path: |

diff --git a/.python-version b/.python-version
@@ -0,0 +1 @@
+3.12.8
diff --git a/conf/modules.config b/conf/modules.config
@@ -48,6 +48,7 @@ process {
     }
 
 
+
     // ---------------------------- xeniumranger ---------------------------------------------------
 
     withName: XENIUMRANGER_RELABEL {
@@ -148,6 +149,60 @@ process {
         ]
     }
 
+    // ---------------------------- segtraq -----------------------------------------
+
+    // Each SegTraQ QC process publishes into its own sub-directory of <outdir>/<mode>/segtraq/.
+    // The saveAs closure returns null for versions.yml, so that file is not published.
+    withName: SEGTRAQ_BASELINE {
+        publishDir = [
+            path: { "${params.outdir}/${params.mode}/segtraq/baseline" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+    withName: SEGTRAQ_CLUSTERING_STABILITY {
+        publishDir = [
+            path: { "${params.outdir}/${params.mode}/segtraq/clustering_stability" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+    withName: SEGTRAQ_REGION_SIMILARITY {
+        publishDir = [
+            path: { "${params.outdir}/${params.mode}/segtraq/region_similarity" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+    withName: SEGTRAQ_VOLUME {
+        publishDir = [
+            path: { "${params.outdir}/${params.mode}/segtraq/volume" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+    withName: SEGTRAQ_SUPERVISED {
+        publishDir = [
+            path: { "${params.outdir}/${params.mode}/segtraq/supervised" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+    withName: SEGTRAQ_POINT_STATISTICS {
+        publishDir = [
+            path: { "${params.outdir}/${params.mode}/segtraq/point_statistics" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+    withName: SEGTRAQ_PLOTTING {
+        publishDir = [
+            path: { "${params.outdir}/${params.mode}/segtraq/plotting" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
     // ---------------------------- ficture ------------------------------------------
 
     withName: FICTURE_PREPROCESS {

diff --git a/modules/local/segtraq/Dockerfile b/modules/local/segtraq/Dockerfile
@@ -0,0 +1,40 @@
+# Dockerfile to create container with segtraq
+# SegTraQ: A Python toolkit for quantitative and visual quality control
+# of segmentation and transcript assignment in spatial omics data.
+# https://github.com/LazDaria/SegTraQ
+
+FROM python:3.12-slim
+
+LABEL authors="Priyal Tripathi" \
+    description="Docker image containing SegTraQ and its dependencies for segmentation QC"
+
+# Unbuffered Python output; the cache/config dirs are relative paths, so they resolve against the runtime working directory
+ENV PYTHONUNBUFFERED=1
+ENV NUMBA_CACHE_DIR='tmp'
+ENV MPLCONFIGDIR='tmp/matplotlib'
+ENV XDG_CACHE_HOME='tmp'
+
+# Install system dependencies required by geopandas, rasterio, rtree, igraph
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    libgeos-dev \
+    libgdal-dev \
+    libspatialindex-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set the working directory
+WORKDIR /app
+
+# Install segtraq (pinned) and its key dependencies; specifiers are quoted so /bin/sh does not parse '>' as a redirect
+RUN pip install --no-cache-dir \
+    "segtraq==0.0.3" \
+    "spatialdata>=0.7.2" \
+    "spatialdata-io>=0.1.4" \
+    "anndata>=0.12" \
+    scanpy \
+    "squidpy>=1.6.2" \
+    geopandas \
+    igraph \
+    rtree \
+    rasterio \
+    "ovrlpy>=1.1.0"
diff --git a/modules/local/segtraq/baseline/main.nf b/modules/local/segtraq/baseline/main.nf
@@ -0,0 +1,44 @@
+// Runs the baseline.py template on one SpatialData zarr directory and emits the
+// per-sample segtraq_qc/<prefix>/ results directory together with versions.yml.
+process SEGTRAQ_BASELINE {
+    tag "${meta.id}"
+    label 'process_medium'
+
+    // Only a container image is declared; there is no conda directive (see the profile guard below)
+    container "quay.io/priyal_tripathi/segtraq:0.0.3"
+
+    input:
+    tuple val(meta), path(spatialdata_zarr)
+
+    output:
+    // ${prefix} here is the variable assigned in the script: / stub: sections
+    tuple val(meta), path("segtraq_qc/${prefix}/"), emit: qc_results
+    path ("versions.yml")                         , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // Exit if running this module with -profile conda / -profile mamba
+    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+        error("SEGTRAQ_BASELINE module does not support Conda. Please use Docker / Singularity / Podman instead.")
+    }
+
+    // Assigned without `def`; falls back to meta.id when task.ext.prefix is unset or empty
+    prefix = task.ext.prefix ?: "${meta.id}"
+
+    // The Python template is expected to create the outputs declared above
+    template 'baseline.py'
+
+    stub:
+    // Exit if running this module with -profile conda / -profile mamba
+    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+        error("SEGTRAQ_BASELINE module does not support Conda. Please use Docker / Singularity / Podman instead.")
+    }
+
+    // Same prefix rule as the script: section so the declared output path matches
+    prefix = task.ext.prefix ?: "${meta.id}"
+
+    // Placeholder outputs only: an empty summary file and a versions.yml built from `pip show`.
+    // `\$` leaves the command substitution to the shell rather than Groovy interpolation.
+    """
+    mkdir -p "segtraq_qc/${prefix}"
+    touch "segtraq_qc/${prefix}/baseline_summary.json"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        segtraq: \$(pip show segtraq | grep Version | cut -d' ' -f2)
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/segtraq/baseline/meta.yml b/modules/local/segtraq/baseline/meta.yml
@@ -0,0 +1,52 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
+# Module metadata for the SEGTRAQ_BASELINE process: describes its inputs,
+# outputs and the wrapped tool.
+name: segtraq_baseline
+description: Run SegTraQ baseline QC metrics on a SpatialData object to assess
+  segmentation quality including cell counts, unassigned transcripts, transcripts/genes
+  per cell, transcript density, morphological features, and more.
+keywords:
+  - segtraq
+  - quality control
+  - segmentation
+  - spatial transcriptomics
+  - baseline metrics
+tools:
+  # NOTE(review): this entry is keyed `custom` although it describes SegTraQ;
+  # confirm whether the key should be the tool name instead.
+  - custom:
+      description: SegTraQ - A Python toolkit for quantitative and visual quality
+        control of segmentation and transcript assignment in spatial omics data.
+      homepage: https://github.com/LazDaria/SegTraQ
+      documentation: https://lazdaria.github.io/SegTraQ
+      licence: ["MIT"]
+# Mirrors the process input `tuple val(meta), path(spatialdata_zarr)`
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'sample' ]
+  - spatialdata_zarr:
+      type: directory
+      description: |
+        Path to a SpatialData .zarr directory containing the spatial omics
+        data with segmentation results.
+      pattern: "*.zarr"
+# Entry names match the `emit:` labels of the process (qc_results, versions)
+output:
+  - qc_results:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'sample' ]
+      - "segtraq_qc/${prefix}/":
+          type: directory
+          description: |
+            Directory containing SegTraQ baseline QC results including
+            baseline_summary.json with cell counts, transcript/gene stats,
+            and morphological features.
+          pattern: "segtraq_qc/*/"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@priyaltripathi"
+maintainers:
+  - "@priyaltripathi"
diff --git a/modules/local/segtraq/baseline/templates/baseline.py b/modules/local/segtraq/baseline/templates/baseline.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python
+
+"""Compute SegTraQ baseline QC metrics for a SpatialData object.
+
+This file is a Nextflow template: the dollar-brace placeholders are substituted
+by Nextflow before Python runs. It writes segtraq_qc/<prefix>/baseline_summary.json
+and versions.yml into the task working directory.
+"""
+
+import json
+import os
+from importlib.metadata import version
+
+import segtraq
+import spatialdata as sd
+
+# (summary key, method name on the SegTraQ baseline accessor `st.bl`).
+# Plain counts, stored as integers.
+COUNT_METRICS = (
+    ("num_cells", "num_cells"),
+    ("num_transcripts", "num_transcripts"),
+    ("num_genes", "num_genes"),
+)
+
+# Percentages, densities and means are not integers in general, and the
+# per-gene / per-cell / morphology metrics presumably return array-like or
+# tabular objects (TODO confirm against the SegTraQ API), so these values are
+# converted with _to_jsonable() instead of being forced through int().
+OTHER_METRICS = (
+    ("percent_unassigned_transcripts", "perc_unassigned_transcripts"),
+    ("unassigned_transcripts_per_gene", "perc_unassigned_transcripts_per_gene"),
+    ("transcripts_per_cell", "transcripts_per_cell"),
+    ("genes_per_cell", "genes_per_cell"),
+    ("transcript_density", "transcript_density"),
+    ("mean_transcripts_per_gene_per_cell", "mean_transcripts_per_gene_per_cell"),
+    ("morpho_features", "morphological_features"),
+)
+
+
+def _optional_key(raw):
+    """Return None when a Nextflow parameter rendered as 'null', '' or 'None'."""
+    return None if raw in ("null", "", "None") else raw
+
+
+def _to_jsonable(value):
+    """Recursively convert a metric value into something json.dump accepts.
+
+    Handles builtin scalars and containers, objects exposing to_dict()
+    (e.g. pandas Series / DataFrame) and objects exposing tolist()
+    (e.g. numpy scalars / arrays); anything else is stored as its str().
+    """
+    if value is None or isinstance(value, (bool, int, float, str)):
+        return value
+    if isinstance(value, dict):
+        # JSON object keys must be strings (index labels may be ints or tuples)
+        return {str(k): _to_jsonable(v) for k, v in value.items()}
+    if isinstance(value, (list, tuple, set)):
+        return [_to_jsonable(v) for v in value]
+    if hasattr(value, "to_dict"):
+        # Checked before tolist() so that index labels are kept as keys
+        return _to_jsonable(value.to_dict())
+    if hasattr(value, "tolist"):
+        return _to_jsonable(value.tolist())
+    return str(value)
+
+
+def main():
+    """Run every baseline metric, then write the JSON summary and versions.yml."""
+    print("[START] SegTraQ Baseline QC")
+    input_path = "${spatialdata_zarr}"
+    prefix = "${prefix}"
+    output_dir = f"segtraq_qc/{prefix}"
+    os.makedirs(output_dir, exist_ok=True)
+
+    # Read the spatial data
+    print(f"[INFO] Reading SpatialData object from: {input_path}")
+    sdata = sd.read_zarr(input_path)
+
+    # Initialize the SegTraQ object; unset centroid keys are passed as None
+    print("[INFO] Initializing SegTraQ object")
+    st = segtraq.SegTraQ(
+        sdata,
+        images_key=None,
+        tables_area_key=None,
+        points_background_id=0,
+        tables_centroid_x_key=_optional_key("${params.segtraq_centroid_x_key}"),
+        tables_centroid_y_key=_optional_key("${params.segtraq_centroid_y_key}"),
+    )
+
+    print("[INFO] Computing baseline QC metrics")
+    summary = {}
+    for key, method in COUNT_METRICS:
+        value = getattr(st.bl, method)()
+        summary[key] = int(value)
+        print(f" {key}: {value}")
+    for key, method in OTHER_METRICS:
+        value = getattr(st.bl, method)()
+        summary[key] = _to_jsonable(value)
+        print(f" {key}: {value}")
+
+    # Write the summary
+    summary_path = f"{output_dir}/baseline_summary.json"
+    with open(summary_path, "w") as f:
+        json.dump(summary, f, indent=2)
+    print(f"[INFO] Summary written to {summary_path}")
+
+    # importlib.metadata reads the installed version without needing a pip
+    # executable on PATH. print(..., file=f) supplies the line endings, so no
+    # backslash escape has to pass through the Nextflow template engine.
+    segtraq_version = version("segtraq")
+    with open("versions.yml", "w") as f:
+        print('"${task.process}":', file=f)
+        print(f'  segtraq: "{segtraq_version}"', file=f)
+        print(f'  spatialdata: "{sd.__version__}"', file=f)
+    print("[FINISH] SegTraQ Baseline QC")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/modules/local/segtraq/clustering_stability/main.nf b/modules/local/segtraq/clustering_stability/main.nf
@@ -0,0 +1,44 @@
+// Runs the clustering_stability.py template on one SpatialData zarr directory and
+// emits the per-sample segtraq_qc/<prefix>/ results directory together with versions.yml.
+process SEGTRAQ_CLUSTERING_STABILITY {
+    tag "${meta.id}"
+    label 'process_medium'
+
+    // Only a container image is declared; there is no conda directive (see the profile guard below)
+    container "quay.io/priyal_tripathi/segtraq:0.0.3"
+
+    input:
+    tuple val(meta), path(spatialdata_zarr)
+
+    output:
+    // ${prefix} here is the variable assigned in the script: / stub: sections
+    tuple val(meta), path("segtraq_qc/${prefix}/"), emit: qc_results
+    path("versions.yml")                         , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // Exit if running this module with -profile conda / -profile mamba
+    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+        error("SEGTRAQ_CLUSTERING_STABILITY module does not support Conda. Please use Docker / Singularity / Podman instead.")
+    }
+
+    // Assigned without `def`; falls back to meta.id when task.ext.prefix is unset or empty
+    prefix = task.ext.prefix ?: "${meta.id}"
+
+    // The Python template is expected to create the outputs declared above
+    template('clustering_stability.py')
+
+    stub:
+    // Exit if running this module with -profile conda / -profile mamba
+    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+        error("SEGTRAQ_CLUSTERING_STABILITY module does not support Conda. Please use Docker / Singularity / Podman instead.")
+    }
+
+    // Same prefix rule as the script: section so the declared output path matches
+    prefix = task.ext.prefix ?: "${meta.id}"
+
+    // Placeholder outputs only: an empty summary file and a versions.yml built from `pip show`.
+    // `\$` leaves the command substitution to the shell rather than Groovy interpolation.
+    """
+    mkdir -p "segtraq_qc/${prefix}"
+    touch "segtraq_qc/${prefix}/clustering_stability_summary.json"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        segtraq: \$(pip show segtraq | grep Version | cut -d' ' -f2)
+    END_VERSIONS
+    """
+}