diff --git a/.Rbuildignore b/.Rbuildignore index 7d68da4..6256641 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -5,3 +5,5 @@ ^Dockerfile$ ^Dockerfiles/.*$ ^Conda_Recipe/.*$ +^doc$ +^Meta$ diff --git a/.Rhistory b/.Rhistory index 07a9d36..9059bf2 100644 --- a/.Rhistory +++ b/.Rhistory @@ -67,8 +67,8 @@ complexity.limits = c(NA,NA), mad.complexity.limits = c(5,NA), topNgenes.limits = c(NA,NA), mad.topNgenes.limits = c(5,5), -n.topgnes=20, -do.doublets.fitler=T +n.topgenes=20, +do.doublets.filter=T ) ggsave(SO_filtered$plots$PostFilterCombined, filename = "./images/QC1.png", width = 10, height = 10) ggsave(SO_filtered$plots$ViolinPlotCombine, filename = "./images/QC2.png", width = 10, height = 10) @@ -315,12 +315,12 @@ dev.off() modScore MS_object=modScore(object=Anno_SO$object, marker.table=Marker_Table, -use_columns = c("Macrophages","M1","M2" ) -ms_threshold=c("Macrophages .40","M1 .25","M2 .14"), +use.columns = c("Macrophages","M1","M2" ) +ms.threshold=c("Macrophages .40","M1 .25","M2 .14"), MS_object=modScore(object=Anno_SO$object, marker.table=Marker_Table, -use_columns = c("Macrophages","M1","M2" ), -ms_threshold=c("Macrophages .40","M1 .25","M2 .14"), +use.columns = c("Macrophages","M1","M2" ), +ms.threshold=c("Macrophages .40","M1 .25","M2 .14"), use_assay = "SCT", general.class=c("Macrophages"), lvl.vec = c('Macrophages-M1','Macrophages-M2'), @@ -333,8 +333,8 @@ step.size = 0.1 modScore MS_object=modScore(object=Anno_SO$object, marker.table=Marker_Table, -use_columns = c("Macrophages","M1","M2" ), -ms_threshold=c("Macrophages .40","M1 .25","M2 .14"), +use.columns = c("Macrophages","M1","M2" ), +ms.threshold=c("Macrophages .40","M1 .25","M2 .14"), use_assay = "SCT", general.class=c("Macrophages"), multi.lvl = FALSE, @@ -347,8 +347,8 @@ step.size = 0.1 modScore MS_object=modScore(object=Anno_SO$object, marker.table=Marker_Table, -use_columns = c("Macrophages","M1","M2" ), -ms_threshold=c("Macrophages .40","M1 .25","M2 .14"), +use.columns = 
c("Macrophages","M1","M2" ), +ms.threshold=c("Macrophages .40","M1 .25","M2 .14"), general.class=c("Macrophages"), multi.lvl = FALSE, reduction = "umap", @@ -359,8 +359,8 @@ step.size = 0.1 ) MS_object=modScore(object=Anno_SO$object, marker.table=Marker_Table, -use_columns = c("Macrophages","M1","M2" ), -ms_threshold=c("Macrophages .40","M1 .25","M2 .14"), +use.columns = c("Macrophages","M1","M2" ), +ms.threshold=c("Macrophages .40","M1 .25","M2 .14"), multi.lvl = FALSE, reduction = "umap", nbins = 10, @@ -370,8 +370,8 @@ step.size = 0.1 ) MS_object=modScore(object=Anno_SO$object, marker.table=Marker_Table, -use_columns = c("Macrophages","M1","M2" ), -ms_threshold=c("Macrophages .40","M1 .25","M2 .14"), +use.columns = c("Macrophages","M1","M2" ), +ms.threshold=c("Macrophages .40","M1 .25","M2 .14"), general.class=c("Macrophages","M1","M2"), multi.lvl = FALSE, reduction = "umap", @@ -383,8 +383,8 @@ step.size = 0.1 Marker_Table MS_object=modScore(object=Anno_SO$object, marker.table=Marker_Table, -use_columns = c("Macrophages","Monocytes","CD8_T" ), -ms_threshold=c("Macrophages .40","Monocytes .25","CD8_T .14"), +use.columns = c("Macrophages","Monocytes","CD8_T" ), +ms.threshold=c("Macrophages .40","Monocytes .25","CD8_T .14"), general.class=c("Macrophages","Monocytes","CD8_T"), multi.lvl = FALSE, reduction = "umap", @@ -395,8 +395,8 @@ step.size = 0.1 ) MS_object=modScore(object=Anno_SO$object, marker.table=Marker_Table, -use_columns = c("Macrophages","Neutrophils","CD8_T" ), -ms_threshold=c("Macrophages .40","Neutrophils .25","CD8_T .14"), +use.columns = c("Macrophages","Neutrophils","CD8_T" ), +ms.threshold=c("Macrophages .40","Neutrophils .25","CD8_T .14"), general.class=c("Macrophages","Neutrophils","CD8_T"), multi.lvl = FALSE, reduction = "umap", @@ -407,8 +407,8 @@ step.size = 0.1 ) MS_object=modScore(object=Anno_SO$object, marker.table=Marker_Table, -use_columns = c("Neutrophils","Macrophages","CD8_T" ), -ms_threshold=c("Neutrophils .25","Macrophages 
.40","CD8_T .14"), +use.columns = c("Neutrophils","Macrophages","CD8_T" ), +ms.threshold=c("Neutrophils .25","Macrophages .40","CD8_T .14"), general.class=c("Neutrophils","Macrophages","CD8_T"), multi.lvl = FALSE, reduction = "umap", diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100644 index 0000000..875ae87 --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,288 @@ +# Contributing to SCWorkflow + +## Proposing changes with issues + +If you want to make a change, it's a good idea to first +[open an issue](https://code-review.tidyverse.org/issues/) +and make sure someone from the team agrees that it’s needed. + +If you've decided to work on an issue, +[assign yourself to the issue](https://docs.github.com/en/issues/tracking-your-work-with-issues/assigning-issues-and-pull-requests-to-other-github-users#assigning-an-individual-issue-or-pull-request) +so others will know you're working on it. + +## Pull request process + +We use [GitHub Flow](https://docs.github.com/en/get-started/using-github/github-flow) +as our collaboration process. +Follow the steps below for detailed instructions on contributing changes to +SCWorkflow. + +![GitHub Flow diagram](https://raw.githubusercontent.com/CCBR/CCBR_NextflowTemplate/main/.github/img/GitHub-Flow_bg-white.png) + + +### Clone the repo + +If you are a member of [CCBR](https://github.com/CCBR), +you can clone this repository to your computer or development environment. +Otherwise, you will first need to +[fork](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo) +the repo and clone your fork. You only need to do this step once. + +```sh +git clone https://github.com/CCBR/SCWorkflow +``` + +> Cloning into 'SCWorkflow'...
+> remote: Enumerating objects: 1136, done.
+> remote: Counting objects: 100% (463/463), done.
+> remote: Compressing objects: 100% (357/357), done.
+> remote: Total 1136 (delta 149), reused 332 (delta 103), pack-reused 673
+> Receiving objects: 100% (1136/1136), 11.01 MiB | 9.76 MiB/s, done.
+> Resolving deltas: 100% (530/530), done.
+ +```sh +cd SCWorkflow +``` + +### If this is your first time cloning the repo, install dependencies + +- In an R console, install the R development dependencies with + `devtools::install_dev_deps()`, and then make sure the package passes R CMD + check by running `devtools::check()`. If R CMD check doesn't pass cleanly, + it's a good idea to ask for help before continuing. + +- Install [`pre-commit`](https://pre-commit.com/#install) if you don't already + have it. Then from the repo's root directory, run + + ```sh + pre-commit install + ``` + + This will install the repo's pre-commit hooks. + You'll only need to do this step the first time you clone the repo. + +### Create a branch + + Create a Git branch for your pull request (PR). Give the branch a descriptive + name for the changes you will make, such as `iss-10` if it is for a specific + issue. + + ```sh + # create a new branch and switch to it + git branch iss-10 + git switch iss-10 + ``` + + > Switched to a new branch 'iss-10' + +### Make your changes + +Edit the code, write unit tests, and update the documentation as needed. + +#### style + +New code should follow the [tidyverse style guide](https://style.tidyverse.org). +You can use the [styler](https://CRAN.R-project.org/package=styler) package to +apply these styles, but please don't restyle code that has nothing to do with +your PR. + +A brief overview of conventions according to the tidyverse style guide: + +- most object names (variables and functions) should be in [snake_case](https://style.tidyverse.org/syntax.html#sec-objectnames) +- function names should use [verbs](https://style.tidyverse.org/functions.html#naming) where possible +- use `<-` for assignment +- use [pipes](https://style.tidyverse.org/pipes.html) to chain operations on a single object + +Please see the [tidyverse style guide](https://style.tidyverse.org) for more details. 
+ +#### test + +Most changes to the code will also need unit tests to demonstrate that the +changes work as intended. +Use [`testthat`](https://testthat.r-lib.org/) to create your unit tests and test +the code. +Test files are organized as described in +. +Take a look at the existing code in this package for examples. + +#### document + +If you have written a new function or changed the API of an existing function, +you will need to update the function's documentation using +[roxygen2](https://cran.r-project.org/package=roxygen2) with +[Markdown syntax](https://roxygen2.r-lib.org/articles/rd-formatting.html). +See instructions on writing roxygen2 comments here: +. +If the function is used in a vignette, you may also need to update the vignette. + +#### check + +After making your changes, run `devtools::check()` from an R console to make +sure the package still passes R CMD check. + +### Commit and push your changes + +If you're not sure how often you should commit or what your commits should +consist of, we recommend following the "atomic commits" principle where each +commit contains one new feature, fix, or task. +Learn more about atomic commits here: + + +First, add the files that you changed to the staging area: + +```sh +git add path/to/changed/files/ +``` + +Then make the commit. +Your commit message should follow the +[Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) +specification. +Briefly, each commit should start with one of the approved types such as +`feat`, `fix`, `docs`, etc. followed by a description of the commit. +Take a look at the [Conventional Commits specification](https://www.conventionalcommits.org/en/v1.0.0/#summary) +for more detailed information about how to write commit messages. + + +```sh +git commit -m 'feat: create function for awesome feature' +``` + +pre-commit will enforce that your commit message and the code changes are +styled correctly and will attempt to make corrections if needed. 
+ +> Check for added large files..............................................Passed
+> Fix End of Files.........................................................Passed
+> Trim Trailing Whitespace.................................................Failed
+> - hook id: trailing-whitespace
+> - exit code: 1
+> - files were modified by this hook
+>
+> Fixing path/to/changed/files/file.txt
+>
+> codespell................................................................Passed
+> style-files..........................................(no files to check)Skipped
+> readme-rmd-rendered..................................(no files to check)Skipped
+> use-tidy-description.................................(no files to check)Skipped
+ +In the example above, one of the hooks modified a file in the proposed commit, +so the pre-commit check failed. You can run `git diff` to see the changes that +pre-commit made and `git status` to see which files were modified. To proceed +with the commit, re-add the modified file(s) and re-run the commit command: + +```sh +git add path/to/changed/files/file.txt +git commit -m 'feat: create function for awesome feature' +``` + +This time, all the hooks either passed or were skipped +(e.g. hooks that only run on R code will not run if no R files were +committed). +When the pre-commit check is successful, the usual commit success message +will appear after the pre-commit messages showing that the commit was created. + +> Check for added large files..............................................Passed
+> Fix End of Files.........................................................Passed
+> Trim Trailing Whitespace.................................................Passed
+> codespell................................................................Passed
+> style-files..........................................(no files to check)Skipped
+> readme-rmd-rendered..................................(no files to check)Skipped
+> use-tidy-description.................................(no files to check)Skipped
+> Conventional Commit......................................................Passed
+> [iss-10 9ff256e] feat: create function for awesome feature
+> 1 file changed, 22 insertions(+), 3 deletions(-)
+ +Finally, push your changes to GitHub: + +```sh +git push +``` + +If this is the first time you are pushing this branch, you may have to +explicitly set the upstream branch: + +```sh +git push --set-upstream origin iss-10 +``` + +> Enumerating objects: 7, done.
+> Counting objects: 100% (7/7), done.
+> Delta compression using up to 10 threads
+> Compressing objects: 100% (4/4), done.
+> Writing objects: 100% (4/4), 648 bytes | 648.00 KiB/s, done.
+> Total 4 (delta 3), reused 0 (delta 0), pack-reused 0
+> remote: Resolving deltas: 100% (3/3), completed with 3 local objects.
+> remote:
+> remote: Create a pull request for 'iss-10' on GitHub by visiting:
+> remote: https://github.com/CCBR/SCWorkflow/pull/new/iss-10
+> remote:
+> To https://github.com/CCBR/SCWorkflow
+>
+> [new branch] iss-10 -> iss-10
+> branch 'iss-10' set up to track 'origin/iss-10'.
+ +We recommend pushing your commits often so they will be backed up on GitHub. +You can view the files in your branch on GitHub at +`https://github.com/CCBR/SCWorkflow/tree/` +(replace `` with the actual name of your branch). + +### Create the PR + +Once your branch is ready, create a PR on GitHub: + + +Select the branch you just pushed: + +![Create a new PR from your branch](https://raw.githubusercontent.com/CCBR/CCBR_NextflowTemplate/main/.github/img/new-PR.png) + +Edit the PR title and description. +The title should briefly describe the change. +Follow the comments in the template to fill out the body of the PR, and +you can delete the comments (everything between ``) as you go. +When you're ready, click 'Create pull request' to open it. + +![Open the PR after editing the title and description](https://raw.githubusercontent.com/CCBR/CCBR_NextflowTemplate/main/.github/img/create-PR.png) + +Optionally, you can mark the PR as a draft if you're not yet ready for it to +be reviewed, then change it later when you're ready. + +### Wait for a maintainer to review your PR + +We will do our best to follow the tidyverse code review principles: +. +The reviewer may suggest that you make changes before accepting your PR in +order to improve the code quality or style. +If that's the case, continue to make changes in your branch and push them to +GitHub, and they will appear in the PR. + +Once the PR is approved, the maintainer will merge it and the issue(s) the PR +links will close automatically. +Congratulations and thank you for your contribution! + +### After your PR has been merged + +After your PR has been merged, update your local clone of the repo by +switching to the main branch and pulling the latest changes: + +```sh +git checkout main +git pull +``` + +It's a good idea to run `git pull` before creating a new branch so it will +start from the most recent commits in main. 
+ +## Helpful links for more information + +- This contributing guide was adapted from the [tidyverse contributing guide](https://github.com/tidyverse/tidyverse/blob/main/.github/CONTRIBUTING.md) +- [GitHub Flow](https://docs.github.com/en/get-started/using-github/github-flow) +- [tidyverse style guide](https://style.tidyverse.org) +- [tidyverse code review principles](https://code-review.tidyverse.org) +- [reproducible examples](https://www.tidyverse.org/help/#reprex) +- [R packages book](https://r-pkgs.org/) +- packages: + - [usethis](https://usethis.r-lib.org/) + - [devtools](https://devtools.r-lib.org/) + - [testthat](https://testthat.r-lib.org/) + - [styler](https://styler.r-lib.org/) + - [roxygen2](https://roxygen2.r-lib.org) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 0000000..95c98d1 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,45 @@ +name: Bug report +description: Report something that is broken or incorrect +labels: bug +body: + - type: markdown + attributes: + value: | + Before you submit this issue, please check the documentation: + + - type: textarea + id: description + attributes: + label: Description of the bug + description: A clear and concise description of what the bug is. + validations: + required: true + + - type: textarea + id: reprex + attributes: + label: Code and output + description: Please include a minimal reproducible example (AKA a reprex). If you've never heard of a [reprex](http://reprex.tidyverse.org/) before, start by reading . + render: console + placeholder: | + library(SCWorkflow) + ... insert_your_code_here() ... + + Paste some output where something broke + + - type: textarea + id: files + attributes: + label: Relevant files + description: | + Please drag and drop any relevant files here if applicable. Create a `.zip` archive if the extension is not allowed. 
+ + - type: textarea + id: system + attributes: + label: System information + description: | + * Version of R + * Version of CCBR/SCWorkflow + * OS _(eg. Ubuntu Linux, macOS)_ + * Hardware _(eg. HPC, Desktop)_ diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..84f8757 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,4 @@ +contact_links: + - name: Discussions + url: https://github.com/CCBR/SCWorkflow/discussions + about: Please ask and answer questions here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 0000000..73a08f5 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,11 @@ +name: Feature request +description: Suggest an idea for the package +labels: enhancement +body: + - type: textarea + id: description + attributes: + label: Description of feature + description: Please describe your suggestion for a new feature. It might help to describe a problem or use case, plus any alternatives that you have considered. + validations: + required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..bc679fa --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,24 @@ +## Changes + + + +## Issues + + + +## PR Checklist + +(~Strikethrough~ any points that are not applicable.) + +- [ ] This comment contains a description of changes with justifications, with any relevant issues linked. +- [ ] Write unit tests for any new features, bug fixes, or other code changes. +- [ ] Update the docs if there are any API changes (roxygen2 comments, vignettes, readme, etc.). +- [ ] Update `NEWS.md` with a short description of any user-facing changes and reference the PR number. Follow the style described in +- [ ] Run `devtools::check()` locally and fix all notes, warnings, and errors. 
diff --git a/.github/package-versions.txt b/.github/package-versions.txt new file mode 100644 index 0000000..2facfd9 --- /dev/null +++ b/.github/package-versions.txt @@ -0,0 +1 @@ +any::Seurat@4.1.1, any::Matrix@1.5.1 \ No newline at end of file diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 0000000..d53b9fb --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,124 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [ main, master, dev, DEV ] + pull_request: + branches: [ main, master, dev, DEV ] + workflow_dispatch: + +name: R-CMD-check + +permissions: + contents: read + pull-requests: read + +jobs: + R-CMD-check: + strategy: + fail-fast: false + matrix: + config: + - { os: ubuntu-latest, r: "4.1.3" } + #- { os: ubuntu-latest, r: 'oldrel-1' } + #- { os: macos-latest, r: 'release' } + runs-on: ${{ matrix.config.os }} + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + steps: + - uses: actions/checkout@v4 + + - uses: CCBR/actions/install-r-pak@main + with: + versions-file: .github/package-versions.txt + extra-packages: local::. 
+ needs: dev + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true + + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: CCBR/actions/install-r-pak@main + with: + r-version: 4.1.3 + versions-file: .github/package-versions.txt + needs: dev + + - name: Good Practice checks + shell: Rscript {0} + run: | + g <- goodpractice::gp() + g + n_failed <- length(goodpractice::failed_checks(g)) + if (n_failed > 0) { + warning(paste(n_failed, "failed checks")) + } + - name: Lint + shell: Rscript {0} + run: lintr::lint_package() + env: + LINTR_ERROR_ON_LINT: false + + test-coverage: + runs-on: ubuntu-latest + container: + image: nciccbr/scworkflow:v1.0.2_79e5d37 + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v4 + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + needs: dev + - name: Test coverage + run: | + cov <- covr::package_coverage( + quiet = FALSE, + clean = FALSE, + install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") + ) + covr::to_cobertura(cov) + shell: Rscript {0} + - uses: codecov/codecov-action@v4 + with: + fail_ci_if_error: ${{ github.event_name != 'pull_request' && true || false }} + file: ./cobertura.xml + plugin: noop + disable_search: true + token: ${{ secrets.CODECOV_TOKEN }} + - name: Show testthat output + if: always() + run: | + ## -------------------------------------------------------------------- + find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true + shell: bash + - name: Upload test results + if: failure() + uses: actions/upload-artifact@v4 + with: + name: coverage-test-failures + path: ${{ runner.temp }}/package + + check: + # make sure all check jobs pass. 
https://github.com/orgs/community/discussions/4324#discussioncomment-3477871 + runs-on: ubuntu-latest + container: + image: nciccbr/scworkflow:v1.0.2_79e5d37 + needs: [ R-CMD-check, lint, test-coverage ] + if: always() + steps: + - name: Successful build + if: ${{ !(contains(needs.*.result, 'failure')) }} + run: exit 0 + - name: Failing build + if: ${{ contains(needs.*.result, 'failure') }} + run: exit 1 diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 9e96b07..a6822bc 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -56,4 +56,4 @@ jobs: BUILD_DATE=${{ steps.vars.outputs.DATE }} BUILD_TAG=${{ steps.vars.outputs.VERSION_TAG }} REPONAME=${{ env.IMAGE_NAME }} - R_VERSION=4.3.2 + R_VERSION=4.1.3 diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index f9745ba..33b9b71 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -2,11 +2,11 @@ # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: [main, master, GalaxyCLI] + branches: [ main, master ] pull_request: - branches: [main, master] + branches: [ main, master ] release: - types: [published] + types: [ published ] workflow_dispatch: name: pkgdown @@ -23,20 +23,16 @@ jobs: group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + container: + image: nciccbr/scworkflow:v1.0.2_79e5d37 steps: - uses: actions/checkout@v3 - - - uses: r-lib/actions/setup-pandoc@v2 - - - uses: r-lib/actions/setup-r@v2 - with: - use-public-rspm: true - r-version: 4.3 - - - uses: r-lib/actions/setup-r-dependencies@v2 + - uses: CCBR/actions/install-r-pak@main with: - extra-packages: any::pkgdown, local::. - needs: website + r-version: 4.1.3 + versions-file: .github/package-versions.txt + extra-packages: local::. 
+ needs: dev - name: Build site run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml new file mode 100644 index 0000000..4ac1c43 --- /dev/null +++ b/.github/workflows/test-coverage.yaml @@ -0,0 +1,58 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master, dev, DEV] + pull_request: + branches: [main, master, dev, DEV] + +name: test-coverage + +permissions: read-all + +jobs: + test-coverage: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + - uses: actions/checkout@v4 + + - uses: CCBR/actions/install-r-pak@main + with: + r-version: 4.1.3 + versions-file: .github/package-versions.txt + needs: dev + + - name: Test coverage + run: | + cov <- covr::package_coverage( + quiet = FALSE, + clean = FALSE, + install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") + ) + covr::to_cobertura(cov) + shell: Rscript {0} + + - uses: codecov/codecov-action@v4 + with: + fail_ci_if_error: ${{ github.event_name != 'pull_request' && true || false }} + file: ./cobertura.xml + plugin: noop + disable_search: true + token: ${{ secrets.CODECOV_TOKEN }} + + - name: Show testthat output + if: always() + run: | + ## -------------------------------------------------------------------- + find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true + shell: bash + + - name: Upload test results + if: failure() + uses: actions/upload-artifact@v4 + with: + name: coverage-test-failures + path: ${{ runner.temp }}/package diff --git a/.github/workflows/user-projects.yml b/.github/workflows/user-projects.yml new file mode 100644 index 0000000..c189ef6 --- /dev/null +++ b/.github/workflows/user-projects.yml @@ -0,0 +1,23 @@ +name: user-projects 
+ +on: + issues: + types: + - assigned + pull_request: + types: + - assigned + +permissions: + issues: write + pull-requests: write + +jobs: + add-to-project: + runs-on: ubuntu-latest + steps: + - uses: CCBR/actions/user-projects@main + with: + app-id: ${{ vars.CCBR_BOT_APP_ID }} + app-private-key: ${{ secrets.CCBR_BOT_PRIVATE_KEY }} + token-owner: "CCBR" diff --git a/.gitignore b/.gitignore index dabea9e..f2fa164 100644 --- a/.gitignore +++ b/.gitignore @@ -9,9 +9,10 @@ tests/testthat/fixtures/ tests/testthat/output/ .Rproj.user - inst/doc -#inst/extdata/* -docs -*.Rds -*.rds +/doc/ +/Meta/ + +.github.zip +decision_log.md + diff --git a/CHANGELOG.md b/CHANGELOG.md index 1fe1d48..a3438a3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,15 @@ # CHANGELOG +## v1.0.3 (in development) +### Feature + +* feat: Add compareCellPopulations() function for comparing cell population distributions across experimental groups + - Visualizes cell population frequencies or absolute counts across multiple groups + - Generates alluvial flow bar plots and faceted box plots + - Supports custom group ordering and color palettes + - Added ggalluvial dependency for flow visualizations + - Generated from JSON template using json2r.prompt.md instructions ## v1.0.2 (2024-02-01) diff --git a/DESCRIPTION b/DESCRIPTION index 0a26fcb..ed084e5 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,94 +1,105 @@ Package: SCWorkflow Title: SCWorkflow from NIDAP Version: 1.0.2 -Authors@R: c(person("Maggie", "Cam", email = "maggie.cam@nih.gov", role = "aut", comment = c(ORCID = "0000-0001-8190-9766")), - person("Thomas", "Meyer", email = "thomas.meyer@nih.gov", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-7185-5597")), - person("Jing", "Bian", email = "bianjh@nih.gov", role = "aut", comment = c(ORCID = "0000-0001-7109-716X")), - person("Alexandra", "Michalowski", email = "michaloa@mail.nih.gov", role = "aut", comment = c(ORCID = "0000-0001-9259-6101")), - person("Alexei", "Lobanov", email = 
"alexei.lobanov@nih.gov", role = "aut", comment = c(ORCID = "0000-0002-9883-4374")), - person("Philip", "Homan", email = "philip.homan@nih.gov", role = "aut", comment = c(ORCID = "0000-0002-3389-4931")), - person("Rui", "He", email = "rui.he@nih.gov", role = "aut")) -Description: A set of functions for analyzing single-cell RNA-seq data using the - Seurat workflow. The user provides H5 files containing the results of the - upstream processing through CellRanger, and the package functions allow for - the QC, filtering, normalization, annotation, differential gene expression, - and further visualizations and analysis based on user input. This package can - be run both in a docker container and in user-friendly web-based interactive - notebooks (NIDAP, Palantir Foundry). -License: MIT + file LICENSE -Encoding: UTF-8 -Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.3 -Suggests: - testthat (>= 3.0.0), - knitr, - rmarkdown, - shiny -VignetteBuilder: knitr +Authors@R: c( + person("Maggie", "Cam", , "maggie.cam@nih.gov", role = "aut", + comment = c(ORCID = "0000-0001-8190-9766")), + person("Thomas", "Meyer", , "thomas.meyer@nih.gov", role = c("aut", "cre"), + comment = c(ORCID = "0000-0002-7185-5597")), + person("Jing", "Bian", , "bianjh@nih.gov", role = "aut", + comment = c(ORCID = "0000-0001-7109-716X")), + person("Alexandra", "Michalowski", , "michaloa@mail.nih.gov", role = "aut", + comment = c(ORCID = "0000-0001-9259-6101")), + person("Alexei", "Lobanov", , "alexei.lobanov@nih.gov", role = "aut", + comment = c(ORCID = "0000-0002-9883-4374")), + person("Philip", "Homan", , "philip.homan@nih.gov", role = "aut", + comment = c(ORCID = "0000-0002-3389-4931")), + person("Rui", "He", , "rui.he@nih.gov", role = "aut") + ) +Description: A set of functions for analyzing single-cell RNA-seq data + using the Seurat workflow. 
The user provides H5 files containing the + results of the upstream processing through CellRanger, and the package + functions allow for the QC, filtering, normalization, annotation, + differential gene expression, and further visualizations and analysis + based on user input. This package can be run both in a docker + container and in user-friendly web-based interactive notebooks (NIDAP, + Palantir Foundry). +License: MIT Depends: R (>= 4.0) Imports: anndata (>= 0.7.5.2), + BiocManager, + BiocParallel, callr (>= 3.7.1), + celldex, + colorspace, + ComplexHeatmap (>= 2.10.0), cowplot (>= 1.1.1), data.table (>= 1.14.2), + dendextend, + dendsort, + digest (>= 0.6.29), dplyr (>= 1.0.9), edgeR (>= 3.36.0), - future.apply (>= 1.9.0), future (>= 1.27.0), + future.apply (>= 1.9.0), gargle (>= 1.2.0), + gdata, + ggalluvial, + ggExtra, ggplot2 (>= 3.3.6), ggpubr (>= 0.4.0), + ggrepel, globals (>= 0.16.1), + gridBase (>= 0.4-7), + gridExtra (>= 2.3), + gtable (>= 0.3.1), harmony (>= 0.1.1), hdf5r (>= 1.3.5), + htmlwidgets, httpuv (>= 1.6.5), + httr, + jsonlite, leiden (>= 0.4.2), limma (>= 3.50.3), magrittr (>= 2.0.3), markdown (>= 1.1), + MAST (>= 1.20.0), methods (>= 4.1.3), - plotly (>= 4.10.0), + pheatmap, + plotly (>= 4.10.0), + plyr, + png, progressr (>= 0.10.1), - pryr (>= 0.1.5), purrr (>= 0.3.4), quantmod (>= 0.4.20), + RColorBrewer (>= 1.1-3), reshape2 (>= 1.4.4), reticulate (>= 1.25), rlang (>= 1.0.6), + scales, + scDblFinder, + Seurat (>= 4.1.1), + SingleR (>= 1.8.1), statmod (>= 1.4.37), stringr (>= 1.4.1), svglite (>= 2.1.0), tibble (>= 3.1.8), + tidyr, tidyverse (>= 1.3.2), viridisLite (>= 0.4.0), xfun (>= 0.32), - zip (>= 2.2.0), - ComplexHeatmap (>= 2.10.0), - MAST (>= 1.20.0), - SingleR (>= 1.8.1), - BiocManager, - gridBase (>= 0.4-7), - gridExtra (>= 2.3), - RColorBrewer (>= 1.1-3), - Seurat (>= 4.1.1), - gtable (>= 0.3.1), - digest (>= 0.6.29), - png, - ggExtra, - httr, - jsonlite, - plyr, - colorspace, - dendextend, - dendsort, - pheatmap, - scales, - 
celldex, - gdata, - ggrepel, - tidyr, - htmlwidgets, - scDblFinder - , BiocParallel + zip (>= 2.2.0) +Suggests: + knitr, + rmarkdown, + roxygen2, + testthat (>= 3.0.0), + usethis +Config/Needs/dev: cffr, covr, goodpractice, here, lintr, pkgdown, + rcmdcheck Config/testthat/edition: 3 +Encoding: UTF-8 +Roxygen: list(markdown = TRUE) +RoxygenNote: 7.2.3 diff --git a/Dockerfile b/Dockerfile index b91c92f..9a4b171 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,7 +8,7 @@ ENV BUILD_TAG=${BUILD_TAG} ARG REPONAME="000000" ENV REPONAME=${REPONAME} -ARG R_VERSION=4.3.2 +ARG R_VERSION=4.1.3 ENV R_VERSION=${R_VERSION} SHELL ["/bin/bash", "-lc"] @@ -26,24 +26,87 @@ RUN conda config --add channels conda-forge \ && conda config --set channel_priority strict # install conda packages -RUN mamba install -y -c conda-forge \ +# Note: Most version pins removed to allow conda to resolve compatible versions with R 4.1.3 +# Only R version is pinned per requirement +RUN mamba install -y \ r-base=${R_VERSION} \ - r-devtools \ + r-anndata \ + r-biocmanager \ + r-callr \ + bioconductor-celldex \ + r-colorspace \ + bioconductor-complexheatmap \ + r-cowplot \ + r-data.table \ + r-dendextend \ + r-dendsort \ + r-digest \ + r-dplyr \ + bioconductor-edger \ + r-future \ + r-future.apply \ + r-gargle \ + r-gdata \ + r-ggextra \ r-ggplot2 \ - r-ggrepel r-viridis r-upsetr r-patchwork r-plotly \ - r-matrix r-mgcv r-survival \ - bioconductor-genomicranges \ - bioconductor-summarizedexperiment \ - bioconductor-delayedarray \ - bioconductor-s4arrays \ - bioconductor-annotationdbi \ - bioconductor-annotate \ - bioconductor-keggrest \ + r-ggpubr \ + r-ggrepel \ + r-globals \ + r-gridbase \ + r-gridextra \ + r-gtable \ + r-harmony \ + r-hdf5r \ + r-htmlwidgets \ + r-httpuv \ + r-httr \ + r-jsonlite \ + r-leiden \ + bioconductor-limma \ + r-magrittr \ + r-markdown \ + bioconductor-mast \ + r-pheatmap \ + r-plotly \ + r-plyr \ + r-png \ + r-progressr \ + r-purrr \ + r-quantmod \ + r-rcolorbrewer \ + 
r-reshape2 \ + r-reticulate \ + r-rlang \ + r-scales \ + bioconductor-scdblfinder \ + r-seurat=4.1.1 \ + bioconductor-singler \ + r-statmod \ + r-stringr \ + r-svglite \ + r-tibble \ + r-tidyr \ + r-tidyverse \ + r-viridislite \ + r-xfun \ + r-zip \ + r-knitr \ + r-rmarkdown \ + r-roxygen2 \ + r-testthat \ + r-usethis \ + r-cffr \ + r-covr \ + r-goodpractice \ + r-here \ + r-lintr \ + r-pkgdown \ + r-rcmdcheck \ && conda clean -afy # install R package COPY . /opt2/SCWorkflow -RUN R -e "devtools::install_local('/opt2/SCWorkflow', dependencies = TRUE, repos='http://cran.rstudio.com')" +RUN R -e "devtools::install_local('/opt2/SCWorkflow', dependencies = TRUE, upgrade = 'never', repos='http://cran.rstudio.com')" # add scworkflow exec to the path # RUN chmod -R +x /opt2/conda/lib/R/library/SCWorkflow/exec @@ -51,14 +114,51 @@ RUN R -e "devtools::install_local('/opt2/SCWorkflow', dependencies = TRUE, repos # RUN scworkflow --help # copy example script & json to data -COPY ./inst/extdata/example_script.sh /data2/ -COPY ./inst/extdata/json_args/ /data2/json_args/ +# COPY ./inst/extdata/example_script.sh /data2/ +# COPY ./inst/extdata/json_args/ /data2/json_args/ # Save Dockerfile in the docker COPY Dockerfile /opt2/Dockerfile_${REPONAME}.${BUILD_TAG} RUN chmod a+r /opt2/Dockerfile_${REPONAME}.${BUILD_TAG} +# Verify all dependencies from DESCRIPTION are installed +RUN cat > /tmp/check_description_deps.R << 'EOF' +# Parse DESCRIPTION file and check if all dependencies are installed +desc_file <- "/opt2/SCWorkflow/DESCRIPTION" +if (!file.exists(desc_file)) { + stop("DESCRIPTION file not found at ", desc_file) +} +# Read and parse DESCRIPTION +desc <- read.dcf(desc_file) +# Extract dependencies +extract_packages <- function(str) { + if (is.na(str) || str == "") return(character(0)) + # Split by comma and clean up whitespace and version specs + pkgs <- strsplit(str, ",")[[1]] + pkgs <- trimws(pkgs) + pkgs <- gsub("\\s*\\(.*\\)$", "", pkgs) # Remove version specs + pkgs <- 
pkgs[pkgs != ""] + pkgs +} +deps <- unique(c( + extract_packages(desc[1, "Depends"]), + extract_packages(desc[1, "Imports"]), + extract_packages(desc[1, "Suggests"]), + extract_packages(desc[1, "Config/Needs/dev"]) +)) +# Remove base R +deps <- deps[!grepl("^R$", deps)] +# Check if each dependency is installed +missing <- deps[!vapply(deps, requireNamespace, quietly = TRUE, FUN.VALUE = logical(1))] +if (length(missing) > 0) { + stop("The following dependencies are missing: ", paste(missing, collapse = ", ")) +} else { + message("All dependencies are installed.") +} +EOF +RUN R --vanilla --slave --file=/tmp/check_description_deps.R + # cleanup WORKDIR /data2 RUN apt-get clean && apt-get purge \ - && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \ No newline at end of file diff --git a/Dockerfile.rs413s b/Dockerfile.rs413s new file mode 100644 index 0000000..e957f2a --- /dev/null +++ b/Dockerfile.rs413s @@ -0,0 +1,548 @@ +FROM rocker/rstudio:4.1.3 + +RUN apt-get update && apt-get install -y --no-install-recommends \ + bash gcc g++ gfortran make cmake pkg-config \ + git vim-tiny \ + libcurl4-openssl-dev libssl-dev libxml2-dev \ + libpng-tools \ + libpng-dev libjpeg-dev libtiff5-dev zlib1g-dev \ + libfreetype6-dev libharfbuzz-dev libfribidi-dev \ + libbz2-dev liblzma-dev \ + libgsl-dev libhdf5-dev \ + tcl tk tcl-dev tk-dev \ + libglpk-dev libglpk40 \ + libgit2-dev \ + python3 python3-venv \ + python3-dev python3-pip \ + libfontconfig1-dev \ + autoconf automake libtool \ + libgeos-dev libproj-dev \ + libcairo2-dev libxt-dev \ + automake \ + && rm -rf /var/lib/apt/lists/* + + +# libpng libtiff-4 libjpeg libwebp libwebpmux + +# (Optional) prevent R networking during local install steps +# xxx rpf get rid of +# RUN echo 'options(repos = c())' >> /usr/local/lib/R/etc/Rprofile.site + +#RUN pip3 install --no-cache-dir igraph leidenalg numpy +# weird pip instal bug introduced +RUN pip3 install --no-cache-dir igraph 
"leidenalg==0.10.0" numpy + +WORKDIR /home/rstudio + +# Remove any spatstat* that might be preinstalled in the rocker image +RUN R -q -e " \ + for (lib in .libPaths()) { \ + ip <- rownames(installed.packages(lib.loc = lib)); \ + pk <- grep('^spatstat', ip, value = TRUE); \ + if (length(pk)) { \ + message('Removing from ', lib, ': ', paste(pk, collapse=', ')); \ + remove.packages(pk, lib = lib); \ + } \ + }" + +# Install packages with specific versions from CRAN and Bioconductor +RUN R --vanilla --slave << 'EOF' +options(repos = c(CRAN = 'https://cran.r-project.org')) + +if (!require('BiocManager', quietly = TRUE)) { + install.packages('BiocManager', quiet = TRUE) +} +if (!require('remotes', quietly = TRUE)) { + install.packages('remotes', quiet = TRUE) +} + +# Install specific CRAN package versions +cran_versions <- list( + bitops = '1.0-8', + RCurl = '1.98-1.14', + GlobalOptions = '0.1.2', + bit = '4.0.4', + assertthat = '0.2.1', + bit64 = '4.0.5', + shape = '1.4.6', + colorspace = '2.0-3', + circlize = '0.4.15', + rlang = '1.1.4', + cli = '3.6.3', + glue = '1.7.0', + lifecycle = '1.0.4', + vctrs = '0.6.5', + blob = '1.2.3', + clue = '0.3-61', + cluster = '2.1.2', + codetools = '0.2-18', + crayon = '1.5.3', + data.table = '1.15.4', + DBI = '1.2.3', + dendsort = '0.3.4', + digest = '0.6.37', + ellipsis = '0.3.2', + evaluate = '0.24.0', + fansi = '1.0.6', + farver = '2.1.1', + fastmap = '1.2.0', + cachem = '1.1.0', + fastmatch = '1.1-3', + iterators = '1.0.14', + foreach = '1.5.2', + formatR = '1.14', + generics = '0.1.3', + rjson = '0.2.21', + GetoptLong = '1.0.5', + gtable = '0.3.5', + labeling = '0.4.2', + munsell = '0.5.0', + R6 = '2.5.1', + gridExtra = '2.3', + RColorBrewer = '1.1-3', + utf8 = '1.2.4', + pillar = '1.9.0', + pkgconfig = '2.0.3', + viridisLite = '0.4.1', + scales = '1.2.1', + withr = '3.0.1', + gtools = '3.9.5', + gridGraphics = '0.5-1', + hms = '1.1.2', + irlba = '2.3.5.1', + jsonlite = '1.8.8', + png = '0.1-7', + KernSmooth = '2.23-20', + 
lambda.r = '1.2.4', + lattice = '0.20-45', + lazyeval = '0.2.2', + locfit = '1.5-9.9', + Matrix = '1.5-1', + matrixStats = '0.62.0', + memoise = '2.0.1', + mgcv = '1.8-39', + nlme = '3.1-155', + rsvd = '1.0.5', + Rcpp = '1.0.13', + uuid = '1.1-0', + xfun = '0.47', + xtable = '1.8-4', + yaml = '2.3.10', + beeswarm = '0.4.0', + filelock = '1.0.3', + bslib = '0.4.2', + Cairo = '1.6-0', + desc = '1.4.2', + pkgload = '1.3.0', + brio = '1.1.3', + zip = '2.3.3', + openxlsx = '4.2.5.2', + praise = '1.0.0', + diffobj = '0.3.5', + waldo = '0.5.1', + testthat = '3.1.6', + nloptr = '2.0.3', + minqa = '1.2.5', + lme4 = '1.1-33', + MatrixModels = '0.5-1', + SparseM = '1.81', + quantreg = '5.95', + numDeriv = '2016.8-1.1', + pbkrtest = '0.5.2', + maptools = '1.1-7', + carData = '3.0-5', + rio = '0.5.29', + car = '3.0-0', + pROC = '1.18.2', + proxy = '0.4-27', + e1071 = '1.7-13', + ModelMetrics = '1.2.2.2', + clock = '0.7.0', + shinyjs = '2.1.0', + colourpicker = '1.2.0', + combinat = '0.0-8', + corrplot = '0.92', + cpp11 = '0.4.7', + diagram = '1.6.5', + DT = '0.28', + Rttf2pt1 = '1.3.12', + extrafontdb = '1.0', + extrafont = '0.18', + vipor = '0.4.7', + ggbeeswarm = '0.7.2', + ggrastr = '1.0.2', + fastICA = '1.2-3', + gdata = '2.18.0.1', + ggExtra = '0.10.1', + ggsci = '3.0.0', + ggsignif = '0.6.3', + gower = '1.0.1', + gridBase = '0.4-7', + hardhat = '1.3.0', + RhpcBLASctl = '0.23-42', + hdf5r = '1.3.5', + hexbin = '1.28.3', + survival = '3.2-13', + MASS = '7.3-55', + SQUAREM = '2021.1', + lava = '1.7.2.1', + lobstr = '1.1.2', + lsei = '1.3-0', + markdown = '1.13', + lpSolve = '5.6.16', + mclust = '6.0.0', + npsurv = '0.5-0', + pheatmap = '1.0.12', + polynom = '1.4-1', + proj4 = '1.0-12', + pryr = '0.1.5', + ps = '1.8.1', + RcppParallel = '5.1.6', + rstatix = '0.7.0', + timeDate = '4022.108', + maps = '3.4.1', + ggpubr = '0.4.0', + prodlim = '2023.03.31', + ipred = '0.9-14', + recipes = '1.0.6', + caret = '6.0-94', + quantmod = '0.4.20', + profvis = '0.3.7', + sessioninfo = 
'1.2.3', + downlit = '0.4.2', + whisker = '0.4.1', + pkgdown = '2.0.7', + pkgbuild = '1.4.8', + brew = '1.0-8', + roxygen2 = '7.2.3', + urlchecker = '1.0.1', + rversions = '2.1.2', + xopen = '1.0.0', + rcmdcheck = '1.4.0', + credentials = '1.3.2', + gert = '1.9.2', + gitcreds = '0.1.2', + httr2 = '0.2.3', + ini = '0.3.1', + gh = '1.4.0', + usethis = '3.1.0', + devtools = '2.4.5', + ggalt = '0.4.0', + EnhancedVolcano = '1.12.0' +) + +for (pkg in names(cran_versions)) { + tryCatch({ + remotes::install_version(pkg, version = cran_versions[[pkg]], repos = 'https://cran.r-project.org', quiet = TRUE) + }, error = function(e) { + message('Note: install_version failed for ', pkg, ', trying install.packages') + install.packages(pkg, quiet = TRUE) + }) +} + +# Install Bioconductor packages with specific versions +bioc_versions <- list( + BiocGenerics = '0.40.0', + Biobase = '2.54.0', + S4Vectors = '0.32.4', + IRanges = '2.28.0', + XVector = '0.34.0', + GenomeInfoDbData = '1.2.7', + GenomeInfoDb = '1.30.1', + beachmat = '2.10.0', + edgeR = '3.36.0', + GenomicRanges = '1.46.1', + Biostrings = '2.62.0', + DelayedArray = '0.20.0', + sparseMatrixStats = '1.6.0', + DelayedMatrixStats = '1.16.0', + ScaledMatrix = '1.2.0', + zlibbioc = '1.40.0', + KEGGREST = '1.34.0', + AnnotationDbi = '1.56.2', + BiocParallel = '1.28.3', + BiocSingular = '1.10.0', + ComplexHeatmap = '2.10.0', + fgsea = '1.20.0', + BiocNeighbors = '1.12.0', + BiocFileCache = '2.2.1', + bluster = '1.4.0', + scuttle = '1.4.0', + scater = '1.22.0', + scran = '1.22.1', + SingleR = '1.8.1', + MAST = '1.20.0', + scDblFinder = '1.8.0', + SummarizedExperiment = '1.24.0', + SingleCellExperiment = '1.16.0', + HDF5Array = '1.22.1', + rhdf5 = '2.38.1', + rhdf5filters = '1.6.0', + GSVA = '1.42.0', + ExperimentHub = '2.2.1', + AnnotationHub = '3.2.2', + celldex = '1.4.0', + annotate = '1.72.0', + graph = '1.72.0', + GSEABase = '1.56.0', + interactiveDisplayBase = '1.32.0', + TrajectoryUtils = '1.2.0', + TSCAN = '1.32.0', + 
conquer = '1.3.3', + metapod = '1.2.0', + statmod = '1.5.0' +) + +for (pkg in names(bioc_versions)) { + tryCatch({ + remotes::install_version(pkg, version = bioc_versions[[pkg]], repos = BiocManager::repositories(), quiet = TRUE) + }, error = function(e) { + message('Note: install_version failed for ', pkg, ', trying BiocManager') + BiocManager::install(pkg, ask = FALSE, quiet = TRUE) + }) +} +EOF + + +# Additional packages that need special installation + +# Install SCWorkflow from GitHub +COPY . /opt/SCWorkflow +RUN R --vanilla --slave -e "remotes::install_local('/opt/SCWorkflow', dependencies = TRUE, quiet = TRUE, upgrade='never')" + +# Install spatstat family packages with specific versions +RUN cat > /tmp/install_spatstat.R << 'EOFSPAT' +options(repos = c(CRAN = 'https://cran.r-project.org')) +if (!require('remotes', quietly = TRUE)) install.packages('remotes', quiet = TRUE) +spatstat_versions <- list( + spatstat.utils = '3.1-0', + spatstat.data = '3.0-0', + deldir = '2.0-4', + polyclip = '1.10-7', + spatstat.univar = '2.0-3', + spatstat.geom = '3.0-3', + spatstat.random = '2.2-0', + abind = '1.4-5', + tensor = '1.5', + goftest = '1.2-3', + spatstat.sparse = '3.1-0', + spatstat.core = '2.4-2', + spatstat.linnet = '2.2-1' +) +for (pkg in names(spatstat_versions)) { + tryCatch({ + remotes::install_version(pkg, version = spatstat_versions[[pkg]], repos = 'https://cran.r-project.org', quiet = TRUE) + }, error = function(e) { + message('Note: install_version for ', pkg, ' failed, trying install.packages') + install.packages(pkg, quiet = TRUE) + }) +} +EOFSPAT + +RUN R --vanilla --slave --file=/tmp/install_spatstat.R + +# Install remaining specialized packages +RUN cat > /tmp/install_special.R << 'EOFSPEC' +options(repos = c(CRAN = 'https://cran.r-project.org')) +if (!require('remotes', quietly = TRUE)) install.packages('remotes', quiet = TRUE) +special_versions <- list( + RSpectra = '0.16-1', + dotCall64 = '1.1-1', + spam = '2.11-0', + RcppHNSW = '0.4.1', + 
leidenbase = '0.1.30', + fastDummies = '1.7.3', + sp = '1.5-0', + rgeos = '0.5-9', + SeuratObject = '4.1.1', + Seurat = '4.1.1', + XML = '3.99-0.14', + anndata = '0.7.5.2', + ash = '1.0-15', + viridis = '0.6.5', + dendextend = '1.16.0', + ggrepel = '0.9.5', + l2p = '0.0-13', + l2psupp = '0.0-13', + ica = '1.0-3', + Rtsne = '0.16', + ggridges = '0.5.3', + scattermore = '1.2', + listenv = '0.9.1', + globals = '0.16.3', + parallelly = '1.38.0', + future = '1.34.0', + future.apply = '1.11.2', + RcppEigen = '0.3.3.9.3', + RcppAnnoy = '0.0.19', + zoo = '1.8-12', + lmtest = '0.9-40', + fitdistrplus = '1.1-8', + caTools = '1.18.2', + gplots = '3.1.3', + ROCR = '1.0-11', + igraph = '2.0.3', + pbapply = '1.5-0', + commonmark = '1.9.0', + httpuv = '1.6.15', + sourcetools = '0.1.7-1', + shiny = '1.9.1', + miniUI = '0.1.1.1', + progressr = '0.14.0', + sitmo = '2.0.2', + dqrng = '0.4.1', + FNN = '1.1.3.2', + RcppProgress = '0.4.2', + uwot = '0.1.14', + cowplot = '1.1.1', + RcppTOML = '0.2.2', + rprojroot = '2.0.4', + here = '1.0.1', + reticulate = '1.40.0', + leiden = '0.4.3', + RANN = '2.6.1', + RcppArmadillo = '0.12.4.0.0', + sctransform = '0.3.4', + Rhdf5lib = '1.16.0', + xgboost = '1.7.8.1', + nidapFunctions = '0.7.8', + snow = '0.4-4', + BH = '1.81.0-1', + futile.options = '1.0.1', + futile.logger = '1.4.3', + base64enc = '0.1-3', + htmltools = '0.5.8.1', + rappdirs = '0.3.3', + jquerylib = '0.1.4', + tinytex = '0.44', + fs = '1.6.4', + sass = '0.4.6', + mime = '0.12', + magrittr = '2.0.3', + stringi = '1.8.4', + stringr = '1.5.1', + highr = '0.10', + knitr = '1.48', + fontawesome = '0.5.1', + rmarkdown = '2.28', + htmlwidgets = '1.6.4', + sys = '3.4.2', + askpass = '1.1', + openssl = '2.0.5', + curl = '6.4.0', + httr = '1.4.7', + timechange = '0.2.0', + lubridate = '1.8.0', + isoband = '0.2.7', + tibble = '3.2.1', + ggplot2 = '3.3.6', + patchwork = '1.2.0', + later = '1.3.2', + promises = '1.3.0', + crosstalk = '1.2.0', + purrr = '1.0.2', + tidyselect = '1.2.1', + dplyr = 
'1.1.4', + tidyr = '1.2.1', + plotly = '4.10.4', + plyr = '1.8.7', + clipr = '0.8.0', + prettyunits = '1.1.1', + progress = '1.2.2', + tzdb = '0.4.0', + vroom = '1.6.3', + readr = '2.1.2', + reshape2 = '1.4.4', + plogr = '0.2.0', + RSQLite = '2.3.9', + systemfonts = '1.2.3', + textshaping = '0.3.6', + ragg = '1.2.5', + dbplyr = '2.2.1', + rstudioapi = '0.14', + dtplyr = '1.3.1', + backports = '1.4.1', + broom = '1.0.1', + processx = '3.8.5', + callr = '3.7.3', + reprex = '2.0.2', + modelr = '0.1.9', + conflicted = '1.2.0', + rematch2 = '2.1.2', + gargle = '1.3.0', + rematch = '1.0.1', + cellranger = '1.1.0', + ids = '1.0.1', + googledrive = '2.0.0', + googlesheets4 = '1.0.1', + readxl = '1.4.1', + selectr = '0.4-2', + xml2 = '1.3.3', + rvest = '1.0.3', + forcats = '0.5.2', + haven = '2.5.1', + tidyverse = '1.3.2', + doParallel = '1.0.17', + MatrixGenerics = '1.6.0', + svglite = '2.1.0', + svMisc = '1.2.3', + xts = '0.12.2', + TTR = '0.24.3' +) +for (pkg in names(special_versions)) { + tryCatch({ + remotes::install_version(pkg, version = special_versions[[pkg]], repos = 'https://cran.r-project.org', quiet = TRUE) + }, error = function(e) { + message('Note: install_version for ', pkg, ' failed, trying install.packages') + install.packages(pkg, quiet = TRUE) + }) +} +EOFSPEC + +RUN R --vanilla --slave --file=/tmp/install_special.R + +# Install development dependencies +RUN cat > /tmp/install_dev_dependencies.R << 'EOFDEV' +options(repos = c(CRAN = 'https://cran.r-project.org')) +dev_dependencies <- c('cffr', 'covr', 'goodpractice', 'here', 'lintr', 'pkgdown', 'rcmdcheck') + +for (pkg in dev_dependencies) { + tryCatch({ + remotes::install_version(pkg, repos = 'https://cran.r-project.org', quiet = TRUE) + }, error = function(e) { + message('Note: install_version failed for ', pkg, ', trying install.packages') + install.packages(pkg, quiet = TRUE) + }) +} +EOFDEV + +RUN R --vanilla --slave --file=/tmp/install_dev_dependencies.R + +# Verify that all dependencies from 
DESCRIPTION are installed +RUN cat > /tmp/check_description_deps.R << 'EOF' +options(repos = c(CRAN = 'https://cran.r-project.org')) +# Read the DESCRIPTION file +desc_file <- "/opt/SCWorkflow/DESCRIPTION" +if (!file.exists(desc_file)) { + stop("DESCRIPTION file not found at ", desc_file) +} +# Parse dependencies from DESCRIPTION +desc <- read.dcf(desc_file) +deps <- unique(c( + strsplit(desc[1, "Imports"], ",")[[1]], + strsplit(desc[1, "Suggests"], ",")[[1]], + strsplit(desc[1, "Depends"], ",")[[1]] +)) +deps <- trimws(deps) +# Check if each dependency is installed +missing <- deps[!vapply(deps, requireNamespace, quietly = TRUE, FUN.VALUE = logical(1))] +if (length(missing) > 0) { + stop("The following dependencies are missing: ", paste(missing, collapse = ", ")) +} else { + message("All dependencies are installed.") +} +EOF + +RUN R --vanilla --slave --file=/tmp/check_description_deps.R + +COPY Dockerfile / diff --git a/NAMESPACE b/NAMESPACE index 96b1ac4..49bc794 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,25 +3,29 @@ export(aggregateCounts) export(annotateCellTypes) export(appendMetadataToSeuratObject) +export(build_modscore_plots) export(colorByGene) export(colorByMarkerTable) export(combineNormalize) +export(compareCellPopulations) +export(compute_modscore_data) export(degGeneExpressionMarkers) export(dotPlotMet) export(dualLabeling) export(filterQC) export(filterSeuratObjectByMetadata) +export(harmonyBatchCorrect) export(heatmapSC) +export(launch_module_score_app) export(modScore) export(nameClusters) -export(object) export(palantir_api_call) export(plotMetadata) export(processRawData) export(reclusterFilteredSeuratObject) export(reclusterSeuratObject) export(tSNE3D) -export(violinPlot_mod) +export(violinPlot) import(MAST) import(RColorBrewer) import(Seurat) @@ -40,12 +44,18 @@ import(gridExtra) import(harmony) import(httr) import(jsonlite) +import(magrittr) import(parallel) +importFrom(tidyr, pivot_longer) import(plotly) import(quantmod) import(reshape2) 
import(rlang) import(scales) +importFrom(stringr, str_split) +import(stringr) +import(tibble) +import(tidyr) import(tidyverse) import(tools) import(utils) @@ -76,6 +86,7 @@ importFrom(dplyr,arrange) importFrom(dplyr,case_when) importFrom(dplyr,desc) importFrom(dplyr,filter) +importFrom(dplyr,group_by) importFrom(dplyr,if_else) importFrom(dplyr,mutate) importFrom(dplyr,mutate_if) @@ -86,14 +97,26 @@ importFrom(dplyr,row_number) importFrom(dplyr,select) importFrom(dplyr,summarise) importFrom(ggExtra,ggMarginal) +importFrom(ggalluvial,geom_flow) importFrom(ggplot2,aes) importFrom(ggplot2,coord_fixed) +importFrom(ggplot2,element_blank) +importFrom(ggplot2,element_text) importFrom(ggplot2,geom_hline) +importFrom(ggplot2,geom_line) importFrom(ggplot2,geom_point) +importFrom(ggplot2,geom_segment) +importFrom(ggplot2,geom_violin) importFrom(ggplot2,geom_vline) importFrom(ggplot2,ggplot) importFrom(ggplot2,ggtitle) +importFrom(ggplot2,guide_legend) +importFrom(ggplot2,guides) +importFrom(ggplot2,scale_color_gradientn) importFrom(ggplot2,scale_color_identity) +importFrom(ggplot2,scale_x_continuous) +importFrom(ggplot2,scale_y_continuous) +importFrom(ggplot2,scale_y_log10) importFrom(ggplot2,scale_y_reverse) importFrom(ggplot2,theme) importFrom(ggplot2,theme_bw) @@ -116,6 +139,7 @@ importFrom(gridExtra,arrangeGrob) importFrom(gridExtra,tableGrob) importFrom(htmlwidgets,saveWidget) importFrom(magrittr,"%>%") +importFrom(patchwork,plot_layout) importFrom(plotly,as_widget) importFrom(plotly,ggplotly) importFrom(plotly,plot_ly) @@ -128,6 +152,7 @@ importFrom(stats,kmeans) importFrom(stats,mad) importFrom(stats,median) importFrom(stats,quantile) +importFrom(stats,setNames) importFrom(stringr,str_replace_all) importFrom(stringr,str_sort) importFrom(stringr,str_split_fixed) diff --git a/R/3D_tSNE.R b/R/3D_tSNE.R index fb8ab05..9896cc5 100644 --- a/R/3D_tSNE.R +++ b/R/3D_tSNE.R @@ -19,6 +19,20 @@ #' @importFrom htmlwidgets saveWidget #' #' @export +#' +#' @return A list with a plotly 
3D TSNE plot (`figure`) and TSNE coordinates +#' (`tsne.df`). +#' +#' @examples +#' \dontrun{ +#' out <- tSNE3D( +#' object = seurat_obj, +#' color.variable = "cell_type", +#' label.variable = "orig.ident", +#' npcs = 15, +#' save.plot = FALSE +#' ) +#' } tSNE3D <- function(object, color.variable, diff --git a/R/AggregateCounts.R b/R/AggregateCounts.R index 7fc9f0b..699dab1 100644 --- a/R/AggregateCounts.R +++ b/R/AggregateCounts.R @@ -1,33 +1,44 @@ -##' @title Aggregate Counts (Pseudobulk) -##' @description Compute pseudobulk expression by averaging expression across groups -##' defined by one or more metadata columns, and return a tidy table. -##' @details Uses Seurat's `AverageExpression()` on the `SCT` assay to compute -##' group-wise average expression for each feature. Also produces a -##' bar plot (via `ggplot2`/`plotly`) showing the number of cells per -##' pseudobulk group and warns if any group contains only one cell. -##' -##' @param object Seurat-class object. -##' @param var.group Character vector of metadata column names used to define -##' pseudobulk groups. When multiple columns are supplied, an -##' interaction of these columns defines the groups. -##' @param slot Character name of the assay data layer passed to -##' `AverageExpression()` (e.g., "data", "counts", or "scale.data"). -##' -##' @return A data.frame of pseudobulk expression with columns `Gene` followed by -##' one column per pseudobulk group. Column names are sanitized to -##' contain only alphanumeric/underscore characters. -##' -##' @import Seurat -##' @import tidyverse -##' @import ggplot2 -##' @import plotly -##' @importFrom dplyr select -##' -##' @export +#' @title Aggregate Counts (Pseudobulk) +#' @description Compute pseudobulk expression by averaging expression across groups +#' defined by one or more metadata columns, and return a tidy table. +#' @details Uses Seurat's `AverageExpression()` on the `SCT` assay to compute +#' group-wise average expression for each feature. 
Also produces a +#' bar plot (via `ggplot2`/`plotly`) showing the number of cells per +#' pseudobulk group and warns if any group contains only one cell. +#' +#' @param object Seurat-class object. +#' @param var.group Character vector of metadata column names used to define +#' pseudobulk groups. When multiple columns are supplied, an +#' interaction of these columns defines the groups. +#' @param slot Character name of the assay data layer passed to +#' `AverageExpression()` (e.g., "data", "counts", or "scale.data"). +#' @param interactive If TRUE, draw plotly plot (default is FALSE) +#' +#' @import Seurat +#' @import tidyverse +#' @import ggplot2 +#' @import plotly +#' @importFrom dplyr select +#' +#' @export +#' +#' @return A data.frame of pseudobulk expression with columns `Gene` followed by +#' one column per pseudobulk group. Column names are sanitized to +#' contain only alphanumeric/underscore characters. +#' +#' @examples +#' \dontrun{ +#' out <- aggregateCounts( +#' object = seurat_obj, +#' var.group = c("orig.ident", "condition"), +#' slot = "data" +#' ) +#' } aggregateCounts <- function(object, var.group, - slot){ + slot="data", + interactive=FALSE){ ## --------------- ## @@ -73,16 +84,19 @@ aggregateCounts <- function(object, )) } - p <- ggplotly(ggplot(df, aes(x = pseudobulk_group, y = Freq)) + + p <- ggplot(df, aes(x = pseudobulk_group, y = Freq)) + geom_bar(stat = "identity", position = "stack") + labs(y = "Counts", x = "Pseudobulk Groups", title = "Number of Cells in each Pseudobulk Group") + - theme(axis.text.x = element_text(angle = 90, hjust = 1))) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) - print(p) + if(interactive==T){ + p <- ggplotly(p) + } } else { stop("All columns in var.group must be factors or characters") } - return(pseudobulk) + return(list(data=pseudobulk, + plots=p)) } \ No newline at end of file diff --git a/R/Annotate_Cell_Types.R b/R/Annotate_Cell_Types.R index 8a8cf0a..bbdc84b 100755 --- 
a/R/Annotate_Cell_Types.R +++ b/R/Annotate_Cell_Types.R @@ -1,8 +1,7 @@ -#' @title Annotating cell types using SingleR module -#' @description SingleR is an automatic annotation method for single-cell -#' RNA sequencing (scRNAseq) data (Aran et al. 2019). Given a reference dataset -#' of samples (single-cell or bulk) with known labels, it labels new cells -#' from a test dataset based on similarity to the reference. +#' @title Cell Type Annotation with SingleR [CCBR] [scRNA-seq] +#' @description Annotate the cell types of your cells using SingleR (Aran et al., 2019). This +#' function takes a combined Seurat object after PC reduction and assigns cells +#' to a category (for example, stem cells or T cells) based on genomic profile. #' @details This function is Step 5 of the basic Single-Cell RNA-seq workflow. #' It is the starting point for downstream visualization, subsetting, and #' analysis. It takes a combined seurat object as input, such as the one created @@ -22,8 +21,7 @@ #' Default is NULL #' @param use.clusters Provide cluster identities for each cell. 
#' Default is NULL - - +#' #' #' @import Seurat #' @import cowplot @@ -36,9 +34,17 @@ #' @export #' #' @return a Seurat object with additional metadata - - - +#' +#' @examples +#' \dontrun{ +#' out <- annotateCellTypes( +#' object = combined_so, +#' species = "Human", +#' reduction.type = "umap" +#' ) +#' } +#' +#' annotateCellTypes <- function(object, species = "Mouse", reduction.type = "umap", diff --git a/R/Color_by_Gene.R b/R/Color_by_Gene.R index 3ddaab9..82e96dd 100644 --- a/R/Color_by_Gene.R +++ b/R/Color_by_Gene.R @@ -35,8 +35,17 @@ #' @export #' #' @return a Seurat object with additional metadata or gene table and plot - - +#' +#' @examples +#' \dontrun{ +#' out <- colorByGene( +#' object = anno_so, +#' samples.to.include = c("sample1", "sample2"), +#' gene = c("CD3D", "MS4A1"), +#' reduction.type = "umap" +#' ) +#' } +#' colorByGene <- function(object, samples.to.include, @@ -65,16 +74,14 @@ colorByGene <- function(object, ## --------------- ## - print(object) # checking for samples - if(any(grepl('c\\(|\\[\\]',samples))) { - samples = eval(parse(text = gsub('\\[\\]', 'c()', samples))) - }else{ - samples=samples + if (is.character(samples.to.include) && length(samples.to.include) == 1 && + grepl('c\\(|\\[\\]', samples.to.include)) { + samples.to.include <- eval(parse(text = gsub('\\[\\]', 'c()', samples.to.include))) } # if none specified, using ALL - if (length(samples) == 0) { - samples = unique(object@meta.data$orig.ident) + if (length(samples.to.include) == 0) { + samples.to.include = unique(object@meta.data$orig.ident) } # Fix for underscore @@ -85,13 +92,13 @@ colorByGene <- function(object, names(sample.name) = names(object@active.ident) object@active.ident <- as.factor(vector()) object@active.ident <- sample.name - object.sub = subset(object, ident = samples) + object.sub = subset(object, ident = samples.to.include) } else { sample.name = as.factor(object@meta.data$orig.ident) names(sample.name) = names(object@active.ident) object@active.ident <- 
as.factor(vector()) object@active.ident <- sample.name - object.sub = subset(object, ident = samples) + object.sub = subset(object, ident = samples.to.include) } #Check input for missing genes diff --git a/R/Color_by_Genes_Automatic.R b/R/Color_by_Genes_Automatic.R index d81409b..88bff98 100644 --- a/R/Color_by_Genes_Automatic.R +++ b/R/Color_by_Genes_Automatic.R @@ -46,17 +46,21 @@ #' @return arranged grob of dimension reduction plots colored by individual #' marker expression -colorByMarkerTable <- function (object, samples.subset, samples.to.display, - manual.genes = c(), marker.table, - cells.of.interest, protein.presence = FALSE, assay = "SCT", slot = "scale.data", - reduction.type = "umap", point.transparency = 0.5, point.shape = 16, - cite.seq = FALSE){ +colorByMarkerTable <- function(object, + samples.subset, + samples.to.display, + manual.genes = c(), + marker.table, + cells.of.interest, + protein.presence = FALSE, + assay = "SCT", + slot = "scale.data", + reduction.type = "umap", + point.transparency = 0.5, + point.shape = 16, + cite.seq = FALSE + ){ - library(ggplot2) - library(Seurat) - library(stringr) - library(grid) - library(gridExtra) .plotMarkers <- function(markers) { if (is.na(markers) == TRUE) { @@ -211,10 +215,12 @@ colorByMarkerTable <- function (object, samples.subset, samples.to.display, }) } - results <- list( - overall = cons.gg.storage, - celltype = indv_arranged, - manual_entry = manual.arranged) + results <- list("plots"=list( + "overall" = cons.gg.storage, + "celltype" = indv_arranged, + "manual_entry" = manual.arranged + ) + ) return(results) } diff --git a/R/Combine_and_Normalize.R b/R/Combine_and_Normalize.R index 98e8f65..ec351e3 100755 --- a/R/Combine_and_Normalize.R +++ b/R/Combine_and_Normalize.R @@ -92,6 +92,16 @@ #' #' @return Seurat Objects and QC plots #' +#' @examples +#' \dontrun{ +#' out <- combineNormalize( +#' object = filtered_so_list, +#' npcs = 30, +#' draw.umap = TRUE, +#' draw.tsne = TRUE +#' ) +#' } +#' 
combineNormalize <- function(object, diff --git a/R/Compare_Cell_Populations.R b/R/Compare_Cell_Populations.R new file mode 100644 index 0000000..56fe700 --- /dev/null +++ b/R/Compare_Cell_Populations.R @@ -0,0 +1,263 @@ +#' @title Compare Cell Populations +#' @description Compare cell population distributions across different groups +#' using bar plots and box plots. Creates visualizations showing cell type +#' frequencies or counts across user-defined groupings. +#' +#' @details This function generates comparative visualizations of cell +#' populations from a Seurat object. It can display data as either frequency +#' percentages or absolute counts, and creates both stacked bar plots +#' (with alluvial flow connections) and grouped box plots for comparison +#' across samples and conditions. +#' +#' @param object A Seurat object containing the single-cell data +#' @param annotation.column Character string specifying the metadata column +#' containing cell type annotations to summarize in the bar plot +#' @param group.column Character string specifying the metadata column +#' defining groups to compare (e.g., treatment conditions) +#' @param sample.column Character string specifying the metadata column +#' containing sample identifiers. Default is "orig.ident" +#' @param counts.type Character string specifying plot data type: +#' "Frequency" (percentages) or "Counts" (absolute numbers). Default is "Frequency" +#' @param group.order Character vector specifying the order of groups in plots. +#' If NULL, uses natural order from data. Default is NULL +#' @param seurat.object.filename Character string for the Seurat object +#' filename. Default is "seurat_object.rds" +#' @param wrap.ncols Integer specifying number of columns for facet wrapping +#' in box plots. 
Default is 5 +#' +#' @import Seurat +#' @import ggplot2 +#' @import ggpubr +#' @import RColorBrewer +#' @import tibble +#' @import reshape2 +#' @import data.table +#' @import dplyr +#' @import magrittr +#' @import cowplot +#' @import gridExtra +#' @import grid +#' @import scales +#' +#' @importFrom ggalluvial geom_flow +#' @importFrom stats setNames +#' @importFrom grDevices colorRampPalette +#' +#' @export +#' +#' @return A list containing: +#' \itemize{ +#' \item \code{Plots} - A list with two ggplot objects: +#' \itemize{ +#' \item \code{Barplot} - Stacked bar plot with alluvial flows +#' \item \code{Boxplot} - Faceted box plots by cell type (only if counts.type="Frequency") +#' } +#' \item \code{Table} - A data.frame with cell counts and percentages +#' } +#' +#' @examples +#' \dontrun{ +#' # Compare cell populations by treatment group +#' results <- compareCellPopulations( +#' object = seurat_obj, +#' annotation.column = "cell_type", +#' group.column = "treatment", +#' sample.column = "sample_id", +#' counts.type = "Frequency" +#' ) +#' +#' # Display plots +#' plot(results$Plots$Barplot) +#' plot(results$Plots$Boxplot) +#' +#' # View summary table +#' head(results$Table) +#' } + +compareCellPopulations <- function( + object, + annotation.column, + group.column, + sample.column = "orig.ident", + counts.type = "Frequency", + group.order = NULL, + wrap.ncols = 5 +) { + + ## -------------------------------- ## + ## Input Validation ## + ## -------------------------------- ## + + # Validate object + if (!inherits(object, "Seurat")) { + stop("Error: 'object' must be a Seurat object") + } + + # Validate counts.type + if (!counts.type %in% c("Frequency", "Counts")) { + stop("Error: 'counts.type' must be either 'Frequency' or 'Counts'") + } + + ## --------- ## + ## Functions ## + ## --------- ## + + createAnnoTable <- function(SO, AnnoCol, GroupCol) { + ## Extract annotation data for each group using a 2D contingency table + cntMat <- table(SO@meta.data[[AnnoCol]], 
SO@meta.data[[GroupCol]]) + + # Convert to data frame while preserving row/column names + cntTble <- as.data.frame.matrix(cntMat) + cntTble <- data.frame( + lapply(cntTble, function(x) as.numeric(as.character(x))), + check.names = FALSE, + row.names = rownames(cntTble) + ) + + freqTble <- apply(cntTble, 2, FUN = function(x) { + return(x / sum(x)) + }) + freqTble <- (freqTble * 100) + + outTbl <- merge(cntTble, as.data.frame(freqTble), + by = 'row.names', + suffixes = c('_CellCounts', '_Percent')) + outTbl <- dplyr::rename(outTbl, 'Clusters' = "Row.names") + + return(list( + 'CellFreq' = freqTble, + 'CellCounts' = cntTble, + 'OutTable' = outTbl + )) + } + + ## --------------- ## + ## Main Code Block ## + ## --------------- ## + + # Replace dots with underscores in column names + colnames(object@meta.data) <- gsub("\\.", "_", colnames(object@meta.data)) + + # Update column names if they were modified + annotation.column <- gsub("\\.", "_", annotation.column) + group.column <- gsub("\\.", "_", group.column) + sample.column <- gsub("\\.", "_", sample.column) + + + # Validate metadata columns exist + required.cols <- c(annotation.column, group.column, sample.column) + missing.cols <- setdiff(required.cols, colnames(object@meta.data)) + if (length(missing.cols) > 0) { + stop("Error: The following columns are missing from metadata: ", + paste(missing.cols, collapse = ", ")) + } + + + + + # Set up ordering + ordr <- object@meta.data[[annotation.column]] %>% + unique() %>% + sort() + + if (is.null(group.order)) { + group.order <- unique(object@meta.data[[group.column]]) + } + + # Set up colors + numColors <- max( + length(unique(object@meta.data[[annotation.column]])), + 20 + ) + colpaired <- colorRampPalette(brewer.pal(12, "Paired")) + cols <- c( + "#e6194B", "#3cb44b", "#4363d8", "#f58231", "#911eb4", "#42d4f4", + "#f032e6", "#bfef45", "#fabebe", "#469990", "#e6beff", "#9A6324", + "#800000", "#aaffc3", "#808000", "#000075", + colpaired(numColors) + ) + names(cols) <- ordr 
+ + object@meta.data[[annotation.column]] <- factor( + object@meta.data[[annotation.column]], + levels = ordr + ) + + # Create tables + ColTables <- createAnnoTable(object, annotation.column, group.column) + BoxTables <- createAnnoTable(object, annotation.column, sample.column) + + metaGroups <- object@meta.data[, c(group.column, sample.column)] + rownames(metaGroups) <- NULL + metaGroups <- metaGroups %>% unique() + + ## Create plots based on counts type + if (counts.type == 'Frequency') { + ptbl <- melt(ColTables$CellFreq) + ptblBox <- melt(as.matrix(BoxTables$CellFreq)) + ptblBox <- merge(ptblBox, metaGroups, + by.x = 'Var2', by.y = sample.column, all.x = TRUE) + + labelCol <- 'PerValue' + ylab <- 'Frequency of each cell type (100%)' + } else if (counts.type == "Counts") { + ptbl <- melt(as.matrix(ColTables$CellCounts)) + ptblBox <- melt(as.matrix(BoxTables$CellCounts)) + ptblBox <- merge(ptblBox, metaGroups, + by.x = 'Var2', by.y = sample.column, all.x = TRUE) + + labelCol <- 'value' + ylab <- 'Cell Counts' + } + + # Format bar plot data + ptbl$Var1 <- factor(ptbl$Var1, levels = ordr) + ptbl$value <- round(ptbl$value, 1) + ptbl$PerValue <- paste0(ptbl$value, '%') + ptbl$PerValue <- gsub('^%$', "_", ptbl$PerValue) + ptbl[ptbl$value < 1, 'PerValue'] <- "" + ptbl$Var2 <- factor(ptbl$Var2, levels = group.order) + + # Create bar plot with alluvial flows + p2 <- ptbl %>% + ggplot(aes_string(y = 'value', x = 'Var2', fill = 'Var1', label = labelCol)) + + geom_flow(aes(alluvium = Var1), alpha = .2, + lty = 2, color = "black", + curve_type = "linear", + width = .5) + + geom_col(aes(fill = Var1), width = .5, color = "black") + + geom_text(size = 3, position = position_stack(vjust = 0.5)) + + theme_classic() + + ylab(ylab) + + xlab("") + + scale_x_discrete(guide = guide_axis(angle = 45)) + + scale_fill_manual(annotation.column, values = cols) + + # Create box plot + ptblBox$value <- round(ptblBox$value, 1) + ptblBox$PerValue <- paste0(ptblBox$value, '%') + ptblBox$PerValue 
<- gsub('^%$', "_", ptblBox$PerValue) + ptblBox[ptblBox$value < 1, 'PerValue'] <- "" + ptblBox[[group.column]] <- factor(ptblBox[[group.column]], levels = group.order) + + p2_Box <- ptblBox %>% + ggboxplot(y = 'value', x = group.column, add = "jitter", color = "Var1") + + facet_wrap(~Var1, ncol = wrap.ncols, scales = 'fixed') + + ylab(ylab) + + xlab("") + + theme(legend.title = element_blank()) + + # Return results + result <- list( + 'plots' = list('Barplot' = p2, 'Boxplot' = p2_Box), + 'data' = ColTables$OutTable + ) + + return(result) +} + +# Add global variables to avoid R CMD check NOTEs +utils::globalVariables(c( + "Var1", "Var2", "value", "PerValue", "alluvium", + ".", "CellFreq", "CellCounts", "OutTable" +)) diff --git a/R/DEG_Gene_Expression_Markers.R b/R/DEG_Gene_Expression_Markers.R index 321333c..463c507 100755 --- a/R/DEG_Gene_Expression_Markers.R +++ b/R/DEG_Gene_Expression_Markers.R @@ -1,7 +1,7 @@ -#' @title DEG (Gene Expression Markers) -#' @description This function performs a DEG (differential expression of genes) -#' analysis on a merged Seurat object to identify expression markers -#' between different groups of cells (contrasts). +#' @title DE with Find Markers [CCBR] [scRNA-seq] +#' @description This function performs DE (differential expression) analysis on +#' a merged Seurat object to identify expression markers between different +#' groups of cells (contrasts). #' @details The recommended input is a merged Seurat object #' with SingleR annotations, along with its associated sample names and metadata #' @@ -20,8 +20,8 @@ #' Default is FALSE #' @param assay.to.use The assay to use for your DEG analysis. #' Default is SCT, but can use linearly scaled data by selecting RNA instead - - +#' +#' #' @import Seurat #' @import ggplot2 #' @import RColorBrewer @@ -41,9 +41,19 @@ #' @export #' #' @return a dataframe with DEG. 
- - - +#' +#' @examples +#' \dontrun{ +#' deg <- degGeneExpressionMarkers( +#' object = anno_so, +#' samples = c("sample1", "sample2"), +#' contrasts = c("A-B"), +#' parameter.to.test = "cluster" +#' ) +#' } +#' +#' +#' degGeneExpressionMarkers <- function (object, samples, contrasts, parameter.to.test = "orig_ident", test.to.use = "MAST", log.fc.threshold = 0.25, use.spark = FALSE, assay.to.use = "SCT") diff --git a/R/Dotplot_by_Metadata.R b/R/Dotplot_by_Metadata.R index f5beed2..d7f87ad 100644 --- a/R/Dotplot_by_Metadata.R +++ b/R/Dotplot_by_Metadata.R @@ -19,11 +19,23 @@ #' @param cell.reverse.sort If TRUE, Reverse plot order of metadata category #' factors (default is FALSE) #' @param dot.color Dot color (default is "dark blue") +#' #' @importFrom tidyr pivot_wider #' @importFrom Seurat Idents DotPlot +#' #' @export #' #' @return Dotplot with markers and cell types. +#' +#' @examples +#' \dontrun{ +#' p <- dotPlotMet( +#' object = anno_so, +#' metadata = "celltype", +#' cells = c("T cell", "B cell"), +#' markers = c("CD3D", "MS4A1") +#' ) +#' } dotPlotMet <- function(object, metadata, diff --git a/R/Dual_Labeling.R b/R/Dual_Labeling.R index b4ef133..9e0f906 100755 --- a/R/Dual_Labeling.R +++ b/R/Dual_Labeling.R @@ -1,33 +1,30 @@ -#' @title Plot coexpression of 2 markers using transcript and/or protein -#' expression values -#' @description This method provides visualization of coexpression of 2 genes -#' (or proteins) and additional methods for filtering for cells with gene -#' expression values that are above or below thresholds set for one or both -#' markers. The method allows for filtering (optional) of the Seurat object -#' using manually set expression thresholds. +#' @title Cell Annotation with Co-Expression [CCBR] [scRNA-seq] +#' @description Display co-expression of two chosen markers in your Seurat +#' object. Creates a metadata column containing annotations for cells that +#' correspond to marker expression thresholds. 
#' #' @param object Seurat-class object #' @param samples Samples to be included in the analysis #' @param marker.1 First gene/marker for coexpression analysis #' @param marker.2 Second gene/marker for coexpression analysis #' @param marker.1.type Slot to use for first marker. Choices are "SCT", -#' "protein","HTO" (default is "SCT") +#' "protein","HTO", or "Spatial" (default is "SCT") #' @param marker.2.type Slot to use for second marker. Choices are "SCT", -#' "protein","HTO" (default is "SCT") +#' "protein","HTO", or "Spatial" (default is "SCT") #' @param data.reduction Dimension Reduction method to use for image. Options -#' are "umap" or "tsne" (default is "umap") +#' are "umap", "tsne", or "both" (default is "both") #' @param point.size Point size for image (default is 0.5) #' @param point.shape Point shape for image (default is 16) #' @param point.transparency Point transparency for image (default is 0.5) -#' @param add.marker.thresholds Add marker thresholds on plot (default is FALSE) +#' @param add.marker.thresholds Add marker thresholds on plot (default is TRUE) #' @param marker.1.threshold Threshold set for first marker (default is 0.5) #' @param marker.2.threshold Threshold set for second marker (default is 0.5) #' @param filter.data Add new parameter column to metadata annotating where #' marker thresholds are applied (default is TRUE) -#' @param M1.filter.direction Annotate cells that have gene expression levels +#' @param marker.1.filter.direction Annotate cells that have gene expression levels #' for marker 1 using the marker 1 threshold. Choices are "greater than" #' or "less than" (default is "greater than") -#' @param M2.filter.direction Annotate cells that have gene expression levels +#' @param marker.2.filter.direction Annotate cells that have gene expression levels #' for marker 2 using the marker 2 threshold. 
Choices are "greater than" #' or "less than" (default is "greater than") #' @param apply.filter.1 If TRUE, apply the first filter (default is TRUE) @@ -35,19 +32,17 @@ #' @param filter.condition If TRUE, apply both filters 1 and 2 and take #' intersection. If FALSE, apply both filters and take the union. #' @param parameter.name Name for metadata column for new marker filters -#' (Default is "Marker") +#' (default is "My_CoExp") #' @param trim.marker.1 Trim top and bottom percentile of marker 1 signal to #' pre-scale trim values (below) to remove extremely low and high values -#' (Default is TRUE) +#' (default is FALSE) #' @param trim.marker.2 Trim top and bottom percentile of marker 2 signal to #' pre-scale trim values (below) to remove extremely low and high values -#' (Default is TRUE) -#' @param pre.scale.trim Set trimming percentile values (Defalut is 0.99) -#' @param density.heatmap Creates a additional heatmap showing the density -#' distribution of cells. (Default is FALSE) +#' (default is FALSE) +#' @param pre.scale.trim Set trimming percentile value (default is 0.99) #' @param display.unscaled.values Set to TRUE if you want to view the unscaled -#' gene/protein expression values (Default is FALSE) - +#' gene/protein expression values (default is FALSE) +#' #' @import Seurat #' @importFrom scales rescale #' @importFrom gridExtra arrangeGrob tableGrob @@ -63,7 +58,18 @@ #' @return a seurat object with optional additional metadata for cells that are #' positive or negative for gene markers, a coexpression plot and contingency #' table showing sum of cells filtered. 
- +#' +#' @examples +#' \dontrun{ +#' out <- dualLabeling( +#' object = anno_so, +#' samples = c("sample1"), +#' marker.1 = "CD3D", +#' marker.2 = "MS4A1", +#' data.reduction = "umap" +#' ) +#' } +#' dualLabeling <- function (object, samples, marker.1, @@ -568,16 +574,26 @@ dualLabeling <- function (object, } - if (data.reduction=='tsne'|data.reduction=='umap') { + if (data.reduction=='tsne') { result.list <- list("object" = so.sub, + "data"=list("plot_table" = g), "plots"=list( - "plot" = grob, - "plot_densityHM" = grobHM, - "plot_table" = g) + 'tsne' = grob, + "densityHM" = grobHM) ) - } else if (data.reduction=='both'){ + }else if (data.reduction=='umap') { + + result.list <- list("object" = so.sub, + "data"=list("plot_table" = g), + "plots"=list( + 'umap' = grob, + "densityHM" = grobHM) + ) + + + }else if (data.reduction=='both'){ result.list <- list("object" = so.sub, "data"=list("plot_table" = g), diff --git a/R/Filter_QC.R b/R/Filter_QC.R index e9a13a7..9592e76 100755 --- a/R/Filter_QC.R +++ b/R/Filter_QC.R @@ -1,6 +1,7 @@ -#' @title Filter & QC Samples -#' @description Filters cells and Genes for each sample and generates QC Plots -#' to evaluate data before and after filtering. +#' @title Filter Low Quality Cells (CCBR scRNA-seq) +#' @description Filters cells and genes across various criteria for each sample. +#' Multiple cell and gene filters can be selected to remove poor quality data +#' and noise while generating QC plots before and after filtering. #' @details This is Step 2 in the basic Single-Cell RNA-seq workflow. Multiple #' cell and gene filters can be selected to remove poor quality data and noise. #' Workflows can use this downstream of any Seurat Object. This tool is @@ -71,18 +72,18 @@ #' Usage c(lower limit, Upper Limit). E.g. setting to c(NA,50) will not set a #' lower limit and remove cells with greater than 50% of reads in the top N #' genes. 
(Default: c(NA,NA)) -#' @param mad.topNgenes.limitsSet Filter limits based on how many Median +#' @param mad.topNgenes.limits Filter limits based on how many Median #' Absolute Deviations an outlier cell will have. Calculated from the Median #' percentage of counts in the top N Genes. #' Usage c(lower limit, Upper Limit). E.g. setting to c(5,5) will remove all #' cells with more than 5 absolute deviations greater than or 5 absolute #' deviations less than the median percentage. (Default: c(5,5)) -#' @param n.topgnes Select the number of top highly expressed genes used to +#' @param n.topgenes Select the number of top highly expressed genes used to #' calculate the percentage of reads found in these genes. #' E.g. a value of 20 calculates the percentage of reads found in the top 20 #' most highly expressed Genes. #' (Default: 20) -#' @param do.doublets.fitler Use scDblFinder to identify and remove doublet +#' @param do.doublets.filter Use scDblFinder to identify and remove doublet #' cells. Doublets are defined as two cells that are sequenced under the same #' cellular barcode, for example, if they were captured in the same droplet. 
#' (Default: TRUE) @@ -104,12 +105,22 @@ #' @importFrom stringr str_split_fixed #' @importFrom stats mad median #' @importFrom grid grobHeight textGrob grid.newpage gTree grid.draw - +#' #' #' @export #' #' @return Seurat Object and QC plots - +#' +#' @examples +#' \dontrun{ +#' out <- filterQC( +#' object = so_list, +#' min.cells = 20, +#' n.topgenes = 20, +#' do.doublets.filter = TRUE +#' ) +#' } +#' filterQC <- function(object, ## Filter Samples @@ -125,8 +136,8 @@ filterQC <- function(object, mad.complexity.limits = c(5,NA), topNgenes.limits = c(NA,NA), mad.topNgenes.limits = c(5,5), - n.topgnes=20, - do.doublets.fitler=T, + n.topgenes=20, + do.doublets.filter=TRUE, ## dim Reduction settings plot.outliers="None", #options(None,UMAP,tSNE) @@ -138,7 +149,7 @@ filterQC <- function(object, high.cut.disp = 100000, selection.method = "vst", npcs = 30, - vars_to_regress=NULL, + vars.to.regress=NULL, seed.for.PCA = 42, seed.for.TSNE = 1, seed.for.UMAP = 42 @@ -155,7 +166,7 @@ filterQC <- function(object, ### Helper Functions ##### - .topNGenes <- function(so,n.topgnes) { + .topNGenes <- function(so,n.topgenes) { ##Extract counts table counts_matrix = GetAssayData(so, slot="counts") @@ -163,7 +174,7 @@ filterQC <- function(object, tbl= apply(counts_matrix,2,function(i){ cnts=i[order(i,decreasing=T)] - t20=sum(cnts[1:n.topgnes]) + t20=sum(cnts[1:n.topgenes]) total=sum(cnts) pertop20=(t20/total)*100 @@ -214,7 +225,7 @@ filterQC <- function(object, .plotViolin2=function(count.df,value){ axis.lab = unique(count.df$filt) ylabs=gsub(" \\(", "\n\\(",value) - ylabs=gsub(paste0(" Top",n.topgnes), paste0("\nTop",n.topgnes),ylabs) + ylabs=gsub(paste0(" Top",n.topgenes), paste0("\nTop",n.topgenes),ylabs) ### Set up table fore cut off lines ## clean up cutoff values @@ -301,7 +312,7 @@ filterQC <- function(object, # count.df$filt=factor(count.df$filt,levels = c('filt','raw')) count.df$filt=factor(count.df$filt,levels = c('raw','filt')) ylabs=gsub(" \\(", "\n\\(",value) - 
ylabs=gsub(paste0(" Top",n.topgnes), paste0("\nTop",n.topgnes),ylabs) + ylabs=gsub(paste0(" Top",n.topgenes), paste0("\nTop",n.topgenes),ylabs) ### Set up table fore cut off lines ## clean up cutoff values @@ -469,7 +480,7 @@ filterQC <- function(object, xlab = as.character(xaxis) ylab = as.character(yaxis) ylab=gsub(" \\(", "\n\\(",ylab) - ylab=gsub(paste0(" Top",n.topgnes), paste0("\nTop",n.topgnes),ylab) + ylab=gsub(paste0(" Top",n.topgenes), paste0("\nTop",n.topgenes),ylab) name = paste(ylab,"vs.",xlab) g =ggplot(count.df, aes(x=.data[[xaxis]], y=.data[[yaxis]],color = Sample))+ @@ -510,7 +521,7 @@ filterQC <- function(object, .plotViolinPost2=function(count.df,yaxis){ axis.lab = unique(count.df$Sample) ylabs=gsub(" \\(", "\n\\(",yaxis) - ylabs=gsub(paste0(" Top",n.topgnes), paste0("\nTop",n.topgnes),ylabs) + ylabs=gsub(paste0(" Top",n.topgenes), paste0("\nTop",n.topgenes),ylabs) g=ggplot(count.df, aes(x=Sample, y=(.data[[yaxis]]))) + @@ -548,7 +559,7 @@ filterQC <- function(object, if(plot.outliers!="none"){ so.nf.qcFiltr <- SCTransform(so.nf,do.correct.umi = TRUE, - vars.to.regress=vars_to_regress, + vars.to.regress=vars.to.regress, return.only.var.genes = FALSE) so.nf.qcFiltr = FindVariableFeatures(object = so.nf.qcFiltr, nfeatures = nfeatures, @@ -625,7 +636,7 @@ filterQC <- function(object, ## Caluclate filter Metrics ## calculate Counts in Top 20 Genes - so=.topNGenes(so,n.topgnes) + so=.topNGenes(so,n.topgenes) ## Counts(umi) Filter mad.ncounts.limits=.madCalc(so,'nCount_RNA',mad.ncounts.limits) @@ -700,7 +711,7 @@ filterQC <- function(object, ) ## doublets Filter - if(do.doublets.fitler==T){ + if(do.doublets.filter==T){ doublets.fitler <- so@meta.data$Doublet%in%"singlet" }else{ doublets.fitler=rep(TRUE,nrow(so@meta.data)) @@ -749,7 +760,7 @@ filterQC <- function(object, filtSum[,"Cells before Filtering"]=nrow(filter_matrix) filtSum[,"Cells after all Filters"]=sum(filterIndex) filtSum[,"Percent Remaining"]=perc.remain - topN.filterRename=paste0('% 
Counts in Top',n.topgnes,' Genes filter') + topN.filterRename=paste0('% Counts in Top',n.topgenes,' Genes filter') filtTbl=colSums(filter_matrix==F)%>%t()%>%as.data.frame() filtTbl=rename(filtTbl, 'UMI Count (nCount_RNA)' = 'ncounts.filter', @@ -767,7 +778,7 @@ filterQC <- function(object, ########################################################## # ## create Filter Limits table - topN.filterRename=paste0('% Counts in Top',n.topgnes,' Genes') + topN.filterRename=paste0('% Counts in Top',n.topgenes,' Genes') cat('VDJ Genes Removed: ',length(VDJgenesOut), '\n') cat('Minimum Cells per Gene: ',min.cells,'\n') cat('UMI Count (nCount_RNA) Limits: ',ncounts.limits,'\n') @@ -780,7 +791,7 @@ filterQC <- function(object, cat('MAD Complexity (log10GenesPerUMI) Limits: ',mad.complexity.limits,'\n') cat(topN.filterRename,' Limits: ',topNgenes.limits,'\n') cat('MAD ',topN.filterRename,' Limits: ',mad.topNgenes.limits,'\n') - cat('Doublets Filter: ',do.doublets.fitler,'\n') + cat('Doublets Filter: ',do.doublets.filter,'\n') @@ -821,7 +832,7 @@ filterQC <- function(object, paste0(c("Low:","High:"),topNgenes.limits)%>%paste(collapse = "\n") FiltLmts[,paste0('MAD ',topN.filterRename,'')]= paste0(c("Low:","High:"),mad.topNgenes.limits)%>%paste(collapse = "\n") - FiltLmts[,'DoubletFinder (scDblFinder)']=do.doublets.fitler + FiltLmts[,'DoubletFinder (scDblFinder)']=do.doublets.filter rownames(FiltLmts)=i ### Apply Filters #### @@ -878,13 +889,13 @@ filterQC <- function(object, ## calculate Counts in Top 20 Genes ##calculated after min.cell filter as well - so=.topNGenes(so,n.topgnes) + so=.topNGenes(so,n.topgenes) ## Annotate Doublets: #### ## Gene filter does not effect doublet ident and so not recalculated - if( do.doublets.fitler==T){ + if( do.doublets.filter==T){ sce <- as.SingleCellExperiment(so) set.seed(123) @@ -995,7 +1006,7 @@ filterQC <- function(object, table.meta$nFeature_RNA=as.numeric(table.meta$nFeature_RNA) table.meta$filt=factor(table.meta$filt,levels = 
c('raw','filt')) - topN.filterRename=paste0('% Counts in Top',n.topgnes,' Genes') + topN.filterRename=paste0('% Counts in Top',n.topgenes,' Genes') table.meta=rename(table.meta, 'UMI Count (nCount_RNA)' = 'nCount_RNA', diff --git a/R/Filter_Seurat_Object_by_Metadata.R b/R/Filter_Seurat_Object_by_Metadata.R index 2d76180..7b4a7a6 100644 --- a/R/Filter_Seurat_Object_by_Metadata.R +++ b/R/Filter_Seurat_Object_by_Metadata.R @@ -1,5 +1,6 @@ -#' @title Filter Seurat Object by Metadata -#' @description Filter and subset your Seurat object based on metadata column +#' @title Subset Seurat Object [CCBR] [scRNA-seq] +#' @description This function subsets your Seurat object by selecting a +#' metadata column and values matching the cells to pass forward in analysis. #' @details This is a downstream template that should be loaded after #' Step 5 of the pipeline (SingleR Annotations on Seurat Object) #' @@ -32,7 +33,7 @@ #' which have been highlighted. Default is 0.5 #' @param use.cite.seq.data TRUE if you would like to plot Antibody clusters #' from CITEseq instead of scRNA. - +#' #' #' @import Seurat #' @import ggplot2 @@ -49,8 +50,19 @@ #' @export #' #' @return a subset Seurat object - - +#' +#' @examples +#' \dontrun{ +#' out <- filterSeuratObjectByMetadata( +#' object = anno_so, +#' samples.to.include = c("sample1", "sample2"), +#' sample.name = "orig.ident", +#' category.to.filter = "celltype", +#' values.to.filter = c("T cell", "B cell") +#' ) +#' } +#' +#' filterSeuratObjectByMetadata <- function(object, samples.to.include, sample.name, diff --git a/R/Harmony.R b/R/Harmony.R index 85401dc..619eb1f 100644 --- a/R/Harmony.R +++ b/R/Harmony.R @@ -5,13 +5,16 @@ #' (SCT scale.data) to obtain PCA embeddings. Performs harmony on #' decomposed embedding and adjusts decomposed gene expression values #' by harmonized embedding. 
-#' @param seurat_object Seurat-class object +#' @param object Seurat-class object containing gene expression data and +#' metadata with the batch variable. #' @param nvar Number of variable genes to subset the gene expression data by #' (Default: 2000) #' @param genes.to.add Add genes that might not be found among variably #' expressed genes #' @param group.by.var Which variable should be accounted for when running #' batch correction +#' @param return.lognorm Logical; if TRUE, retain log-normalized assay behavior +#' in downstream handling. (Default: TRUE) #' @param npc Number of principal components to use when running Harmony #' (Default: 20) @@ -20,6 +23,7 @@ #' @import gridExtra #' @import RColorBrewer #' @import ggplot2 +#' @importFrom patchwork plot_layout #' #' @export #' @examples @@ -36,20 +40,13 @@ #' @return A list: adj.object with harmony-adjusted gene expression (SCT slot) #' adj.tsne: harmonized tSNE plot -object = readRDS('tests/testthat/fixtures/BRCA/BRCA_Combine_and_Renormalize_SO_downsample.rds') - harmonyBatchCorrect <- function(object, nvar = 2000, genes.to.add = c(), group.by.var, - return_lognorm = T, + return.lognorm = T, npc = 30) { -library(patchwork) -library(harmony) -library(Seurat) -library(ggplot2) -library(RColorBrewer) # Error and Warning Messages if(is.null(genes.to.add)){ @@ -145,14 +142,10 @@ library(RColorBrewer) object@reductions$pca@stdev <- pppca$d # Store original log-normalized data and scaling parameters for back-calculation - if (return_lognorm) { + if (return.lognorm) { library(Matrix) # Get log-normalized data for the variable features lognorm_data <- object@assays$SCT@data[mvf, , drop = FALSE] - print(str(object)) - print("hello") - print(class(lognorm_data)) - print(dim(lognorm_data)) # Calculate scaling parameters from the original scaled data #scale_center <- Matrix::rowMeans(lognorm_data) @@ -216,16 +209,17 @@ library(RColorBrewer) guides(colour = guide_legend(override.aes = list(size=5, alpha = 1))) + 
annotate("text", x = Inf, y = -Inf, label = "Harmonized UMAP", hjust = 1.1, vjust = -1, size = 5) - print((orig.tsne + harm.tsne) + plot_layout(ncol = 2)) - print((orig.umap + harm.umap) + plot_layout(ncol = 2)) - + + tsneComb=(orig.tsne + harm.tsne) + plot_layout(ncol = 2) + umapComb=(orig.umap + harm.umap) + plot_layout(ncol = 2) + # Calculate adjusted gene expression from embeddings harm.embeds <- object@reductions$harmony@cell.embeddings harm.lvl.backcalc.scaled <- harm.embeds %*% t(ppldngs) # Store batch-corrected scaled data in Harmony assay - if (return_lognorm) { + if (return.lognorm) { # Fast conversion back to log-normalized space # Direct vectorized operations on the transposed matrix harm.lvl.backcalc.lognorm <- t(harm.lvl.backcalc.scaled) * scaling_params$scale[mvf] + scaling_params$center[mvf] @@ -237,19 +231,28 @@ library(RColorBrewer) print("Batch-corrected scaled data stored in object@assays$Harmony@scale.data") } - # Insert back-calculated data into seurat + # Insert back-calculated data into seurat + print( "Insert back-calculated data into seurat") object[["Harmony"]] <- CreateAssayObject(data = harm.lvl.backcalc.lognorm) #object[["Harmony"]] <- CreateAssayObject(data = Matrix::Matrix(t(harm.lvl.backcalc.lognorm), sparse = TRUE)) object@assays$Harmony@scale.data <- t(harm.lvl.backcalc.scaled) + print( "Scale Harmony data") object <- ScaleData(object, assay = "Harmony", verbose = FALSE) + print( "re-run PCA on harmony") # re-run PCA on harmony embeddings using top variable genes (mvf) object <- RunPCA(object, assay = "Harmony", verbose = FALSE, features = rownames(object)) object <- FindNeighbors(object, reduction = "harmony", dims = 1:10, assay = "Harmony") + return( - list("object"=object) - ) + list("object"=object, + "plots"=list( + "tsne"=tsneComb, + "umap"=umapComb + ) + ) + ) } diff --git a/R/Heatmap.R b/R/Heatmap.R index 077bea8..0c50340 100755 --- a/R/Heatmap.R +++ b/R/Heatmap.R @@ -7,6 +7,8 @@ #' @param sample.names Sample names #' 
@param metadata Metadata column to plot #' @param transcripts Transcripts to plot +#' @param use.assay Assay to use for transcript expression values. Choices are +#' "SCT" or "Harmony" (default is "SCT") #' @param proteins Proteins to plot (default is NULL) #' @param heatmap.color Color for heatmap. Choices are "Cyan to Mustard", #' "Blue to Red", "Red to Vanilla", "Violet to Pink", "Bu Yl Rd", @@ -27,8 +29,8 @@ #' @param trim.outliers Remove outlier data (default is TRUE) #' @param trim.outliers.percentage Set outlier percentage (default is 0.01) #' @param order.heatmap.rows Order heatmap rows (default is FALSE) -#' @param row.order Gene vector to set row order. If NULL, use cluster order -#' (default is NULL) +#' @param row.order Gene vector to set row order. If empty, use cluster order +#' (default is empty vector) #' #' @import Seurat #' @importFrom ComplexHeatmap pheatmap @@ -47,11 +49,21 @@ #' @return This function returns a heatmap plot and the data underlying the #' heatmap. #' +#' @examples +#' \dontrun{ +#' out <- heatmapSC( +#' object = anno_so, +#' sample.names = c("sample1", "sample2"), +#' metadata = "celltype", +#' transcripts = c("CD3D", "MS4A1") +#' ) +#' } +#' heatmapSC <- function(object, sample.names, metadata, transcripts, - use_assay = 'SCT', + use.assay = 'SCT', proteins = NULL, heatmap.color = "Bu Yl Rd", plot.title = "Heatmap", @@ -314,21 +326,21 @@ heatmapSC <- function(object, df.mat1 = NULL if (length(transcripts) > 0) { if (length(transcripts) == 1) { - if(use_assay == 'SCT'){ + if(use.assay == 'SCT'){ df.mat1 <- vector(mode = "numeric", length = length(object$SCT@scale.data[transcripts,])) df.mat1 <- object$SCT@scale.data[transcripts,] - } else if (use_assay == 'Harmony'){ + } else if (use.assay == 'Harmony'){ df.mat1 <- vector(mode = "numeric", length = length(object$Harmony@scale.data[transcripts,])) df.mat1 <- object$Harmony@scale.data[transcripts,] } } else { - if(use_assay == 'SCT'){ + if(use.assay == 'SCT'){ df.mat1 <- 
as.matrix(object$SCT@scale.data[transcripts,]) - } else if (use_assay == 'Harmony'){ + } else if (use.assay == 'Harmony'){ df.mat1 <- as.matrix(object$Harmony@scale.data[transcripts,]) } } diff --git a/R/ModuleScore.R b/R/ModuleScore.R index ed57712..4839763 100644 --- a/R/ModuleScore.R +++ b/R/ModuleScore.R @@ -11,11 +11,13 @@ #' as the column names, and marker(s) as the entries #' in each column. #' Requires SCT@data to be present within Seurat Object -#' @param use_columns Select specific columns within Marker Table to analyze. +#' @param group.var Metadata column used for grouping in diagnostic plots. +#' (Default: "orig.ident") +#' @param use.columns Select specific columns within Marker Table to analyze. #' Markers from unselected columns won't be included. -#' @param ms_threshold Allow user-specified module score thresholds. +#' @param ms.threshold Allow user-specified module score thresholds. #' Provide one threshold for each Celltype you included -#' in the "use_columns" parameter. +#' in the "use.columns" parameter. #' For each Celltype, provide the Celltype name, #' then a space, then type your threshold for that Celltype. #' This threshold must be a number between 0.0 and 1.0. 
@@ -68,6 +70,7 @@ #' @import grid #' @import data.table #' @import utils +#' @import stringr str_split #' @importFrom dplyr select #' #' @export @@ -76,8 +79,8 @@ #' modScore( #' object = seuratObject, #' marker.table = immuneCellMarkers, -#' use_columns = c("CD4_T", "Treg", "Monocytes"), -#' ms_threshold = c("CD4_T 0.1", "Treg 0.4", "Monocytes 0.3"), +#' use.columns = c("CD4_T", "Treg", "Monocytes"), +#' ms.threshold = c("CD4_T 0.1", "Treg 0.4", "Monocytes 0.3"), #' general.class = c("CD4_T", "Monocytes"), #' multi.lvl = FALSE #' ) @@ -85,8 +88,8 @@ #' modScore( #' object = seuratObject, #' marker.table = immuneCellMarkers, -#' use_columns = c("CD4_T", "Treg", "Monocytes"), -#' ms_threshold = c("CD4_T 0.1", "Treg 0.4", "Monocytes 0.3"), +#' use.columns = c("CD4_T", "Treg", "Monocytes"), +#' ms.threshold = c("CD4_T 0.1", "Treg 0.4", "Monocytes 0.3"), #' general.class = c("CD4_T", "Monocytes"), #' multi.lvl = TRUE, #' lvl.df = parentChildTable @@ -99,9 +102,9 @@ modScore <- function(object, marker.table, - group_var = "orig.ident", - use_columns, - ms_threshold, + group.var = "orig.ident", + use.columns, + ms.threshold, general.class, multi.lvl = FALSE, lvl.df=NULL, @@ -111,18 +114,12 @@ modScore <- function(object, violin.ft.size = 6, step.size = 0.1) { - library(Seurat) - library(gridExtra) - library(grid) - library(dplyr) - library(stringr) - library(ggplot2) # Function for separating and calling cells by bimodal thresholds - .modScoreCall <- function(ms.meta, numeric_threshold, reject) { + .modScoreCall <- function(ms.meta, numeric.threshold, reject) { thres.ls <- list() for (i in 1:ncol(ms.meta)) { - thres.ls[[i]] <- rep(numeric_threshold[i], nrow(ms.meta)) + thres.ls[[i]] <- rep(numeric.threshold[i], nrow(ms.meta)) } thres.df <- data.frame(matrix(unlist(thres.ls), nrow = nrow(ms.meta))) thres.filter <- ms.meta > thres.df @@ -138,7 +135,7 @@ modScore <- function(object, # Upstream processing # String split celltype_thresholds - numeric portion - 
numeric_threshold <- sapply(stringr::str_split(ms_threshold, " "), function(x) as.numeric(x[2])) + numeric.threshold <- sapply(stringr::str_split(ms.threshold, " "), function(x) as.numeric(x[2])) if (!"Barcode" %in% colnames(object@meta.data)) { object@meta.data$Barcode <- rownames(object@meta.data) @@ -147,12 +144,12 @@ modScore <- function(object, colnames(object@meta.data)) # Marker table processing - marker.table <- marker.table[,use_columns] + marker.table <- marker.table[,use.columns] marker.tab <- unlist(marker.table) - celltypes <- sapply(str_split(ms_threshold, " "), function(x) as.character(x[1])) + celltypes <- sapply(str_split(ms.threshold, " "), function(x) as.character(x[1])) - if (any(!celltypes %in% use_columns)){ - unmatched_celltypes <- celltypes[!celltypes %in% use_columns] + if (any(!celltypes %in% use.columns)){ + unmatched_celltypes <- celltypes[!celltypes %in% use.columns] celltype_mismatch_message <- paste0("Labels from thresholds does not match columns from marker table: ",paste(unmatched_celltypes, collapse = ", ")) stop(celltype_mismatch_message) } @@ -163,9 +160,9 @@ modScore <- function(object, 0) { stop("No genes from list was found in data") } - if (length(numeric_threshold) != length(celltypes)) { - if (sum(numeric_threshold) == 0) { - numeric_threshold <- rep(0, length(celltypes)) + if (length(numeric.threshold) != length(celltypes)) { + if (sum(numeric.threshold) == 0) { + numeric.threshold <- rep(0, length(celltypes)) print("Module Score threshold set to zero - outputing preliminary data") } else { stop("Threshold length does not match # celltypes to analyze") @@ -173,7 +170,7 @@ modScore <- function(object, } # For each celltype, print out present / nonpresent genes, calculate MS and generate plots - names(numeric_threshold) <- celltypes + names(numeric.threshold) <- celltypes figures <- list() exclude_cells <- c() h = 0 @@ -238,26 +235,26 @@ modScore <- function(object, umap.pos <- clusmat %>% group_by(clusid) %>% 
dplyr::summarise(umap1.mean = mean(umap1), umap2.mean = mean(umap2)) title = as.character(m) clusmat <- clusmat %>% dplyr::arrange(clusid) - clusid.df <- data.frame(id = object@meta.data[[group_var]], + clusid.df <- data.frame(id = object@meta.data[[group.var]], ModuleScore = object@meta.data[[m]]) g <- ggplot(clusmat, aes(x = umap1, y = umap2)) + theme_bw() + theme(legend.title = element_blank()) + geom_point(aes(colour = sample_clusid), alpha = 0.5, shape = 20, size = 1) + scale_color_gradientn(colours = c("blue4", "lightgrey", "red"), values = scales::rescale(c(0, - numeric_threshold[celltype_name]/2, numeric_threshold[celltype_name], (numeric_threshold[celltype_name] + 1)/2, + numeric.threshold[celltype_name]/2, numeric.threshold[celltype_name], (numeric.threshold[celltype_name] + 1)/2, 1), limits = c(0, 1))) + guides(colour = guide_legend(override.aes = list(size = 5, alpha = 1))) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank()) + xlab("tsne-1") + ylab("tsne-2") - g1 <- RidgePlot(object, features = m, group.by = group_var) + + g1 <- RidgePlot(object, features = m, group.by = group.var) + theme(legend.position = "none", title = element_blank(), axis.text.x = element_text(size = gradient.ft.size)) + - geom_vline(xintercept = numeric_threshold[celltype_name], linetype = "dashed", + geom_vline(xintercept = numeric.threshold[celltype_name], linetype = "dashed", color = "red3") + scale_x_continuous(breaks = seq(0, 1, step.size)) g2 <- ggplot(clusid.df, aes(x = id, y = ModuleScore)) + geom_violin(aes(fill = id)) + theme_classic() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), legend.title = element_blank(), panel.background = element_blank(), axis.text.x = element_blank(), legend.text = element_text(size = rel(0.8)), legend.position = "top", axis.text.y = element_text(size = violin.ft.size)) + guides(colour = guide_legend(override.aes = list(size = 5, - alpha = 1))) + 
geom_hline(yintercept = numeric_threshold[celltype_name], + alpha = 1))) + geom_hline(yintercept = numeric.threshold[celltype_name], linetype = "dashed", color = "red3") + scale_y_continuous(breaks = seq(0, 1, step.size)) g3 <- ggplot(data.frame(x = d$x, y = d$y), aes(x, y)) + @@ -265,9 +262,9 @@ modScore <- function(object, geom_segment(aes(xend = d$x, yend = 0, colour = x)) + scale_y_log10() + scale_color_gradientn(colours = c("blue4", "lightgrey", "red"), values = scales::rescale(c(0, - numeric_threshold[celltype_name]/2, numeric_threshold[celltype_name], (numeric_threshold[celltype_name] + 1)/2, - 1), limits = c(0, 1))) + geom_vline(xintercept = numeric_threshold[celltype_name], - linetype = "dashed", color = "red3") + geom_vline(xintercept = numeric_threshold[celltype_name], linetype = "dashed", color = "red3") + scale_x_continuous(breaks = seq(0, 1, step.size)) + theme(legend.title = element_blank(), + numeric.threshold[celltype_name]/2, numeric.threshold[celltype_name], (numeric.threshold[celltype_name] + 1)/2, + 1), limits = c(0, 1))) + geom_vline(xintercept = numeric.threshold[celltype_name], + linetype = "dashed", color = "red3") + geom_vline(xintercept = numeric.threshold[celltype_name], linetype = "dashed", color = "red3") + scale_x_continuous(breaks = seq(0, 1, step.size)) + theme(legend.title = element_blank(), axis.text.x = element_text(size = 6)) figures[[celltype_name]] = arrangeGrob(g, g1, g2, g3, ncol = 2, top = textGrob(paste0(celltype_name," (General Class)"), gp = gpar(fontsize = 14, fontface = "bold"))) @@ -281,7 +278,7 @@ modScore <- function(object, # Heirarchical classification: general.class > subtypes general.class <- general.class[general.class %in% colnames(object@meta.data)] trunc.meta.gen <- object@meta.data[general.class] - gen.thrs.vec <- numeric_threshold[general.class] + gen.thrs.vec <- numeric.threshold[general.class] call.res <- .modScoreCall(trunc.meta.gen, gen.thrs.vec, reject = "unknown") call.res$Barcode <- 
rownames(call.res) @@ -313,14 +310,14 @@ modScore <- function(object, figures <- append(figures, list(NA), after = gap_ind) - figures[[gap_ind+1]] <- ggplot(trunc.meta.parent, aes_string(x = child)) + geom_density() + ggtitle(plot.title) + geom_vline(xintercept = numeric_threshold[child], linetype = "dashed", color = "red3") + theme_classic() + figures[[gap_ind+1]] <- ggplot(trunc.meta.parent, aes_string(x = child)) + geom_density() + ggtitle(plot.title) + geom_vline(xintercept = numeric.threshold[child], linetype = "dashed", color = "red3") + theme_classic() names(figures)[gap_ind+1] <- child } trunc.meta.no.parent <- call.res[!call.res$MS_Celltype == parent, ] non.parent <- rownames(trunc.meta.no.parent) - child.thres.vec <- numeric_threshold[children_class] + child.thres.vec <- numeric.threshold[children_class] sub.class.call[[match(parent, parent.class)]] <- .modScoreCall(trunc.meta.parent, child.thres.vec, reject = parent) %>% select(MS_Celltype)} @@ -338,7 +335,7 @@ modScore <- function(object, return( list("object"=object, - "figures" = figures) + "plots" = figures) ) } - \ No newline at end of file + diff --git a/R/ModuleScoreHelpers.R b/R/ModuleScoreHelpers.R index 539b6d9..2e55b43 100644 --- a/R/ModuleScoreHelpers.R +++ b/R/ModuleScoreHelpers.R @@ -1,19 +1,31 @@ #' @title Helpers for ModuleScore Shiny app #' @description Precompute module scores per celltype and build plots from cached data. 
+#' @name modscore-imports #' @keywords internal #' @importFrom dplyr mutate group_by summarise arrange select #' @importFrom ggplot2 ggplot aes theme_bw theme element_blank -#' @importFrom geom_point scale_color_gradientn guides guide_legend -#' @importFrom xlab ylab element_text geom_violin theme_classic geom_hline -#' @importFrom scale_y_continuous geom_line geom_segment scale_y_log10 scale_x_continuous -#' @importFrom gridExtra arrangeGrob grid textGrob gpar -NULL - +#' @importFrom ggplot2 geom_point scale_color_gradientn guides guide_legend +#' @importFrom ggplot2 xlab ylab element_text geom_violin theme_classic geom_hline +#' @importFrom ggplot2 scale_y_continuous geom_line geom_segment scale_y_log10 scale_x_continuous +#' @importFrom gridExtra arrangeGrob +#' @importFrom grid textGrob gpar +#' +#' +#' @examples +#' \dontrun{ +#' res <- compute_modscore_data( +#' object = seurat_obj, +#' marker.list = list(Tcell = c("CD3D", "TRBC1")), +#' use.columns = c("Tcell"), +#' reduction = "umap" +#' ) +#' } +#' #' @export -compute_modscore_data <- function(object, marker_list, use_columns, +compute_modscore_data <- function(object, marker.list, use.columns, reduction = c("tsne","umap","pca"), nbins = 10, - group_var = "orig.ident") { + group.var = "orig.ident") { reduction <- match.arg(reduction) if (!"Barcode" %in% colnames(object@meta.data)) { @@ -28,8 +40,8 @@ compute_modscore_data <- function(object, marker_list, use_columns, # Build per-celltype data res <- list() - for (celltype_name in use_columns) { - genes <- marker_list[[celltype_name]] + for (celltype_name in use.columns) { + genes <- marker.list[[celltype_name]] if (is.null(genes)) next object <- Seurat::AddModuleScore(object, list(genes), name = celltype_name, nbin = nbins, assay = "SCT") @@ -58,7 +70,7 @@ compute_modscore_data <- function(object, marker_list, use_columns, coords <- dplyr::mutate(coords, sample_clusid = coords$clusid) coords <- dplyr::arrange(coords, clusid) - clusid_df <- data.frame(id = 
object@meta.data[[group_var]], + clusid.df <- data.frame(id = object@meta.data[[group.var]], ModuleScore = object@meta.data[[m]], stringsAsFactors = FALSE) @@ -66,7 +78,7 @@ compute_modscore_data <- function(object, marker_list, use_columns, object = object, m = m, coords = coords, - clusid_df = clusid_df, + clusid.df = clusid.df, density = d ) } @@ -75,9 +87,9 @@ compute_modscore_data <- function(object, marker_list, use_columns, } #' @export -build_modscore_plots <- function(object, m, coords, clusid_df, d, threshold, - gradient_ft_size = 6, violin_ft_size = 6, step_size = 0.1, - group_var = "orig.ident", +build_modscore_plots <- function(object, m, coords, clusid.df, d, threshold, + gradient.ft.size = 6, violin.ft.size = 6, step.size = 0.1, + group.var = "orig.ident", reduction = c("tsne","umap","pca")) { reduction <- match.arg(reduction) @@ -91,10 +103,10 @@ build_modscore_plots <- function(object, m, coords, clusid_df, d, threshold, ggplot2::xlab(if (reduction == "tsne") "tsne-1" else if (reduction == "umap") "umap-1" else "pc-1") + ggplot2::ylab(if (reduction == "tsne") "tsne-2" else if (reduction == "umap") "umap-2" else "pc-2") - g1 <- Seurat::RidgePlot(object, features = m, group.by = group_var) + - ggplot2::theme(legend.position = "none", title = ggplot2::element_blank(), axis.text.x = ggplot2::element_text(size = gradient_ft_size)) + + g1 <- Seurat::RidgePlot(object, features = m, group.by = group.var) + + ggplot2::theme(legend.position = "none", title = ggplot2::element_blank(), axis.text.x = ggplot2::element_text(size = gradient.ft.size)) + ggplot2::geom_vline(xintercept = threshold, linetype = "dashed", color = "red3") + - ggplot2::scale_x_continuous(breaks = seq(0, 1, step_size)) + ggplot2::scale_x_continuous(breaks = seq(0, 1, step.size)) g3 <- ggplot2::ggplot(data.frame(x = d$x, y = d$y), ggplot2::aes(x, y)) + ggplot2::xlab("ModuleScore") + ggplot2::ylab("Density") + ggplot2::geom_line() + @@ -102,7 +114,7 @@ build_modscore_plots <- 
function(object, m, coords, clusid_df, d, threshold, ggplot2::scale_color_gradientn(colours = c("blue4", "lightgrey", "red"), values = scales::rescale(c(0, threshold/2, threshold, (threshold + 1)/2, 1), limits = c(0, 1))) + ggplot2::geom_vline(xintercept = threshold, linetype = "dashed", color = "red3") + - ggplot2::scale_x_continuous(breaks = seq(0, 1, step_size)) + ggplot2::theme(legend.title = ggplot2::element_blank(), axis.text.x = ggplot2::element_text(size = 6)) + ggplot2::scale_x_continuous(breaks = seq(0, 1, step.size)) + ggplot2::theme(legend.title = ggplot2::element_blank(), axis.text.x = ggplot2::element_text(size = 6)) arranged <- gridExtra::arrangeGrob(g, g1, g3, ncol = 2, top = grid::textGrob(m, gp = grid::gpar(fontsize = 14, fontface = "bold"))) list(g = g, g1 = g1, g3 = g3, arranged = arranged) diff --git a/R/ModuleScoreHelpers_011726.R b/R/ModuleScoreHelpers_011726.R index 539b6d9..24a6e82 100644 --- a/R/ModuleScoreHelpers_011726.R +++ b/R/ModuleScoreHelpers_011726.R @@ -1,19 +1,21 @@ #' @title Helpers for ModuleScore Shiny app #' @description Precompute module scores per celltype and build plots from cached data. 
+#' @name modscore-imports-011726 #' @keywords internal #' @importFrom dplyr mutate group_by summarise arrange select #' @importFrom ggplot2 ggplot aes theme_bw theme element_blank -#' @importFrom geom_point scale_color_gradientn guides guide_legend -#' @importFrom xlab ylab element_text geom_violin theme_classic geom_hline -#' @importFrom scale_y_continuous geom_line geom_segment scale_y_log10 scale_x_continuous -#' @importFrom gridExtra arrangeGrob grid textGrob gpar +#' @importFrom ggplot2 geom_point scale_color_gradientn guides guide_legend +#' @importFrom ggplot2 xlab ylab element_text geom_violin theme_classic geom_hline +#' @importFrom ggplot2 scale_y_continuous geom_line geom_segment scale_y_log10 scale_x_continuous +#' @importFrom gridExtra arrangeGrob +#' @importFrom grid textGrob gpar NULL #' @export -compute_modscore_data <- function(object, marker_list, use_columns, +compute_modscore_data <- function(object, marker.list, use.columns, reduction = c("tsne","umap","pca"), nbins = 10, - group_var = "orig.ident") { + group.var = "orig.ident") { reduction <- match.arg(reduction) if (!"Barcode" %in% colnames(object@meta.data)) { @@ -28,8 +30,8 @@ compute_modscore_data <- function(object, marker_list, use_columns, # Build per-celltype data res <- list() - for (celltype_name in use_columns) { - genes <- marker_list[[celltype_name]] + for (celltype_name in use.columns) { + genes <- marker.list[[celltype_name]] if (is.null(genes)) next object <- Seurat::AddModuleScore(object, list(genes), name = celltype_name, nbin = nbins, assay = "SCT") @@ -58,7 +60,7 @@ compute_modscore_data <- function(object, marker_list, use_columns, coords <- dplyr::mutate(coords, sample_clusid = coords$clusid) coords <- dplyr::arrange(coords, clusid) - clusid_df <- data.frame(id = object@meta.data[[group_var]], + clusid.df <- data.frame(id = object@meta.data[[group.var]], ModuleScore = object@meta.data[[m]], stringsAsFactors = FALSE) @@ -66,7 +68,7 @@ compute_modscore_data <- 
function(object, marker_list, use_columns, object = object, m = m, coords = coords, - clusid_df = clusid_df, + clusid.df = clusid.df, density = d ) } @@ -75,9 +77,9 @@ compute_modscore_data <- function(object, marker_list, use_columns, } #' @export -build_modscore_plots <- function(object, m, coords, clusid_df, d, threshold, - gradient_ft_size = 6, violin_ft_size = 6, step_size = 0.1, - group_var = "orig.ident", +build_modscore_plots <- function(object, m, coords, clusid.df, d, threshold, + gradient.ft.size = 6, violin.ft.size = 6, step.size = 0.1, + group.var = "orig.ident", reduction = c("tsne","umap","pca")) { reduction <- match.arg(reduction) @@ -91,10 +93,10 @@ build_modscore_plots <- function(object, m, coords, clusid_df, d, threshold, ggplot2::xlab(if (reduction == "tsne") "tsne-1" else if (reduction == "umap") "umap-1" else "pc-1") + ggplot2::ylab(if (reduction == "tsne") "tsne-2" else if (reduction == "umap") "umap-2" else "pc-2") - g1 <- Seurat::RidgePlot(object, features = m, group.by = group_var) + - ggplot2::theme(legend.position = "none", title = ggplot2::element_blank(), axis.text.x = ggplot2::element_text(size = gradient_ft_size)) + + g1 <- Seurat::RidgePlot(object, features = m, group.by = group.var) + + ggplot2::theme(legend.position = "none", title = ggplot2::element_blank(), axis.text.x = ggplot2::element_text(size = gradient.ft.size)) + ggplot2::geom_vline(xintercept = threshold, linetype = "dashed", color = "red3") + - ggplot2::scale_x_continuous(breaks = seq(0, 1, step_size)) + ggplot2::scale_x_continuous(breaks = seq(0, 1, step.size)) g3 <- ggplot2::ggplot(data.frame(x = d$x, y = d$y), ggplot2::aes(x, y)) + ggplot2::xlab("ModuleScore") + ggplot2::ylab("Density") + ggplot2::geom_line() + @@ -102,7 +104,7 @@ build_modscore_plots <- function(object, m, coords, clusid_df, d, threshold, ggplot2::scale_color_gradientn(colours = c("blue4", "lightgrey", "red"), values = scales::rescale(c(0, threshold/2, threshold, (threshold + 1)/2, 1), limits = 
c(0, 1))) + ggplot2::geom_vline(xintercept = threshold, linetype = "dashed", color = "red3") + - ggplot2::scale_x_continuous(breaks = seq(0, 1, step_size)) + ggplot2::theme(legend.title = ggplot2::element_blank(), axis.text.x = ggplot2::element_text(size = 6)) + ggplot2::scale_x_continuous(breaks = seq(0, 1, step.size)) + ggplot2::theme(legend.title = ggplot2::element_blank(), axis.text.x = ggplot2::element_text(size = 6)) arranged <- gridExtra::arrangeGrob(g, g1, g3, ncol = 2, top = grid::textGrob(m, gp = grid::gpar(fontsize = 14, fontface = "bold"))) list(g = g, g1 = g1, g3 = g3, arranged = arranged) diff --git a/R/Name_Clusters_by_Enriched_Cell_Type.R b/R/Name_Clusters_by_Enriched_Cell_Type.R index 52a1a08..aa7af4d 100644 --- a/R/Name_Clusters_by_Enriched_Cell_Type.R +++ b/R/Name_Clusters_by_Enriched_Cell_Type.R @@ -7,13 +7,16 @@ #' #' @param object Seurat-class object with cluster IDs column and cell type #' column present -#' @param cluster.numbers Vector containing cluster numbers that match the -#' (numeric) cluster ID's in the cluster.column in Seurat Object metadata -#' @param cluster.names Vector containing custom cluster labels #' @param cluster.column Column name containing cluster ID in the metadata slot #' in the object #' @param labels.column Column name containing labels (usually cell type) in the #' metadata slot in the object +#' @param cluster.identities.table Data frame containing cluster IDs and custom +#' cluster labels +#' @param cluster.numbers Column name in cluster.identities.table +#' containing cluster numbers that match values in cluster.column +#' @param cluster.names Column name in cluster.identities.table +#' containing custom cluster labels #' @param order.clusters.by Vector containing order of clusters in graph. Can #' contain a subset of cluster numbers to plot that match at least some of #' the values in the cluster.column. 
If NULL, use default order @@ -36,13 +39,29 @@ #' @export #' @return Returns Seurat-class object with updated meta.data slot containing #' custom cluster annotation and a plot +#' +#' @examples +#' \dontrun{ +#' map_tbl <- data.frame( +#' cluster_id = c("0", "1"), +#' label = c("T cell", "B cell") +#' ) +#' out <- nameClusters( +#' object = anno_so, +#' cluster.column = "seurat_clusters", +#' labels.column = "celltype", +#' cluster.identities.table = map_tbl, +#' cluster.numbers = "cluster_id", +#' cluster.names = "label" +#' ) +#' } nameClusters <- function(object, + cluster.column, + labels.column, cluster.identities.table, cluster.numbers, cluster.names, - cluster.column, - labels.column, order.clusters.by = NULL, order.celltypes.by = NULL, interactive = FALSE) diff --git a/R/Plot_Metadata.R b/R/Plot_Metadata.R index 2003c62..3f591de 100644 --- a/R/Plot_Metadata.R +++ b/R/Plot_Metadata.R @@ -41,19 +41,31 @@ #' @export #' #' @return a data.frame extracted from the Seurat object and plot +#' +#' @examples +#' \dontrun{ +#' out <- plotMetadata( +#' object = anno_so, +#' samples.to.include = c("sample1", "sample2"), +#' metadata.to.plot = c("celltype", "orig.ident"), +#' columns.to.summarize = c(), +#' reduction.type = "umap" +#' ) +#' } -plotMetadata <- function(#Basic Parameters: - object, - samples.to.include, - metadata.to.plot, - columns.to.summarize, - summarization.cut.off = 5, - reduction.type = "tsne", - use.cite.seq = FALSE, - show.labels = FALSE, - legend.text.size = 1, - legend.position = "right", - dot.size = 0.01 +plotMetadata <- function( + #Basic Parameters: + object, + samples.to.include, + metadata.to.plot, + columns.to.summarize, + summarization.cut.off = 5, + reduction.type = "tsne", + use.cite.seq = FALSE, + show.labels = FALSE, + legend.text.size = 1, + legend.position = "right", + dot.size = 0.01 ) { ################### @@ -358,15 +370,14 @@ plotMetadata <- function(#Basic Parameters: summarize.cut.off <- min(summarization.cut.off, 20) # 
checking for samples included: - if(any(grepl('c\\(|\\[\\]',samples))) { - samples = eval(parse(text = gsub('\\[\\]', 'c()', samples))) - }else{ - samples=samples + samples <- samples.to.include + if (is.character(samples) && any(grepl('c\\(|\\[\\]', samples))) { + samples <- eval(parse(text = gsub('\\[\\]', 'c()', samples))) } if (length(samples) == 0) { print("No samples specified. Using all samples...") - samples = unique(object@meta.data$sample_name) + samples = unique(object@meta.data$orig.ident) } ## Goal is to have column 1 of the new metadata be named "orig.ident" @@ -416,7 +427,7 @@ plotMetadata <- function(#Basic Parameters: # checking metadata for sanity - if(any(grepl('c\\(|\\[\\]',samples))) { + if (is.character(metadata.to.plot) && any(grepl('c\\(|\\[\\]', metadata.to.plot))) { m = eval(parse(text = gsub('\\[\\]', 'c()', metadata.to.plot))) }else{ m=metadata.to.plot @@ -452,7 +463,7 @@ plotMetadata <- function(#Basic Parameters: col <- meta.df[[i]] val.count <- length(unique(col)) - if ((val.count >= summarizeCutOff) & + if ((val.count >= summarize.cut.off) & (i != 'Barcode') & (!is.element(class(meta.df[[i]][1]), c("numeric", "integer")))) { freq.vals <- as.data.frame(-sort(-table(col)))$col[1:summarize.cut.off] diff --git a/R/Process_Raw_Data.R b/R/Process_Raw_Data.R index 4ba759d..43a8176 100755 --- a/R/Process_Raw_Data.R +++ b/R/Process_Raw_Data.R @@ -52,6 +52,15 @@ #' @export #' #' @return Seurat Object and QC plots +#' +#' @examples +#' \dontrun{ +#' out <- processRawData( +#' input = c("sample1_filtered_feature_bc_matrix.h5"), +#' organism = "Human", +#' do.normalize.data = TRUE +#' ) +#' } processRawData <- function(input, sample.metadata.table=NULL, diff --git a/R/Violin_Plots_by_Metadata.R b/R/Violin_Plots_by_Metadata.R index 3a0f580..2b81d02 100644 --- a/R/Violin_Plots_by_Metadata.R +++ b/R/Violin_Plots_by_Metadata.R @@ -9,16 +9,17 @@ #' @param layer Slot to extract gene expression data from (Default: scale.data) #' @param genes Genes to 
visualize on the violin plot #' @param group Split violin plot based on metadata group -#' @param facet_by Split violin plot based on a second metadata group -#' @param filter_outliers Filter outliers from the data (TRUE/FALSE) -#' @param outlier_low Filter lower bound outliers (Default = 0.05) -#' @param outlier_high Filter upper bound outliers (Default = 0.95) -#' @param jitter_points Scatter points on the plot (TRUE/FALSE) -#' @param jitter_dot_size Set size of individual points - +#' @param facet.by Split violin plot based on a second metadata group +#' @param filter.outliers Filter outliers from the data (TRUE/FALSE) +#' @param outlier.low Filter lower bound outliers (Default = 0.05) +#' @param outlier.high Filter upper bound outliers (Default = 0.95) +#' @param jitter.points Scatter points on the plot (TRUE/FALSE) +#' @param jitter.dot.size Set size of individual points +#' #' @import Seurat #' @import reshape2 #' @import tidyverse +#' @importFrom tidyr pivot_longer #' @import cowplot #' @import rlang #' @import ggplot2 @@ -26,41 +27,35 @@ #' @export #' @examples #' \dontrun{ -#' violinPlot_mod( +#' violinPlot( #' object = seurat, #' assay = "SCT", #' layer = "data", #' genes = c("Cd4", "Cd8a"), #' group = "celltype", -#' facet_by = "orig.ident", -#' filter_outliers = TRUE, -#' jitter_points = TRUE, -#' jitter_dot_size = 0.5 +#' facet.by = "orig.ident", +#' filter.outliers = TRUE, +#' jitter.points = TRUE, +#' jitter.dot.size = 0.5 #' ) #' } #' @return violin ggplot2 object -violinPlot_mod <- function (object, +violinPlot <- function (object, assay, layer, genes, group, - facet_by = "", - filter_outliers = F, - outlier_low = 0.05, - outlier_high = 0.95, - jitter_points, - jitter_dot_size) + facet.by = "", + filter.outliers = F, + outlier.low = 0.05, + outlier.high = 0.95, + jitter.points, + jitter.dot.size) { - library(Seurat) - library(ggplot2) - library(gridExtra) - library(tidyr) - library(dplyr) - library(broom) - - facet_data = facet_by != "" + + 
facet_data = facet.by != "" # for handling orig ident if (group == "orig.ident" | group == "orig_ident"){ @@ -96,7 +91,7 @@ violinPlot_mod <- function (object, genes.present <- genes[genes %in% rownames(gene_mtx)] if(facet_data){ - meta_sub <- object@meta.data[,c(group,facet_by)] + meta_sub <- object@meta.data[,c(group,facet.by)] } else { meta_sub <- object@meta.data[c(group)] } @@ -111,7 +106,7 @@ violinPlot_mod <- function (object, data_df$Gene <- factor(data_df$Gene, levels = genes.present) if(facet_data){ - unique_facets <- unique(object@meta.data[,facet_by]) + unique_facets <- unique(object@meta.data[,facet.by]) } else{ unique_facets <- NULL } @@ -126,18 +121,18 @@ violinPlot_mod <- function (object, # Define the outlier removal function .removeOutliers <- function(x, na.rm = TRUE){ - qnt <- quantile(x, probs = c(outlier_low, outlier_high), na.rm = na.rm) + qnt <- quantile(x, probs = c(outlier.low, outlier.high), na.rm = na.rm) H <- 1.5 * IQR(x, na.rm = na.rm) x[x < (qnt[1] - H) | x > (qnt[2] + H)] <- NA x } # Apply only if filtering is enabled - if (filter_outliers) { - group_vars <- colnames(data_df)[colnames(data_df) != 'Expression'] + if (filter.outliers) { + group.vars <- colnames(data_df)[colnames(data_df) != 'Expression'] data_df <- data_df %>% - group_by(across(all_of(group_vars))) %>% + group_by(across(all_of(group.vars))) %>% mutate(Expression = .removeOutliers(Expression)) %>% ungroup() } @@ -148,7 +143,7 @@ violinPlot_mod <- function (object, color_mapping <- setNames(rep(available_colors, length.out = length(unique_facets)), unique_facets) # Set up the common elements of the plot - g <- ggplot(data_df, aes(x = .data[[group]], y = Expression, fill = .data[[facet_by]])) + + g <- ggplot(data_df, aes(x = .data[[group]], y = Expression, fill = .data[[facet.by]])) + geom_violin(scale = "width", position = position_dodge(width = 0.9), trim = TRUE) + geom_boxplot(width = 0.2, position = position_dodge(width = 0.9), outlier.shape = NA) + 
scale_fill_manual(values = color_mapping) + @@ -180,9 +175,9 @@ violinPlot_mod <- function (object, } # Add jitter points conditionally - if (jitter_points) { - g <- g + geom_jitter(size = jitter_dot_size, shape = 1, position = position_dodge(width = 0.9), alpha = 0.5) + if (jitter.points) { + g <- g + geom_jitter(size = jitter.dot.size, shape = 1, position = position_dodge(width = 0.9), alpha = 0.5) } - return(g) + return(list("plots"=g)) } diff --git a/README.md b/README.md index f2e1e03..34311d6 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,22 @@ R package for Single Cell analysis The Single Cell Workflow streamlines the analysis of multimodal Single Cell RNA-Seq data produced from 10x Genomics. It can be run in a docker container, and for biologists, in user-friendly web-based interactive notebooks (NIDAP, Palantir Foundry). Much of it is based on the Seurat workflow in Bioconductor, and supports CITE-Seq data. It incorporates a cell identification step (ModScore) that utilizes module scores obtained from Seurat and also includes Harmony for batch correction. +## Key Functions + +### Sequential Workflow +1. **processRawData()** - Process H5 files into Seurat objects +2. **filterQC()** - Quality control and filtering +3. **combineNormalize()** - Merge samples, normalize, dimension reduction +4. **Harmony integration** (optional) - Batch correction +5. **annotateCellTypes()** - Automatic cell type annotation via SingleR + +### Analysis & Visualization +- **compareCellPopulations()** - Compare cell population distributions across groups +- **degGeneExpressionMarkers()** - Differential expression analysis +- **reclusterSeuratObject()** / **reclusterFilteredSeuratObject()** - Subset and re-cluster +- **colorByGene()**, **heatmapSC()**, **violinPlot()** - Visualization functions +- **plotMetadata()**, **dotPlotMet()** - Metadata visualization +
For further documentation see our detailed [Docs Website](https://nidap-community.github.io/SCWorkflow/) diff --git a/docs/CHANGELOG.html b/docs/CHANGELOG.html index cdf2cd9..d7caeea 100644 --- a/docs/CHANGELOG.html +++ b/docs/CHANGELOG.html @@ -44,6 +44,19 @@

CHANGELOG

+
+

v1.0.3 (in development)

+
+

Feature

+
  • feat: Add compareCellPopulations() function for comparing cell population distributions across experimental groups +
    • Visualizes cell population frequencies or absolute counts across multiple groups
    • +
    • Generates alluvial flow bar plots and faceted box plots
    • +
    • Supports custom group ordering and color palettes
    • +
    • Added ggalluvial dependency for flow visualizations
    • +
    • Generated from JSON template using json2r.prompt.md instructions
    • +
  • +
+

v1.0.2 (2024-02-01)

@@ -74,7 +87,7 @@

Documentation8b5cc98)

-

Feature

+

Feature

  • feat: Update test-annotation to suppress warnings (3d5cf8f)

  • feat: test (4c4cee7)

  • feat: test (c8274f9)

  • diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 5519705..f0f2a04 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -42,27 +42,8 @@

    License

-
MIT License
-
-Copyright (c) 2024 NIDAP Community
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+
YEAR: 2024
+COPYRIGHT HOLDER: NIDAP Community
 
diff --git a/docs/LICENSE.html b/docs/LICENSE.html new file mode 100644 index 0000000..61736ff --- /dev/null +++ b/docs/LICENSE.html @@ -0,0 +1,71 @@ + +NA • SCWorkflow + Skip to contents + + +
+
+
+ + +

MIT License

+

Copyright (c) 2024 NIDAP Community

+

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

+

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

+

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

+ + +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/articles/CONTRIBUTING.html b/docs/articles/CONTRIBUTING.html new file mode 100644 index 0000000..31a76ec --- /dev/null +++ b/docs/articles/CONTRIBUTING.html @@ -0,0 +1,526 @@ + + + + + + + +Contributing to SCWorkflow • SCWorkflow + + + + + + + + + + Skip to contents + + +
+ + + + +
+
+ + + +
+

Overview +

+

+


+
+
+

+Propose Change +

+
+
+

+Clone the repo +

+

If you are a member of CCBR, +you can clone this repository to your computer or development +environment.

+


+

SCWorkflow is a large repository so this may take a few minutes.

+
git clone --single-branch --branch DEV https://github.com/NIDAP-Community/SCWorkflow.git
+
+

Cloning into ‘SCWorkflow’…
remote: Enumerating objects: 3126, +done.
remote: Counting objects: 100% (734/734), done.
remote: +Compressing objects: 100% (191/191), done.
remote: Total 3126 +(delta 630), reused 545 (delta 543), pack-reused 2392 (from 1)
+Receiving objects: 100% (3126/3126), 1.04 GiB | 4.99 MiB/s, done.
+Resolving deltas: 100% (1754/1754), done.
Updating files: 100% +(306/306), done.

+
+
cd SCWorkflow
+


+
+
+

+Install dependencies +

+

If this is your first time cloning the repo you may have to install +dependencies

+ +

Check R CMD: In an R console, make sure the package +passes R CMD check by running:

+
+   devtools::check()
+
+

⚠️ Note: If R CMD check doesn’t pass cleanly, it’s a +good idea to ask for help before continuing.

+
+ +


+
+
+

+Load SCWorkflow from repo +

+

In an R console, load the package from the local repo using:

+
+devtools::load_all()
+


+
+
+

+Create branch +

+

Create a Git branch for your pull request (PR). Give the branch a +descriptive name for the changes you will make.

+

Example: Use iss-10 if it’s for a +specific issue, or feature-new-plot for a new feature.

+

For bug fixes or small changes, you can branch from the +main branch.

+
# Create a new branch from main and switch to it
+git branch iss-10
+git switch iss-10
+
+

Success: Switched to a new branch ‘iss-10’

+
+

For new features or larger changes, branch from the DEV +branch.

+
# Switch to DEV branch, create a new branch, and switch to new branch
+git switch DEV
+git branch feature-new-plot
+git switch feature-new-plot
+
+

Success: Switched to a new branch +‘feature-new-plot’

+
+



+
+
+
+

Develop +

+
+
+

+Make your changes +

+

Now you’re ready to edit the code, write unit tests, and update the +documentation as needed.

+


+
+

+Code Style Guidelines +

+

New code should follow the general guidelines outlined here. +- Important: Don’t restyle code unrelated to your +PR

+

Tools to help: - Use the styler package to +apply these styles

+

Key conventions from the tidyverse style +guide:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ElementStyleExample
Variablessnake_casemy_variable
Functionsverbs in camelCaseprocessData()
Assignment +<- operatorx <- 5
Operationspipesdata %>% filter() %>% mutate()
+


+
+
+

+Function Organization +

+

Structure your functions like this:

+

Functions should follow this template. Use roxygen2 for +documentation:

+
+#' @title Function Title
+#' @description Brief description of what the function does
+#' @param param1 Description of first parameter
+#' @param param2 Description of second parameter
+#' @details Additional details if needed
+#' @importFrom package function_name
+#' @export
+#' @return Description of what the function returns
+
+yourFunction <- function(param1, param2) {
+  
+  ## --------- ##
+  ## Functions ##
+  ## --------- ##
+  
+  ## --------------- ##
+  ## Main Code Block ##
+  ## --------------- ##
+  
+  output_list <- list(
+    object = SeuratObject,
+    plots = list(
+      'plotTitle1' = p1,
+      'plotTitle2' = p2
+    ),
+    data = list(
+      'dataframeTitle' = df1
+    )
+  )
+  
+  return(output_list)
+}
+


+
+
+
+

+Commit and Push Your Changes +

+

Best practices for commits:

+

We recommend following the “atomic commits” +principle where each commit contains one new feature, fix, or task.

+

Learn more: Atomic +Commits Guide

+


+
+
+

+Step-by-Step Process: +

+
+

1️⃣ Check Status +

+

Check the current state of your Git working directory and staging +area:

+
    git status
+
+
+

2️⃣ Stage Files +

+

Add the files that you changed to the staging area:

+
    git add path/to/changed/files/
+
+
+

3️⃣ Make the Commit +

+
    git commit -m 'feat: create function for awesome feature'
+

Your commit message should follow the Conventional +Commits specification. Briefly, each commit should start with one of +the approved types such as feat, fix, +docs, etc. followed by a description of the commit. Take a +look at the Conventional +Commits specification for more detailed information about how to +write commit messages.

+ +
+
+

4️⃣ Push your changes to GitHub: +

+
   git push
+

If this is the first time you are pushing this branch, you may have +to explicitly set the upstream branch:

+
   git push --set-upstream origin iss-10
+

We recommend pushing your commits often so they will be backed up on +GitHub. You can view the files in your branch on GitHub at +https://github.com/NIDAP-Community/SCWorkflow/tree/<your-branch-name> +(replace <your-branch-name> with the actual name of +your branch).

+



+
+
+
+
+

Document and Tests +

+
+
+

+Writing Tests +

+

Why tests matter: Most changes to the code will also +need unit tests to demonstrate that the changes work as intended.

+

How to add tests:

+
    +
  1. Use testthat +to create your unit tests
  2. +
  3. Follow the organization described in the tidyverse test style +guide +
  4. +
  5. Look at existing code in this package for examples
  6. +
+


+
+
+

+Documentation +

+

When to update documentation:

+
    +
  • Written a new function
  • +
  • Changed the API of an existing function
  • +
  • Function is used in a vignette
  • +
+

How to update documentation:

+
    +
  1. Use roxygen2 with Markdown +syntax +
  2. +
  3. See the R Packages book +for detailed instructions
  4. +
  5. Update relevant vignettes if needed
  6. +
+


+
+
+

+Check Your Work +

+

🔍 Final validation step:

+

After making your changes, run the following command from an R +console to make sure the package still passes R CMD check:

+
+devtools::check()
+
+

Goal: All checks should pass with no errors, +warnings, or notes.

+
+



+
+
+
+

+Deploy Feature +

+
+
+

1️⃣ Create the PR +

+

Once your branch is ready, create a PR on GitHub: https://github.com/NIDAP-Community/SCWorkflow/pull/new/

+

Select the branch you just pushed:

+
+Create a new PR from your branch
Create a new PR from your branch
+
+

Edit the PR title and description. The title should briefly describe +the change. Follow the comments in the template to fill out the body of +the PR, and you can delete the comments (everything between +<!-- and -->) as you go. When you’re +ready, click ‘Create pull request’ to open it.

+
+Open the PR after editing the title and description
Open the PR after editing the title and +description
+
+

Optionally, you can mark the PR as a draft if you’re not yet ready +for it to be reviewed, then change it later when you’re ready.

+
+
+

2️⃣ Wait for a maintainer to review your PR +

+

We will do our best to follow the tidyverse code review principles: +https://code-review.tidyverse.org/. The reviewer may +suggest that you make changes before accepting your PR in order to +improve the code quality or style. If that’s the case, continue to make +changes in your branch and push them to GitHub, and they will appear in +the PR.

+

Once the PR is approved, the maintainer will merge it and the +issue(s) the PR links will close automatically. Congratulations and +thank you for your contribution!

+
+
+

3️⃣ After your PR has been merged +

+

After your PR has been merged, update your local clone of the repo by +switching to the DEV branch and pulling the latest changes:

+
   git checkout DEV
+   git pull
+

It’s a good idea to run git pull before creating a new +branch so it will start from the most recent commits in DEV.

+



+
+
+
+ + +
+
+
+ + + +
+ + + +
+
+ + + + + + + diff --git a/docs/articles/Intro.html b/docs/articles/Intro.html new file mode 100644 index 0000000..91ada33 --- /dev/null +++ b/docs/articles/Intro.html @@ -0,0 +1,167 @@ + + + + + + + + • SCWorkflow + + + + + + + + + + Skip to contents + + +
+ + + + +
+
+ + + + +
+

SCWorkflow +

+

The CCBR Single-cell RNA-seq Package (SCWorkflow) allows users to +analyze their own single-cell RNA-seq datasets starting from CellRanger +output files (H5 or mtx files, etc.).

+
+

Installation +

+
+

You can install the SCWorkflow package from GitHub +with:

+
+# install.packages("remotes")
+remotes::install_github("NIDAP-Community/SCWorkflow", dependencies = TRUE)
+

There is also a Docker container available; see the repository README for details.

+
+
+
+

Usage +

+
+

Following this workflow you can perform these steps of a single-cell +RNA-seq analysis, and more:

+
    +
  • +

    Quality Control:

    +
      +
    • Import, Select, & Rename Samples

    • +
    • Filter Cells based on QC metrics

    • +
    • Combine Samples, Cluster, and Normalize your Data

    • +
    • Batch Correction using Harmony

    • +
    +
  • +
  • +

    Cell Annotation:

    +
      +
    • SingleR Automated Annotations

    • +
    • Module Scores

    • +
    • Co-Expression

    • +
    • External Annotations

    • +
    +
  • +
  • +

    Visualizations:

    +
      +
    • Dimensionality Reductions (t-SNE and UMAP Plots) colored by +Marker Expression or by Metadata

    • +
    • Heatmaps

    • +
    • Violin Plots

    • +
    • Trajectory

    • +
    +
  • +
  • +

    Differential Expression Analysis

    +
      +
    • Seurat’s FindMarkers()

    • +
    • Pseudobulk Aggregation

    • +
    • Pathway Analysis

    • +
    +
  • +
+

Please see the introductory +vignette for a quick start tutorial. Take a look at the reference +documentation for detailed information on each function in the +package.

+
+
+
+
+ + + +
+ + + +
+
+ + + + + + + diff --git a/docs/articles/README.html b/docs/articles/README.html index 3800316..d1bc74f 100644 --- a/docs/articles/README.html +++ b/docs/articles/README.html @@ -5,14 +5,14 @@ - • SCWorkflow +SCWorkflow-Intro • SCWorkflow - + Skip to contents @@ -64,7 +64,7 @@

-


-
-
-

Add External Cell Annotations -

-
-

This function will merge an external table of cell annotations into -an existing Seurat Object’s metadata table. The input external metadata -table must have a column named “Barcode” that contains barcodes matching -those found in the metadata already present in the input Seurat Object. -The output will be a new Seurat Object with metadata that now includes -the additional columns from the external table.

-
-
-CellType_Anno_Table=read.csv("./images/PerCell_Metadata.csv")
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
BarcodeCell.Type
PBS_AAACCTGCAAGGTTTC-1Monocytes
PBS_AAACCTGGTCTAGCGC-1Monocytes
PBS_AAACCTGTCACTTACT-1Monocytes
PBS_AAACCTGTCAGCGATT-1Monocytes
PBS_AAACCTGTCAGCTGGC-1Macrophages
PBS_AAACCTGTCGGCTTGG-1Macrophages
-
-
-ExtAnno_SO=ExternalAnnotation(object = Anno_SO$object,
-                              external_metadata = CellType_Anno_Table,
-                              seurat_object_filename = "seurat_object.rds",
-                              barcode_column = "Barcode",
-                              external_cols_to_add = c("Cell Type"),
-                              col_to_viz = "Cell Type"
-                             )
+ + + +


@@ -179,8 +127,8 @@

Cell Annotation with Co-Expression -
-grep('Cd4',rownames(Anno_SO$object@assays$RNA),ignore.case = T,value=T)
+
+grep("Cd4",rownames(Anno_SO$object@assays$RNA),ignore.case = T,value=T)
 
 DLAnno_SO=dualLabeling(object = Anno_SO$object, 
                         samples <- c("PBS","CD8dep","ENT","NHSIL12","Combo"), 
@@ -239,9 +187,9 @@ 

Color by Gene Lists
+
 Marker_Table <- read.csv("Marker_Table_demo.csv")
-
+
     
         colorByMarkerTable(object=Anno_SO$object,
                            samples.subset=c("PBS","ENT","NHSIL12", "Combo","CD8dep" ),
@@ -300,12 +248,12 @@ 

Module Score Cell ClassificationOutput: An updated scRNA-seq object with new cell type labels.

-
+
 
 MS_object=modScore(object=Anno_SO$object, 
                    marker.table=Marker_Table,
-                   use_columns = c("Neutrophils","Macrophages","CD8_T" ),
-                   ms_threshold=c("Neutrophils .25","Macrophages .40","CD8_T .14"), 
+                   use.columns = c("Neutrophils","Macrophages","CD8_T" ),
+                   ms.threshold=c("Neutrophils .25","Macrophages .40","CD8_T .14"), 
                    general.class=c("Neutrophils","Macrophages","CD8_T"), 
                    multi.lvl = FALSE, 
                    reduction = "umap",
@@ -353,7 +301,7 @@ 

Rename Clusters by Cell Type
+
 
 clstrTable <- read.table(file = "./images/Cluster_Names.txt",  sep = '\t',header = T)
@@ -396,7 +344,7 @@

Rename Clusters by Cell Type
+
 
 RNC_object=nameClusters(object=Anno_SO$object,
                          cluster.identities.table=clstrTable,
@@ -408,7 +356,7 @@ 

Rename Clusters by Cell Type= NULL, interactive = FALSE )

-
+
 
 # DimPlot(MS_object, group.by = "SCT_snn_res.0.2", label = T, reduction = 'umap')
 # DimPlot(MS_object, group.by = "mouseRNAseq_main", label = T, reduction = 'umap')
@@ -452,7 +400,7 @@ 

Dot Plot of Genes by Metadata
+
 
 FigOut=dotPlotMet(object=Anno_SO$object,
                    metadata="orig.ident",
diff --git a/docs/articles/SCWorkflow-DEG.html b/docs/articles/SCWorkflow-DEG.html
index 96afdea..2d6a8fc 100644
--- a/docs/articles/SCWorkflow-DEG.html
+++ b/docs/articles/SCWorkflow-DEG.html
@@ -208,61 +208,272 @@ 

Aggregate Seurat Counts
-aggregateCounts(object=so,
-               var.group=var_group,
-               slot=slot)
-

-
-

Statistical Analysis using Limma -

-

Given a matrix (typically log-normalized gene expression) and a -metadata table, this will run one- and two-factor statistical analyses -on groups using linear or mixed effects models with limma. -Reference.

-

There are 2 ways for treating Donor or Patient - one as a random -effect and the other as a fixed effect

-

When Using a Mixed Effects Model (Donor as random effect):

-

Add Donor column to the Donor Variable Column Do not add the Donor -variable under Covariate Columns. It will be handled separately in the -Donor Variable Column as a random effect. The Covariate Columns field -should include any other variables except the Donor. When Using a Basic -Linear Model (Donor as fixed effect):

-

You can add the Donor column as a covariate under Covariate Columns, -where it will be treated as a fixed effect. Additional variables can be -included under Covariate Columns Ensure the Donor Variable Column is -left blank. This function is a Beta version and is undergoing active -development. If you encounter problems, please contact CCBR

-
-Pseudobulk_LimmaStats()
-
-

-
-

Visualizations -

-
-
-

Volcano Plot - Enhanced -

-

This function utilizes the EnhancedVolcano R Bioconductor -package to generate publication-ready volcano plots for differential -expression analyses, offering a number of customizable visualization -options and optimizing gene label placement to avoid clutter

-

Methodology A volcano plot is a type of scatterplot -that shows statistical significance (P value) versus magnitude of change -(fold change). It enables quick visual identification of genes with -large fold changes that are also statistically significant. These may be -the most biologically significant features (such as genes, isoforms, -peptides and so on). Here, we are using a highly-configurable function -“EnhancedVolcano” that produces publication-ready volcano plots.

-
    -
  1. Maria Doyle, 2021 Visualization of RNA-Seq results with Volcano Plot -(Galaxy Training Materials). https://training.galaxyproject.org/training-material/topics/transcriptomics/tutorials/rna-seq-viz-with-volcanoplot/tutorial.html -Online; accessed Mon Aug 01 2022
  2. -
  3. Batut et al., 2018 Community-Driven Data Analysis Training for -Biology Cell Systems 10.1016/j.cels.2018.05.012
  4. -
  5. Blighe, K, S Rana, and M Lewis. 2018. EnhancedVolcano: -Publication-ready volcano plots with enhanced coloring and labeling. https://github.com/kevinblighe/EnhancedVolcano.
  6. -
+AggOut=aggregateCounts(object=Anno_SO$object, + var.group=c( "orig.ident"), + slot="data")
+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+


Filter Low Quality Cells

+

This function will filter genes and cells based on multiple metrics available in the Seurat Object metadata slot. A detailed guide for single cell quality filtering can be found from Xi and Li, 2021 [2]. @@ -190,7 +195,7 @@

Filter Low Quality CellsSO_filtered=filterQC(object=SOlist$object, ## Filter Genes min.cells = 20, - filter.vdj.genes=F, + filter.vdj.genes=FALSE, ## Filter Cells nfeature.limits=c(NA,NA), @@ -203,16 +208,17 @@

Filter Low Quality Cells mad.complexity.limits = c(5,NA), topNgenes.limits = c(NA,NA), mad.topNgenes.limits = c(5,5), - n.topgnes=20, - do.doublets.fitler=T - + n.topgenes=20, + do.doublets.filter=TRUE )

+


Combine, Normalize, and Cluster Data

+

This functions combines multiple sample level Seurat Objects into a single Seurat Object and normalizes the combined dataset. The multi-dimensionality of the data will be summarized into a set of @@ -233,7 +239,7 @@

Combine, Normalize, and Cluster Data high.cut = 8, low.cut.disp = 1, high.cut.disp = 100000, - selection.method = 'vst', + selection.method = "vst", # Dim Reduction only.var.genes = FALSE, draw.umap = TRUE, @@ -255,17 +261,22 @@

Combine, Normalize, and Cluster Data exclude.sample = NULL, cell.count.limit= 35000, reduce.so = FALSE, - project.name = 'scRNAProject', + project.name = "scRNAProject", cell.hashing.data = FALSE )

-



1. Hao Y et al. Integrated analysis of multimodal -single-cell data. Cell. 2021 Jun 24;184(13):3573-3587.e29. doi: +



+
    +
  1. Hao Y et al. Integrated analysis of multimodal single-cell data. +Cell. 2021 Jun 24;184(13):3573-3587.e29. doi: 10.1016/j.cell.2021.04.048. Epub 2021 May 31. PMID: 34062119; PMCID: -PMC8238499. 2. Heumos, L., Schaar, A.C., Lance, C. et al. Best practices -for single-cell analysis across modalities. Nat Rev Genet (2023). https://doi.org/10.1038/s41576-023-00586-w 3. Germain P, -Lun A, Macnair W, Robinson M (2021). “Doublet identification in -single-cell sequencing data using scDblFinder.” f1000research. doi:10.12688/f1000research.73600.1.

    +PMC8238499.

  2. +
  3. Heumos, L., Schaar, A.C., Lance, C. et al. Best practices for +single-cell analysis across modalities. Nat Rev Genet (2023). https://doi.org/10.1038/s41576-023-00586-w

  4. +
  5. Germain P, Lun A, Macnair W, Robinson M (2021). “Doublet +identification in single-cell sequencing data using scDblFinder.” +f1000research. doi:10.12688/f1000research.73600.1.

  6. +
diff --git a/docs/articles/SCWorkflow-SubsetReclust.html b/docs/articles/SCWorkflow-SubsetReclust.html index 9e7a9a9..b901d0a 100644 --- a/docs/articles/SCWorkflow-SubsetReclust.html +++ b/docs/articles/SCWorkflow-SubsetReclust.html @@ -85,9 +85,9 @@

Subset Seurat Objectfilter_SO=filterSeuratObjectByMetadata( object = Anno_SO$object, samples.to.include = c("PBS","ENT","NHSIL12","Combo","CD8dep"), - sample.name = 'orig.ident', - category.to.filter = 'immgen_main', - values.to.filter = c('Monocytes','Macrophages','DC'), + sample.name = "orig.ident", + category.to.filter = "immgen_main", + values.to.filter = c("Monocytes","Macrophages","DC"), keep.or.remove = FALSE, greater.less.than = "greater than", colors = c( diff --git a/docs/articles/SCWorkflow-Visualizations.html b/docs/articles/SCWorkflow-Visualizations.html index 86abf9b..deb01e0 100644 --- a/docs/articles/SCWorkflow-Visualizations.html +++ b/docs/articles/SCWorkflow-Visualizations.html @@ -91,7 +91,7 @@

Color by MetadataFigOut=plotMetadata( object=Anno_SO$object, samples.to.include=c("PBS","ENT","NHSIL12","Combo","CD8dep" ), - metadata.to.plot=c('SCT_snn_res.0.4','Phase'), + metadata.to.plot=c("SCT_snn_res.0.4","Phase"), columns.to.summarize=NULL, summarization.cut.off = 5, reduction.type = "umap", @@ -144,8 +144,8 @@

Plot 3D Dimensionality Reduction FigOut=tSNE3D( object=Anno_SO$object, - color.variable='SCT_snn_res.0.4', - label.variable='SCT_snn_res.0.4', + color.variable="SCT_snn_res.0.4", + label.variable="SCT_snn_res.0.4", dot.size = 4, legend = TRUE, colors = c("darkblue","purple4","green","red","darkcyan", @@ -176,7 +176,7 @@

Color by GenesFigOut=colorByGene( object=Anno_SO$object, samples.to.include=c("PBS","ENT","NHSIL12","Combo","CD8dep" ), - gene='Itgam', + gene="Itgam", reduction.type = "umap", number.of.rows = 0, return.seurat.object = FALSE, @@ -216,20 +216,20 @@

Violin Plot from Seurat Object
 
-FigOut=violinPlot_mod(
+FigOut=violinPlot(
                 object=Anno_SO$object, 
-                assay='SCT', 
-                slot='scale.data', 
-                genes=c('Cd163','Cd38'), 
-                group='SCT_snn_res.0.4', 
-                facet_by = "", 
-                filter_outliers = F,
-                outlier_low = 0.05,
-                outlier_high = 0.95,
-                jitter_points = TRUE, 
-                jitter_dot_size = 1
+                assay="SCT", 
+                layer="scale.data", 
+                genes=c("Itgam","Cd38"), 
+                group="SCT_snn_res.0.4", 
+                facet.by = "", 
+                filter.outliers = F,
+                outlier.low = 0.05,
+                outlier.high = 0.95,
+                jitter.points = TRUE, 
+                jitter.dot.size = 1
           )
-

+


@@ -276,9 +276,9 @@

HeatmapFigOut=heatmapSC( object=Anno_SO$object, sample.names=c("PBS","ENT","NHSIL12","Combo","CD8dep" ), - metadata='SCT_snn_res.0.4', - transcripts=c('Cd163','Cd38','Itgam','Cd4','Cd8a','Pdcd1','Ctla4'), - use_assay = 'SCT', + metadata="SCT_snn_res.0.4", + transcripts=c("Cd163","Cd38","Itgam","Cd4","Cd8a","Pdcd1","Ctla4"), + use.assay = "SCT", proteins = NULL, heatmap.color = "Bu Yl Rd", plot.title = "Heatmap", @@ -299,7 +299,7 @@

Heatmap order.heatmap.rows = FALSE, row.order = c() )

-

+


@@ -345,14 +345,60 @@

Dot Plot of Genes by Metadata FigOut=dotPlotMet( object=Anno_SO$object, - metadata='SCT_snn_res.0.4', + metadata="SCT_snn_res.0.4", cells=unique(Anno_SO$object$SCT_snn_res.0.4), - markers=c('Itgam','Cd163','Cd38','Cd4','Cd8a','Pdcd1','Ctla4'), + markers=c("Itgam","Cd163","Cd38","Cd4","Cd8a","Pdcd1","Ctla4"), plot.reverse = FALSE, cell.reverse.sort = FALSE, dot.color = "darkblue" )

-

+

+


+ +
+

Compare Cell Populations +

+
+

This function compares cell population composition across +experimental groups (for example sample, treatment, timepoints, or donor +cohorts) using metadata already stored in the Seurat object. It is +useful after clustering and annotation, when you want to quantify how +specific cell populations shift between conditions. + The function supports both Frequency (percent) and +Counts (absolute cell numbers) modes. In most +biological comparisons with unequal total cell recovery across samples, +frequency mode is preferred for interpretation. Counts mode can be +useful for QC and yield-focused assessments.

+ +

Methodology
The method first aggregates metadata by annotation and group to compute +percentages and counts. It then links these summaries to sample-level +metadata and generates an alluvial bar plot for overall composition +alongside a faceted box plot for sample-level variability. Together, +these plots help distinguish overall compositional shifts from +replicate-level dispersion.

+
+
+FigOut=compareCellPopulations(
+            object=Anno_SO$object,
+            annotation.column="immgen_main",
+            group.column="Treatment",
+            sample.column = "orig.ident",
+            counts.type = "Frequency",
+            group.order = NULL,
+            wrap.ncols = 5
+)
+ 
+

+


diff --git a/docs/articles/images/Anno1.png b/docs/articles/images/Anno1.png index d7b6443..77aadc5 100644 Binary files a/docs/articles/images/Anno1.png and b/docs/articles/images/Anno1.png differ diff --git a/docs/articles/images/Anno2.png b/docs/articles/images/Anno2.png index 5cbcee0..07b6c6e 100644 Binary files a/docs/articles/images/Anno2.png and b/docs/articles/images/Anno2.png differ diff --git a/docs/articles/images/DEV_CheatSheet.png b/docs/articles/images/DEV_CheatSheet.png new file mode 100644 index 0000000..ea888ee Binary files /dev/null and b/docs/articles/images/DEV_CheatSheet.png differ diff --git a/docs/articles/images/DL1.png b/docs/articles/images/DL1.png index d773c20..6c7a940 100644 Binary files a/docs/articles/images/DL1.png and b/docs/articles/images/DL1.png differ diff --git a/docs/articles/images/DL2.png b/docs/articles/images/DL2.png index a09d734..d115a0c 100644 Binary files a/docs/articles/images/DL2.png and b/docs/articles/images/DL2.png differ diff --git a/docs/articles/images/DPM.png b/docs/articles/images/DPM.png index 763d7fc..b046d7a 100644 Binary files a/docs/articles/images/DPM.png and b/docs/articles/images/DPM.png differ diff --git a/docs/articles/images/MS2.png b/docs/articles/images/MS2.png new file mode 100644 index 0000000..46e88d4 Binary files /dev/null and b/docs/articles/images/MS2.png differ diff --git a/docs/articles/images/MS3.png b/docs/articles/images/MS3.png new file mode 100644 index 0000000..b447d4d Binary files /dev/null and b/docs/articles/images/MS3.png differ diff --git a/docs/articles/images/ProcessInputData1.png b/docs/articles/images/ProcessInputData1.png index de0eb06..96c1dbe 100644 Binary files a/docs/articles/images/ProcessInputData1.png and b/docs/articles/images/ProcessInputData1.png differ diff --git a/docs/articles/images/ProcessInputData2.png b/docs/articles/images/ProcessInputData2.png index 7f07fe9..12ecdb3 100644 Binary files a/docs/articles/images/ProcessInputData2.png and 
b/docs/articles/images/ProcessInputData2.png differ diff --git a/docs/articles/images/QC1.png b/docs/articles/images/QC1.png index 35c9f78..3b30e3f 100644 Binary files a/docs/articles/images/QC1.png and b/docs/articles/images/QC1.png differ diff --git a/docs/articles/images/QC2.png b/docs/articles/images/QC2.png index 369953e..951361a 100644 Binary files a/docs/articles/images/QC2.png and b/docs/articles/images/QC2.png differ diff --git a/docs/articles/images/QC3.png b/docs/articles/images/QC3.png index 3668caf..567ff97 100644 Binary files a/docs/articles/images/QC3.png and b/docs/articles/images/QC3.png differ diff --git a/docs/articles/images/RNC.png b/docs/articles/images/RNC.png index bd47367..c849208 100644 Binary files a/docs/articles/images/RNC.png and b/docs/articles/images/RNC.png differ diff --git a/docs/articles/images/SubRec_recl.png b/docs/articles/images/SubRec_recl.png new file mode 100644 index 0000000..81ad2eb Binary files /dev/null and b/docs/articles/images/SubRec_recl.png differ diff --git a/docs/articles/images/SubRec_sub2.png b/docs/articles/images/SubRec_sub2.png new file mode 100644 index 0000000..6a1db85 Binary files /dev/null and b/docs/articles/images/SubRec_sub2.png differ diff --git a/docs/articles/images/Vis_3D.html b/docs/articles/images/Vis_3D.html new file mode 100644 index 0000000..182a5b0 --- /dev/null +++ b/docs/articles/images/Vis_3D.html @@ -0,0 +1,1945 @@ + + + + +plotly +
+
+
+
+
+
+
+ + + + +
+
+ +
+
+ + + + diff --git a/docs/articles/images/Vis_CBG.png b/docs/articles/images/Vis_CBG.png new file mode 100644 index 0000000..453044b Binary files /dev/null and b/docs/articles/images/Vis_CBG.png differ diff --git a/docs/articles/images/Vis_CBM.png b/docs/articles/images/Vis_CBM.png new file mode 100644 index 0000000..a0bf1ca Binary files /dev/null and b/docs/articles/images/Vis_CBM.png differ diff --git a/docs/articles/images/Vis_CCPbar.png b/docs/articles/images/Vis_CCPbar.png new file mode 100644 index 0000000..46f47d3 Binary files /dev/null and b/docs/articles/images/Vis_CCPbar.png differ diff --git a/docs/articles/images/Vis_CCPbox.png b/docs/articles/images/Vis_CCPbox.png new file mode 100644 index 0000000..94c402c Binary files /dev/null and b/docs/articles/images/Vis_CCPbox.png differ diff --git a/docs/articles/images/Vis_DEGAggBar.png b/docs/articles/images/Vis_DEGAggBar.png new file mode 100644 index 0000000..9c77538 Binary files /dev/null and b/docs/articles/images/Vis_DEGAggBar.png differ diff --git a/docs/articles/images/Vis_DPM.png b/docs/articles/images/Vis_DPM.png new file mode 100644 index 0000000..a994770 Binary files /dev/null and b/docs/articles/images/Vis_DPM.png differ diff --git a/docs/articles/images/Vis_HM.png b/docs/articles/images/Vis_HM.png new file mode 100644 index 0000000..cb4f360 Binary files /dev/null and b/docs/articles/images/Vis_HM.png differ diff --git a/docs/articles/images/Vis_Violin.png b/docs/articles/images/Vis_Violin.png new file mode 100644 index 0000000..43e0338 Binary files /dev/null and b/docs/articles/images/Vis_Violin.png differ diff --git a/docs/articles/index.html b/docs/articles/index.html index a482a11..25cafbf 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -64,7 +64,7 @@

Developer

Getting Started
-
UNKNOWN TITLE
+
SCWorkflow-Intro