Merge pull request #281 from AlexsLemonade/jashapiro/280-null-seed-fix

Set seeds conditionally
AlexsLemonade · Sep 5, 2024 · 0b5d4ac
2 parents 00946ce + b35a94f
commit 0b5d4ac
Show file tree

Hide file tree

Showing 10 changed files with 78 additions and 54 deletions.
diff --git a/.lintr b/.lintr
@@ -1,6 +1,7 @@
 encoding: "UTF-8"
 linters: linters_with_defaults(
     line_length_linter(120),
+    cyclocomp_linter(20),
     commented_code_linter = NULL,
     indentation_linter = NULL,
     object_usage_linter = NULL,

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -2,7 +2,7 @@
 # R specific hooks: https://github.com/lorenzwalthert/precommit
 repos:
   - repo: https://github.com/lorenzwalthert/precommit
-    rev: v0.4.2
+    rev: v0.4.3
     hooks:
       - id: style-files
         args: [--style_pkg=styler, --style_fun=tidyverse_style]

diff --git a/R/add_miQC.R b/R/add_miQC.R
@@ -19,14 +19,13 @@ add_miQC <- function(sce,
                      keep_all_below_boundary = TRUE,
                      enforce_left_cutoff = TRUE,
                      seed = NULL) {
-  # check that input is a SingleCellExperiment
-  if (!is(sce, "SingleCellExperiment")) {
-    stop("sce must be a SingleCellExperiment object")
-  }
-  # check that sce has subsets_mito_percent
-  if (!("subsets_mito_percent" %in% colnames(colData(sce)))) {
-    stop("sce must have subsets_mito_percent in the column data. Use scuttle::addPerCellQCMetrics or similar to add it.")
-  }
+  stopifnot(
+    "sce must be a SingleCellExperiment object" = is(sce, "SingleCellExperiment"),
+    "sce must have subsets_mito_percent in the colData. Use scuttle::addPerCellQCMetrics or similar to add it." =
+      "subsets_mito_percent" %in% colnames(colData(sce))
+  )
+
+
   # check if prob_compromised exists
   if (!is.null(sce$prob_compromised)) {
     warning("prob_compromised was already calculated and will be replaced.")
@@ -37,7 +36,9 @@ add_miQC <- function(sce,
   }
 
   # set seed
-  set.seed(seed)
+  if (!is.null(seed)) {
+    set.seed(seed)
+  }
 
   # generate linear mixture model of probability of cells being compromised
   model <- NULL

diff --git a/R/calculate_silhouette_width.R b/R/calculate_silhouette_width.R
@@ -38,7 +38,7 @@ calculate_silhouette_width <- function(merged_sce,
   }
 
   # Check that frac_cells is in range
-  if (frac_cells <= 0 | frac_cells >= 1) {
+  if (frac_cells <= 0 || frac_cells >= 1) {
     stop("The fraction of cells to downsample should be between 0 and 1.")
   }
 
@@ -50,15 +50,16 @@ calculate_silhouette_width <- function(merged_sce,
   # Calculate the silhouette width values across list of PCs
   all_silhouette_df <- purrr::map(
     pc_names,
-    \(pcs)
-    silhouette_width_from_pcs(
-      merged_sce = merged_sce,
-      batch_column = batch_column,
-      pc_name = pcs,
-      frac_cells = frac_cells,
-      nreps = nreps,
-      seed = seed
-    )
+    \(pcs) {
+      silhouette_width_from_pcs(
+        merged_sce = merged_sce,
+        batch_column = batch_column,
+        pc_name = pcs,
+        frac_cells = frac_cells,
+        nreps = nreps,
+        seed = seed
+      )
+    }
   ) |>
     dplyr::bind_rows()
 
@@ -92,7 +93,9 @@ silhouette_width_from_pcs <-
            nreps = 20,
            seed = NULL) {
     # Set the seed for subsampling
-    set.seed(seed)
+    if (!is.null(seed)) {
+      set.seed(seed)
+    }
 
     # Pull out the PCs or analogous reduction
     pcs <- reducedDim(merged_sce, pc_name)

diff --git a/R/cluster_sce.R b/R/cluster_sce.R
@@ -7,7 +7,7 @@
 #'   use and any additional clustering options
 #' @param cluster_column_name The name of the column to store the clustering
 #'   results in the SCE; for naming you may want to include the type of clustering
-#'   and k value of centers, e.g. `kmeans_10`
+#'   and k value of centers, e.g., "kmeans_10"
 #' @param seed Seed for reproducibility of clustering results
 #' @param ... Additional arguments to provide to `bluster::clusterRows()`
 #'
@@ -16,28 +16,31 @@
 #' @return SingleCellExperiment object containing clustering results
 #'
 #' @examples
-#'  \dontrun{
-#'   # Perform K-means clustering with 10 centers
-#'   cluster_sce(sce, "PCA", bluster::KmeansParam(centers = 10), "kmeans_10")
-#'  }
-cluster_sce <- function(sce,
-                        pc_name = "PCA",
-                        BLUSPARAM,
-                        cluster_column_name,
-                        seed = NULL,
-                        ...) {
+#' \dontrun{
+#' # Perform K-means clustering with 10 centers
+#' cluster_sce(sce, "PCA", bluster::KmeansParam(centers = 10), "kmeans_10")
+#' }
+#'
+cluster_sce <- function(
+    sce,
+    pc_name = "PCA",
+    BLUSPARAM,
+    cluster_column_name,
+    seed = NULL,
+    ...) {
   # Set the seed
-  set.seed(seed)
+  if (!is.null(seed)) {
+    set.seed(seed)
+  }
 
   # Check that provided sce is indeed an SCE
   if (!is(sce, "SingleCellExperiment")) {
-    stop("Expected a `SingleCellExperiment` object for `sce` argument.")
+    stop("`sce` must be a `SingleCellExperiment` object")
   }
 
   # Check that the PCs are present in the SingleCellExperiment object
   if (!pc_name %in% reducedDimNames(sce)) {
-    stop("The provided `pc_name` cannot be found in the reduced dimensions of
-         the SingleCellExperiment object.")
+    stop("The provided `pc_name` cannot be found in the reduced dimensions of the SingleCellExperiment object.")
   }
 
   # Extract PCs

diff --git a/R/filter_counts.R b/R/filter_counts.R
@@ -1,19 +1,21 @@
 #' Filter counts matrix using DropletUtils::emptyDropsCellRanger
 #'
 #' This function will filter a SingleCellExperiment object using DropletUtils::emptyDropsCellRanger() by default,
-#'   or DropletUtils::emptyDrops(), as well as any associated alternative experiments. If mean expression and percent detected
-#'   were previously calculated in the columns `mean` and `detected`, respectively, these
-#'   will be removed from both the main and alternative experiments.
+#'   or DropletUtils::emptyDrops(), as well as any associated alternative experiments.
+#'   If mean expression and percent detected were previously calculated in the columns `mean` and `detected`,
+#'   respectively, these will be removed from both the main and alternative experiments.
 #'
 #' @param sce SingleCellExperiment with unfiltered gene x cell counts matrix.
 #' @param cr_like Logical indicating whether or not to use DropletUtils::emptyDropsCellRanger.
 #'   Default is set to TRUE.
 #' @param fdr_cutoff FDR cutoff to use for DropletUtils::emptyDropsCellRanger or DropletUtils::emptyDrops.
 #'   Default is 0.01.
 #' @param seed An optional random seed for reproducibility.
-#' @param umi_cutoff The minimum UMI count for cells to pass filtering, only used if emptyDropsCellRanger or emptyDrops fails.
+#' @param umi_cutoff The minimum UMI count for cells to pass filtering.
+#'   Only used if emptyDropsCellRanger or emptyDrops fails.
 #'   Default is 100.
-#' @param ... Any arguments to be passed into DropletUtils::emptyDropsCellRanger or DropletUtils::emptyDrops.
+#' @param ... Any arguments to be passed into DropletUtils::emptyDropsCellRanger
+#'   or DropletUtils::emptyDrops.
 #'
 #' @return SingleCellExperiment with filtered gene x cell matrix.
 #'
@@ -26,7 +28,10 @@
 #' filter_counts(sce = sce_object)
 #' }
 filter_counts <- function(sce, cr_like = TRUE, fdr_cutoff = 0.01, seed = NULL, umi_cutoff = 100, ...) {
-  set.seed(seed)
+  # set seed
+  if (!is.null(seed)) {
+    set.seed(seed)
+  }
 
   if (!is(sce, "SingleCellExperiment")) {
     stop("Input must be a SingleCellExperiment object.")
@@ -36,7 +41,7 @@ filter_counts <- function(sce, cr_like = TRUE, fdr_cutoff = 0.01, seed = NULL, u
     stop("cr_like must be set as TRUE or FALSE")
   }
 
-  if (!is.numeric(umi_cutoff) | umi_cutoff < 0) {
+  if (!is.numeric(umi_cutoff) || umi_cutoff < 0) {
     stop("umi_cutoff must be a number greater than or equal to 0")
   }
 

diff --git a/R/integrate_sces.R b/R/integrate_sces.R
@@ -43,7 +43,9 @@ integrate_sces <- function(merged_sce,
                            seed = NULL,
                            ...) {
   # Set seed
-  set.seed(seed)
+  if (!is.null(seed)) {
+    set.seed(seed)
+  }
 
   # make sure that input is a SingleCellExperiment
   if (!is(merged_sce, "SingleCellExperiment")) {

diff --git a/inst/WORDLIST b/inst/WORDLIST
@@ -4,8 +4,10 @@ altExp
 altexp
 barcode
 bioconductor
+BlusterParam
 cDNA
 cellranger
+clusterRows
 CMD
 colData
 coldata
@@ -15,17 +17,20 @@ dev
 Dockerfile
 DropletUtils
 emptyDrops
+emptyDropsCellRanger
 Ensembl
 ensembl
 github
 intron
 Kallisto
 kallisto
+kmeans
 LinkingTo
 nf
 pre
 PRs
 renv
+reproducibility
 rowData
 rowdata
 Rproj
@@ -42,5 +47,6 @@ scpcatools
 SingleCellExperiment
 Spielman
 tidyverse
+UMI
 unspliced
 vcs
diff --git a/man/cluster_sce.Rd b/man/cluster_sce.Rd
diff --git a/man/filter_counts.Rd b/man/filter_counts.Rd