Run UMAP dimensionality reduction before cluster identification (this has no impact on the results; it just makes more sense and is what's usually done) and publish the output so that it can be reused later for further parameter exploration
vib-singlecell-nf · May 7, 2020 · b5fbb64 · b5fbb64
1 parent 98bff00
commit b5fbb64
Show file tree

Hide file tree

Showing 4 changed files with 43 additions and 21 deletions.
diff --git a/processes/batch_effect_correct.nf b/processes/batch_effect_correct.nf
@@ -15,7 +15,10 @@ process SC__SCANPY__BATCH_EFFECT_CORRECTION {
 			val(stashedParams)
 
   	output:
-    	tuple val(sampleId), path("${sampleId}.SC__SCANPY__BATCH_EFFECT_CORRECTION.${processParams.off}")
+    	tuple \
+			val(sampleId), \
+			path("${sampleId}.SC__SCANPY__BATCH_EFFECT_CORRECTION.${processParams.off}"), \
+			val(stashedParams)
 
 	script:
 		def sampleParams = params.parseConfig(sampleId, params.global, params.sc.scanpy.batch_effect_correct)

diff --git a/processes/transform.nf b/processes/transform.nf
@@ -60,10 +60,14 @@ process SC__SCANPY__FEATURE_SCALING {
 	publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true
 
 	input:
-		tuple val(sampleId), path(f)
+		tuple \
+			val(sampleId), \
+			path(f)
 
 	output:
-		tuple val(sampleId), path("${sampleId}.SC__SCANPY__FEATURE_SCALING.${processParams.off}")
+		tuple \
+			val(sampleId), \
+			path("${sampleId}.SC__SCANPY__FEATURE_SCALING.${processParams.off}")
 
 	script:
 		def sampleParams = params.parseConfig(sampleId, params.global, params.sc.scanpy.feature_scaling)

diff --git a/workflows/bec_bbknn.nf b/workflows/bec_bbknn.nf
@@ -18,7 +18,9 @@ nextflow.preview.dsl=2
 //  process imports:
 
 include '../../utils/processes/utils.nf' params(params)
-include '../../utils/workflows/utils.nf' params(params)
+include COMBINE_BY_PARAMS from "../../utils/workflows/utils.nf" params(params)
+include PUBLISH as PUBLISH_BEC_OUTPUT from "../../utils/workflows/utils.nf" params(params)
+include PUBLISH as PUBLISH_FINAL_BBKNN_OUTPUT from "../../utils/workflows/utils.nf" params(params)
 
 // scanpy:
 include '../processes/batch_effect_correct.nf' params(params)
@@ -48,33 +50,36 @@ workflow BEC_BBKNN {
                 it -> tuple(it[0], it[1], it[2]) 
             } 
         )
+        PUBLISH_BEC_OUTPUT(
+            SC__SCANPY__BATCH_EFFECT_CORRECTION.out,
+            "BEC_BBKNN.output",
+            null,
+            false
+        )
+
+        // Define the parameters for dimensionality reduction
+        def dimRedParams = SC__SCANPY__DIM_REDUCTION_PARAMS( clean(params.sc.scanpy.dim_reduction.umap) )
+        SC__SCANPY__DIM_REDUCTION__UMAP( 
+            SC__SCANPY__BATCH_EFFECT_CORRECTION.out.combine(
+                dimRedParams.$()
+            ).view()
+        )
 
         // Define the parameters for clustering
         def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.sc.scanpy.clustering) )
         CLUSTER_IDENTIFICATION(
             normalizedTransformedData,
-            SC__SCANPY__BATCH_EFFECT_CORRECTION.out,
+            SC__SCANPY__DIM_REDUCTION__UMAP.out,
             "Post Batch Effect Correction (BBKNN)"
         )
 
-        // Define the parameters for dimensionality reduction
-        def dimRedParams = SC__SCANPY__DIM_REDUCTION_PARAMS( clean(params.sc.scanpy.dim_reduction.umap) )
-
-        SC__SCANPY__DIM_REDUCTION__UMAP( 
+        PUBLISH_FINAL_BBKNN_OUTPUT(
             CLUSTER_IDENTIFICATION.out.marker_genes.map {
                 it -> tuple(
                     it[0], // sampleId
                     it[1], // data
                     !clusteringParams.isParameterExplorationModeOn() ? null : it[2..(it.size()-1)], // Stash params
                 )
-            }.combine(
-                dimRedParams.$()
-            )
-        )
-
-        PUBLISH(
-            SC__SCANPY__DIM_REDUCTION__UMAP.out.map {
-                it -> tuple(it[0], it[1], it[2]) 
             },
             "BEC_BBKNN.final_output",
             null,
@@ -86,7 +91,8 @@ workflow BEC_BBKNN {
         // - Post batch effect correction
         becDualDataPrePost = COMBINE_BY_PARAMS(
             clusterIdentificationPreBatchEffectCorrection,
-            PUBLISH.out,
+            // Use PUBLISH output to avoid "input file name collision"
+            PUBLISH_FINAL_BBKNN_OUTPUT.out,
             clusteringParams
         )
 
@@ -98,7 +104,7 @@ workflow BEC_BBKNN {
         )
 
     emit:
-        data = SC__SCANPY__DIM_REDUCTION__UMAP.out
+        data = CLUSTER_IDENTIFICATION.out.marker_genes
         cluster_report = CLUSTER_IDENTIFICATION.out.report
         bbknn_report
 

diff --git a/workflows/bec_mnncorrect.nf b/workflows/bec_mnncorrect.nf
@@ -38,8 +38,16 @@ workflow BEC_MNNCORRECT {
         clusterIdentificationPreBatchEffectCorrection
 
     main:
-        SC__SCANPY__BATCH_EFFECT_CORRECTION( data.map { it -> tuple(it[0], it[1], null) } )
-        SC__SCANPY__FEATURE_SCALING( SC__SCANPY__BATCH_EFFECT_CORRECTION.out )
+        SC__SCANPY__BATCH_EFFECT_CORRECTION( 
+            data.map { 
+                it -> tuple(it[0], it[1], null) 
+            }
+        )
+        SC__SCANPY__FEATURE_SCALING( 
+            SC__SCANPY__BATCH_EFFECT_CORRECTION.out.map { 
+                it -> tuple(it[0], it[1]) 
+            }
+        )
         if(params.sc.scanpy.containsKey("regress_out")) {
             preprocessed_data = SC__SCANPY__REGRESS_OUT( SC__SCANPY__FEATURE_SCALING.out )
         } else {
@@ -81,6 +89,7 @@ workflow BEC_MNNCORRECT {
         // - Post batch effect correction
         becDualDataPrePost = COMBINE_BY_PARAMS(
             clusterIdentificationPreBatchEffectCorrection,
+            // Use PUBLISH output to avoid "input file name collision"
             PUBLISH.out,
             clusteringParams
         )