Skip to content
This repository has been archived by the owner on Dec 8, 2020. It is now read-only.

Commit

Permalink
Run UMAP dim reduction before cluster identification (does not have i…
Browse files Browse the repository at this point in the history
…mpact on the results; it just makes more sense and is what's usually done) and publish the output so that it can be reused later for further parameter exploration
  • Loading branch information
dweemx committed May 7, 2020
1 parent 98bff00 commit b5fbb64
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 21 deletions.
5 changes: 4 additions & 1 deletion processes/batch_effect_correct.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@ process SC__SCANPY__BATCH_EFFECT_CORRECTION {
val(stashedParams)

output:
tuple val(sampleId), path("${sampleId}.SC__SCANPY__BATCH_EFFECT_CORRECTION.${processParams.off}")
tuple \
val(sampleId), \
path("${sampleId}.SC__SCANPY__BATCH_EFFECT_CORRECTION.${processParams.off}"), \
val(stashedParams)

script:
def sampleParams = params.parseConfig(sampleId, params.global, params.sc.scanpy.batch_effect_correct)
Expand Down
8 changes: 6 additions & 2 deletions processes/transform.nf
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,14 @@ process SC__SCANPY__FEATURE_SCALING {
publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true

input:
tuple val(sampleId), path(f)
tuple \
val(sampleId), \
path(f)

output:
tuple val(sampleId), path("${sampleId}.SC__SCANPY__FEATURE_SCALING.${processParams.off}")
tuple \
val(sampleId), \
path("${sampleId}.SC__SCANPY__FEATURE_SCALING.${processParams.off}")

script:
def sampleParams = params.parseConfig(sampleId, params.global, params.sc.scanpy.feature_scaling)
Expand Down
38 changes: 22 additions & 16 deletions workflows/bec_bbknn.nf
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ nextflow.preview.dsl=2
// process imports:

include '../../utils/processes/utils.nf' params(params)
include '../../utils/workflows/utils.nf' params(params)
include COMBINE_BY_PARAMS from "../../utils/workflows/utils.nf" params(params)
include PUBLISH as PUBLISH_BEC_OUTPUT from "../../utils/workflows/utils.nf" params(params)
include PUBLISH as PUBLISH_FINAL_BBKNN_OUTPUT from "../../utils/workflows/utils.nf" params(params)

// scanpy:
include '../processes/batch_effect_correct.nf' params(params)
Expand Down Expand Up @@ -48,33 +50,36 @@ workflow BEC_BBKNN {
it -> tuple(it[0], it[1], it[2])
}
)
PUBLISH_BEC_OUTPUT(
SC__SCANPY__BATCH_EFFECT_CORRECTION.out,
"BEC_BBKNN.output",
null,
false
)

// Define the parameters for dimensionality reduction
def dimRedParams = SC__SCANPY__DIM_REDUCTION_PARAMS( clean(params.sc.scanpy.dim_reduction.umap) )
SC__SCANPY__DIM_REDUCTION__UMAP(
SC__SCANPY__BATCH_EFFECT_CORRECTION.out.combine(
dimRedParams.$()
).view()
)

// Define the parameters for clustering
def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.sc.scanpy.clustering) )
CLUSTER_IDENTIFICATION(
normalizedTransformedData,
SC__SCANPY__BATCH_EFFECT_CORRECTION.out,
SC__SCANPY__DIM_REDUCTION__UMAP.out,
"Post Batch Effect Correction (BBKNN)"
)

// Define the parameters for dimensionality reduction
def dimRedParams = SC__SCANPY__DIM_REDUCTION_PARAMS( clean(params.sc.scanpy.dim_reduction.umap) )

SC__SCANPY__DIM_REDUCTION__UMAP(
PUBLISH_FINAL_BBKNN_OUTPUT(
CLUSTER_IDENTIFICATION.out.marker_genes.map {
it -> tuple(
it[0], // sampleId
it[1], // data
!clusteringParams.isParameterExplorationModeOn() ? null : it[2..(it.size()-1)], // Stash params
)
}.combine(
dimRedParams.$()
)
)

PUBLISH(
SC__SCANPY__DIM_REDUCTION__UMAP.out.map {
it -> tuple(it[0], it[1], it[2])
},
"BEC_BBKNN.final_output",
null,
Expand All @@ -86,7 +91,8 @@ workflow BEC_BBKNN {
// - Post batch effect correction
becDualDataPrePost = COMBINE_BY_PARAMS(
clusterIdentificationPreBatchEffectCorrection,
PUBLISH.out,
// Use PUBLISH output to avoid "input file name collision"
PUBLISH_FINAL_BBKNN_OUTPUT.out,
clusteringParams
)

Expand All @@ -98,7 +104,7 @@ workflow BEC_BBKNN {
)

emit:
data = SC__SCANPY__DIM_REDUCTION__UMAP.out
data = CLUSTER_IDENTIFICATION.out.marker_genes
cluster_report = CLUSTER_IDENTIFICATION.out.report
bbknn_report

Expand Down
13 changes: 11 additions & 2 deletions workflows/bec_mnncorrect.nf
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,16 @@ workflow BEC_MNNCORRECT {
clusterIdentificationPreBatchEffectCorrection

main:
SC__SCANPY__BATCH_EFFECT_CORRECTION( data.map { it -> tuple(it[0], it[1], null) } )
SC__SCANPY__FEATURE_SCALING( SC__SCANPY__BATCH_EFFECT_CORRECTION.out )
SC__SCANPY__BATCH_EFFECT_CORRECTION(
data.map {
it -> tuple(it[0], it[1], null)
}
)
SC__SCANPY__FEATURE_SCALING(
SC__SCANPY__BATCH_EFFECT_CORRECTION.out.map {
it -> tuple(it[0], it[1])
}
)
if(params.sc.scanpy.containsKey("regress_out")) {
preprocessed_data = SC__SCANPY__REGRESS_OUT( SC__SCANPY__FEATURE_SCALING.out )
} else {
Expand Down Expand Up @@ -81,6 +89,7 @@ workflow BEC_MNNCORRECT {
// - Post batch effect correction
becDualDataPrePost = COMBINE_BY_PARAMS(
clusterIdentificationPreBatchEffectCorrection,
// Use PUBLISH output to avoid "input file name collision"
PUBLISH.out,
clusteringParams
)
Expand Down

0 comments on commit b5fbb64

Please sign in to comment.