Added cat/cat

GallVp · Jul 21, 2024 · d930e9a · d930e9a
1 parent db7fb04
commit d930e9a
Show file tree

Hide file tree

Showing 21 changed files with 885 additions and 1 deletion.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -80,7 +80,7 @@ jobs:
           head: ${{ github.sha }}
           base: ${{ github.event.pull_request.base.sha || github.event.merge_group.base_sha }}
           n_parents: 2
-          ignored: "nf-core-modules/modules/nf-core/**"
+          ignored: "nf-core-modules/*"
 
       - name: Separate modules and subworkflows
         id: outputs

diff --git a/docs/AVAILABLE.txt b/docs/AVAILABLE.txt
@@ -8,4 +8,5 @@
 <li><a href="https://github.com/gallvp/nxf-components/tree/main/modules/gallvp/gunzip">modules/gallvp/gunzip</a></li>
 <li><a href="https://github.com/gallvp/nxf-components/tree/main/modules/gallvp/custom/shortenfastaids">modules/gallvp/custom/shortenfastaids</a></li>
 <li><a href="https://github.com/gallvp/nxf-components/tree/main/modules/gallvp/custom/restoregffids">modules/gallvp/custom/restoregffids</a></li>
+<li><a href="https://github.com/gallvp/nxf-components/tree/main/modules/gallvp/cat/cat">modules/gallvp/cat/cat</a></li>
 <li><a href="https://github.com/gallvp/nxf-components/tree/main/modules/gallvp/annosine">modules/gallvp/annosine</a></li>
diff --git a/docs/index.html b/docs/index.html
@@ -139,6 +139,11 @@ <h1>gallvp/nxf-components</h1>
                     >modules/gallvp/custom/restoregffids</a
                 >
             </li>
+            <li>
+                <a href="https://github.com/gallvp/nxf-components/tree/main/modules/gallvp/cat/cat"
+                    >modules/gallvp/cat/cat</a
+                >
+            </li>
             <li>
                 <a href="https://github.com/gallvp/nxf-components/tree/main/modules/gallvp/annosine"
                     >modules/gallvp/annosine</a

diff --git a/modules/gallvp/cat/cat/environment.yml b/modules/gallvp/cat/cat/environment.yml
@@ -0,0 +1,7 @@
+name: cat_cat
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - conda-forge::pigz=2.3.4
diff --git a/modules/gallvp/cat/cat/main.nf b/modules/gallvp/cat/cat/main.nf
@@ -0,0 +1,78 @@
+process CAT_CAT {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/pigz:2.3.4' :
+        'biocontainers/pigz:2.3.4' }"
+
+    input:
+    tuple val(meta), path(files_in)
+
+    output:
+    tuple val(meta), path("${prefix}"), emit: file_out
+    path "versions.yml"               , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def args2 = task.ext.args2 ?: ''
+    def file_list = files_in.collect { it.toString() }
+
+    // choose appropriate concatenation tool depending on input and output format
+
+    // | input     | output     | command1 | command2 |
+    // |-----------|------------|----------|----------|
+    // | gzipped   | gzipped    | cat      |          |
+    // | ungzipped | ungzipped  | cat      |          |
+    // | gzipped   | ungzipped  | zcat     |          |
+    // | ungzipped | gzipped    | cat      | pigz     |
+
+    // Use input file ending as default
+    prefix   = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}"
+    out_zip  = prefix.endsWith('.gz')
+    in_zip   = file_list[0].endsWith('.gz')
+    command1 = (in_zip && !out_zip) ? 'zcat' : 'cat'
+    command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : ''
+    if(file_list.contains(prefix.trim())) {
+        error "The name of the input file can't be the same as for the output prefix in the " +
+        "module CAT_CAT (currently `$prefix`). Please choose a different one."
+    }
+    """
+    $command1 \\
+        $args \\
+        ${file_list.join(' ')} \\
+        $command2 \\
+        > ${prefix}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
+    END_VERSIONS
+    """
+
+    stub:
+    def file_list   = files_in.collect { it.toString() }
+    prefix          = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}"
+    if(file_list.contains(prefix.trim())) {
+        error "The name of the input file can't be the same as for the output prefix in the " +
+        "module CAT_CAT (currently `$prefix`). Please choose a different one."
+    }
+    """
+    touch $prefix
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
+    END_VERSIONS
+    """
+}
+
+// for .gz files also include the second to last extension if it is present. E.g., .fasta.gz
+def getFileSuffix(filename) {
+    def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/
+    return match ? match[0][1] : filename.substring(filename.lastIndexOf('.'))
+}
diff --git a/modules/gallvp/cat/cat/meta.yml b/modules/gallvp/cat/cat/meta.yml
@@ -0,0 +1,36 @@
+name: cat_cat
+description: A module for concatenation of gzipped or uncompressed files
+keywords:
+  - concatenate
+  - gzip
+  - cat
+tools:
+  - cat:
+      description: Just concatenation
+      documentation: https://man7.org/linux/man-pages/man1/cat.1.html
+      licence: ["GPL-3.0-or-later"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - files_in:
+      type: file
+      description: List of compressed / uncompressed files
+      pattern: "*"
+output:
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - file_out:
+      type: file
+      description: Concatenated file. Will be gzipped if file_out ends with ".gz"
+      pattern: "${file_out}"
+authors:
+  - "@erikrikarddaniel"
+  - "@FriederikeHanssen"
+maintainers:
+  - "@erikrikarddaniel"
+  - "@FriederikeHanssen"
diff --git a/modules/gallvp/cat/cat/tests/main.nf.test b/modules/gallvp/cat/cat/tests/main.nf.test
@@ -0,0 +1,178 @@
+nextflow_process {
+
+    name "Test Process CAT_CAT"
+    script "../main.nf"
+    process "CAT_CAT"
+    tag "modules"
+    tag "modules_gallvp"
+    tag "cat"
+    tag "cat/cat"
+
+    test("test_cat_name_conflict") {
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'genome', single_end:true ],
+                        [
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            assertAll(
+                { assert !process.success },
+                { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") }
+            )
+        }
+    }
+
+    test("test_cat_unzipped_unzipped") {
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+
+    test("test_cat_zipped_zipped") {
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true),
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            def lines = path(process.out.file_out.get(0).get(1)).linesGzip
+            assertAll(
+                { assert process.success },
+                { assert snapshot(lines[0..5]).match("test_cat_zipped_zipped_lines") },
+                { assert snapshot(lines.size()).match("test_cat_zipped_zipped_size")}
+            )
+        }
+    }
+
+    test("test_cat_zipped_unzipped") {
+        config './nextflow_zipped_unzipped.config'
+
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true),
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("test_cat_unzipped_zipped") {
+        config './nextflow_unzipped_zipped.config'
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            def lines = path(process.out.file_out.get(0).get(1)).linesGzip
+            assertAll(
+                { assert process.success },
+                { assert snapshot(lines[0..5]).match("test_cat_unzipped_zipped_lines") },
+                { assert snapshot(lines.size()).match("test_cat_unzipped_zipped_size")}
+            )
+        }
+    }
+
+    test("test_cat_one_file_unzipped_zipped") {
+        config './nextflow_unzipped_zipped.config'
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            def lines = path(process.out.file_out.get(0).get(1)).linesGzip
+            assertAll(
+                { assert process.success },
+                { assert snapshot(lines[0..5]).match("test_cat_one_file_unzipped_zipped_lines") },
+                { assert snapshot(lines.size()).match("test_cat_one_file_unzipped_zipped_size")}
+            )
+        }
+    }
+}