Skip to content

Commit

Permalink
Added cat/cat
Browse files — browse the repository at this point in the history
  • Loading branch information
GallVp committed Jul 21, 2024
1 parent db7fb04 commit d930e9a
Show file tree
Hide file tree
Showing 21 changed files with 885 additions and 1 deletion.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ jobs:
head: ${{ github.sha }}
base: ${{ github.event.pull_request.base.sha || github.event.merge_group.base_sha }}
n_parents: 2
ignored: "nf-core-modules/modules/nf-core/**"
ignored: "nf-core-modules/*"

- name: Separate modules and subworkflows
id: outputs
Expand Down
1 change: 1 addition & 0 deletions docs/AVAILABLE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@
<li><a href="https://github.com/gallvp/nxf-components/tree/main/modules/gallvp/gunzip">modules/gallvp/gunzip</a></li>
<li><a href="https://github.com/gallvp/nxf-components/tree/main/modules/gallvp/custom/shortenfastaids">modules/gallvp/custom/shortenfastaids</a></li>
<li><a href="https://github.com/gallvp/nxf-components/tree/main/modules/gallvp/custom/restoregffids">modules/gallvp/custom/restoregffids</a></li>
<li><a href="https://github.com/gallvp/nxf-components/tree/main/modules/gallvp/cat/cat">modules/gallvp/cat/cat</a></li>
<li><a href="https://github.com/gallvp/nxf-components/tree/main/modules/gallvp/annosine">modules/gallvp/annosine</a></li>
5 changes: 5 additions & 0 deletions docs/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,11 @@ <h1>gallvp/nxf-components</h1>
>modules/gallvp/custom/restoregffids</a
>
</li>
<li>
<a href="https://github.com/gallvp/nxf-components/tree/main/modules/gallvp/cat/cat"
>modules/gallvp/cat/cat</a
>
</li>
<li>
<a href="https://github.com/gallvp/nxf-components/tree/main/modules/gallvp/annosine"
>modules/gallvp/annosine</a
Expand Down
7 changes: 7 additions & 0 deletions modules/gallvp/cat/cat/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Conda environment for the cat_cat module: one pinned dependency, pigz 2.3.4.
name: cat_cat
channels:
- conda-forge
- bioconda
- defaults
dependencies:
# Channel-qualified and version-pinned: pigz 2.3.4 from conda-forge.
- conda-forge::pigz=2.3.4
78 changes: 78 additions & 0 deletions modules/gallvp/cat/cat/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// Concatenate a list of files into a single output file, converting between
// gzipped and uncompressed representations when the input and output differ.
//
// Input : tuple of a meta map (meta.id is used for tagging and default naming)
//         and the list of files to concatenate.
// Output: `file_out` - tuple of the meta map and the concatenated file;
//         `versions` - versions.yml recording the pigz version.
//
// The output name is `task.ext.prefix` when set; otherwise it is meta.id
// followed by the suffix of the FIRST input file (see getFileSuffix).
process CAT_CAT {
    tag "$meta.id"
    label 'process_low'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/pigz:2.3.4' :
        'biocontainers/pigz:2.3.4' }"

    input:
    tuple val(meta), path(files_in)

    output:
    tuple val(meta), path("${prefix}"), emit: file_out
    path "versions.yml"               , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def args2 = task.ext.args2 ?: ''
    def file_list = files_in.collect { it.toString() }

    // choose appropriate concatenation tool depending on input and output format

    // | input     | output     | command1 | command2 |
    // |-----------|------------|----------|----------|
    // | gzipped   | gzipped    | cat      |          |
    // | ungzipped | ungzipped  | cat      |          |
    // | gzipped   | ungzipped  | zcat     |          |
    // | ungzipped | gzipped    | cat      | pigz     |

    // Use input file ending as default.
    // `prefix` is intentionally NOT declared with `def`: the output block
    // above references it via path("${prefix}").
    prefix   = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}"
    out_zip  = prefix.endsWith('.gz')
    // Only the first input is inspected to decide whether inputs are gzipped.
    in_zip   = file_list[0].endsWith('.gz')
    command1 = (in_zip && !out_zip) ? 'zcat' : 'cat'
    command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : ''
    // Refuse to run when the output name equals one of the input names.
    if(file_list.contains(prefix.trim())) {
        error "The name of the input file can't be the same as for the output prefix in the " +
        "module CAT_CAT (currently `$prefix`). Please choose a different one."
    }
    """
    $command1 \\
        $args \\
        ${file_list.join(' ')} \\
        $command2 \\
        > ${prefix}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
    END_VERSIONS
    """

    stub:
    def file_list = files_in.collect { it.toString() }
    // Derive the default name exactly as the script section does, so a stub run
    // produces the same output file name as a real run (e.g. `<id>.fasta.gz`
    // rather than `<id>.gz` for a `*.fasta.gz` input).
    prefix = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}"
    if(file_list.contains(prefix.trim())) {
        error "The name of the input file can't be the same as for the output prefix in the " +
        "module CAT_CAT (currently `$prefix`). Please choose a different one."
    }
    """
    touch $prefix

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
    END_VERSIONS
    """
}

/**
 * Return the suffix of a file name, used to build the default output name.
 *
 * For names ending in `.<ext>.gz` (each extension 1-5 word characters) the
 * compound suffix is returned, optionally preceded by one more short
 * extension, e.g. `genome.fasta.gz` -> `.fasta.gz`. Otherwise the text from
 * the last `.` onwards is returned, e.g. `genome.fasta` -> `.fasta`,
 * `reads.gz` -> `.gz`.
 *
 * @param filename file name as a String
 * @return the suffix including its leading dot, or an empty string when the
 *         name contains no dot at all
 */
def getFileSuffix(filename) {
    def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/
    if (match) {
        return match[0][1]
    }
    // lastIndexOf returns -1 for names without any '.'; substring(-1) would
    // throw, so such names are treated as having no suffix.
    def lastDot = filename.lastIndexOf('.')
    return lastDot >= 0 ? filename.substring(lastDot) : ''
}
36 changes: 36 additions & 0 deletions modules/gallvp/cat/cat/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Metadata for the cat_cat module: description, keywords, tools, and the
# documented input/output channels.
name: cat_cat
description: A module for concatenation of gzipped or uncompressed files
keywords:
- concatenate
- gzip
- cat
tools:
- cat:
description: Just concatenation
documentation: https://man7.org/linux/man-pages/man1/cat.1.html
licence: ["GPL-3.0-or-later"]
# Input channel elements: a meta map and the files to concatenate.
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- files_in:
type: file
description: List of compressed / uncompressed files
pattern: "*"
# Output channels: the versions file and the concatenated file.
output:
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- file_out:
type: file
description: Concatenated file. Will be gzipped if file_out ends with ".gz"
pattern: "${file_out}"
authors:
- "@erikrikarddaniel"
- "@FriederikeHanssen"
maintainers:
- "@erikrikarddaniel"
- "@FriederikeHanssen"
178 changes: 178 additions & 0 deletions modules/gallvp/cat/cat/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
// nf-test suite for the CAT_CAT process defined in ../main.nf.
// Covers the output-name conflict error and the combinations of
// gzipped / uncompressed inputs and outputs.
nextflow_process {

name "Test Process CAT_CAT"
script "../main.nf"
process "CAT_CAT"
tag "modules"
tag "modules_gallvp"
tag "cat"
tag "cat/cat"

// Failure case: id 'genome' with first input genome.fasta; the process is
// expected to fail with the name-conflict error message.
test("test_cat_name_conflict") {
when {
params {
outdir = "${outputDir}"
}
process {
"""
input[0] =
[
[ id:'genome', single_end:true ],
[
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true)
]
]
"""
}
}
then {
assertAll(
{ assert !process.success },
{ assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") }
)
}
}

// Uncompressed inputs, no extra config: all process outputs are snapshotted.
test("test_cat_unzipped_unzipped") {
when {
params {
outdir = "${outputDir}"
}
process {
"""
input[0] =
[
[ id:'test', single_end:true ],
[
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true)
]
]
"""
}
}
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}
}


// Gzipped inputs, no extra config: the output is read back through linesGzip.
test("test_cat_zipped_zipped") {
when {
params {
outdir = "${outputDir}"
}
process {
"""
input[0] =
[
[ id:'test', single_end:true ],
[
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
]
]
"""
}
}
then {
// Snapshot the first six decompressed lines and the total line count
// instead of the output file itself.
def lines = path(process.out.file_out.get(0).get(1)).linesGzip
assertAll(
{ assert process.success },
{ assert snapshot(lines[0..5]).match("test_cat_zipped_zipped_lines") },
{ assert snapshot(lines.size()).match("test_cat_zipped_zipped_size")}
)
}
}

// Gzipped inputs with a dedicated config.
// NOTE(review): the config presumably sets an uncompressed ext.prefix — confirm
// in nextflow_zipped_unzipped.config.
test("test_cat_zipped_unzipped") {
config './nextflow_zipped_unzipped.config'

when {
params {
outdir = "${outputDir}"
}
process {
"""
input[0] =
[
[ id:'test', single_end:true ],
[
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
]
]
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

// Uncompressed inputs with a dedicated config; the output is read via linesGzip.
// NOTE(review): the config presumably sets a ".gz" ext.prefix — confirm in
// nextflow_unzipped_zipped.config.
test("test_cat_unzipped_zipped") {
config './nextflow_unzipped_zipped.config'
when {
params {
outdir = "${outputDir}"
}
process {
"""
input[0] =
[
[ id:'test', single_end:true ],
[
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true)
]
]
"""
}
}
then {
// Snapshot the first six decompressed lines and the total line count.
def lines = path(process.out.file_out.get(0).get(1)).linesGzip
assertAll(
{ assert process.success },
{ assert snapshot(lines[0..5]).match("test_cat_unzipped_zipped_lines") },
{ assert snapshot(lines.size()).match("test_cat_unzipped_zipped_size")}
)
}
}

// Same config as the previous test, but with a single input file.
test("test_cat_one_file_unzipped_zipped") {
config './nextflow_unzipped_zipped.config'
when {
params {
outdir = "${outputDir}"
}
process {
"""
input[0] =
[
[ id:'test', single_end:true ],
[
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
]
]
"""
}
}
then {
// Snapshot the first six decompressed lines and the total line count.
def lines = path(process.out.file_out.get(0).get(1)).linesGzip
assertAll(
{ assert process.success },
{ assert snapshot(lines[0..5]).match("test_cat_one_file_unzipped_zipped_lines") },
{ assert snapshot(lines.size()).match("test_cat_one_file_unzipped_zipped_size")}
)
}
}
}
Loading

0 comments on commit d930e9a

Please sign in to comment.