Skip to content

Commit

Permalink
Merge pull request #14 from GallVp/fasta_edta_lai
Browse files Browse the repository at this point in the history
Added fasta_edta_lai
  • Loading branch information
GallVp committed Jul 25, 2024
2 parents edeb006 + bbbe053 commit f9f884b
Show file tree
Hide file tree
Showing 15 changed files with 788 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,10 @@ jobs:
exclude:
- profile: conda
path: modules/gallvp/braker3
- profile: conda
path: modules/gallvp/edta/edta
- profile: conda
path: subworkflows/gallvp/fasta_edta_lai
env:
NXF_ANSI_LOG: false
NFTEST_VER: "0.9.0"
Expand Down
2 changes: 2 additions & 0 deletions docs/AVAILABLE.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
<li><a href="https://github.com/gallvp/nxf-components/tree/main/subworkflows/gallvp/gxf_fasta_agat_spaddintrons_spextractsequences">subworkflows/gallvp/gxf_fasta_agat_spaddintrons_spextractsequences</a></li>
<li><a href="https://github.com/gallvp/nxf-components/tree/main/subworkflows/gallvp/fasta_ltrretriever_lai">subworkflows/gallvp/fasta_ltrretriever_lai</a></li>
<li><a href="https://github.com/gallvp/nxf-components/tree/main/subworkflows/gallvp/fasta_edta_lai">subworkflows/gallvp/fasta_edta_lai</a></li>
<li><a href="https://github.com/gallvp/nxf-components/tree/main/modules/gallvp/tesorter">modules/gallvp/tesorter</a></li>
<li><a href="https://github.com/gallvp/nxf-components/tree/main/modules/gallvp/syri">modules/gallvp/syri</a></li>
<li><a href="https://github.com/gallvp/nxf-components/tree/main/modules/gallvp/repeatmasker">modules/gallvp/repeatmasker</a></li>
Expand All @@ -10,6 +11,7 @@
<li><a href="https://github.com/gallvp/nxf-components/tree/main/modules/gallvp/ltrharvest">modules/gallvp/ltrharvest</a></li>
<li><a href="https://github.com/gallvp/nxf-components/tree/main/modules/gallvp/ltrfinder">modules/gallvp/ltrfinder</a></li>
<li><a href="https://github.com/gallvp/nxf-components/tree/main/modules/gallvp/gunzip">modules/gallvp/gunzip</a></li>
<li><a href="https://github.com/gallvp/nxf-components/tree/main/modules/gallvp/edta/edta">modules/gallvp/edta/edta</a></li>
<li><a href="https://github.com/gallvp/nxf-components/tree/main/modules/gallvp/custom/shortenfastaids">modules/gallvp/custom/shortenfastaids</a></li>
<li><a href="https://github.com/gallvp/nxf-components/tree/main/modules/gallvp/custom/restoregffids">modules/gallvp/custom/restoregffids</a></li>
<li><a href="https://github.com/gallvp/nxf-components/tree/main/modules/gallvp/cat/cat">modules/gallvp/cat/cat</a></li>
Expand Down
10 changes: 10 additions & 0 deletions docs/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,11 @@ <h1>gallvp/nxf-components</h1>
>subworkflows/gallvp/fasta_ltrretriever_lai</a
>
</li>
<li>
<a href="https://github.com/gallvp/nxf-components/tree/main/subworkflows/gallvp/fasta_edta_lai"
>subworkflows/gallvp/fasta_edta_lai</a
>
</li>
<li>
<a href="https://github.com/gallvp/nxf-components/tree/main/modules/gallvp/tesorter"
>modules/gallvp/tesorter</a
Expand Down Expand Up @@ -148,6 +153,11 @@ <h1>gallvp/nxf-components</h1>
>modules/gallvp/gunzip</a
>
</li>
<li>
<a href="https://github.com/gallvp/nxf-components/tree/main/modules/gallvp/edta/edta"
>modules/gallvp/edta/edta</a
>
</li>
<li>
<a href="https://github.com/gallvp/nxf-components/tree/main/modules/gallvp/custom/shortenfastaids"
>modules/gallvp/custom/shortenfastaids</a
Expand Down
92 changes: 92 additions & 0 deletions modules/gallvp/edta/edta/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// Runs the Extensive de-novo TE Annotator (EDTA.pl) on a genome FASTA and
// renames its outputs so that they start with the task prefix.
//
// Inputs:
//   meta, fasta  - sample map and genome FASTA (passed to --genome)
//   cds          - passed as --cds when non-empty
//   curatedlib   - passed as --curatedlib when non-empty
//   rmout        - passed as --rmout when non-empty
//   exclude      - passed as --exclude when non-empty
//
// Outputs:
//   log, te_lib_fasta, versions        - required
//   pass_list, out_file, te_anno_gff3  - optional; emitted only when EDTA wrote them
process EDTA_EDTA {
    tag "$meta.id"
    label 'process_high'

    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/edta:2.1.0--hdfd78af_1':
        'biocontainers/edta:2.1.0--hdfd78af_1' }"

    input:
    tuple val(meta), path(fasta)
    path cds
    path curatedlib
    path rmout
    path exclude

    output:
    tuple val(meta), path('*.log')                  , emit: log
    tuple val(meta), path('*.EDTA.TElib.fa')        , emit: te_lib_fasta
    tuple val(meta), path('*.EDTA.pass.list')       , emit: pass_list       , optional: true
    tuple val(meta), path('*.EDTA.out')             , emit: out_file        , optional: true
    tuple val(meta), path('*.EDTA.TEanno.gff3')     , emit: te_anno_gff3    , optional: true
    path "versions.yml"                             , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args            = task.ext.args     ?: ''
    def prefix          = task.ext.prefix   ?: "${meta.id}"
    // The script expects EDTA's output names to start with "<fasta>.mod".
    def mod_file_name   = "${fasta}.mod"
    // Each optional flag is only added when the corresponding input is non-empty.
    def cds_file        = cds               ? "--cds $cds"              : ''
    def curatedlib_file = curatedlib        ? "--curatedlib $curatedlib": ''
    def rmout_file      = rmout             ? "--rmout $rmout"          : ''
    def exclude_file    = exclude           ? "--exclude $exclude"      : ''
    // The optional outputs are moved with explicit if/else rather than
    // `[ -f X ] && mv ... || echo ...`: with the latter, a failing `mv` would
    // fall through to the echo, print a misleading "did not produce" message
    // and hide the error.
    """
    EDTA.pl \\
        --genome $fasta \\
        --threads $task.cpus \\
        $cds_file \\
        $curatedlib_file \\
        $rmout_file \\
        $exclude_file \\
        $args \\
        &> >(tee "${prefix}.log" 2>&1)

    mv \\
        "${mod_file_name}.EDTA.TElib.fa" \\
        "${prefix}.EDTA.TElib.fa"

    if [ -f "${mod_file_name}.EDTA.raw/LTR/${mod_file_name}.pass.list" ]; then
        mv \\
            "${mod_file_name}.EDTA.raw/LTR/${mod_file_name}.pass.list" \\
            "${prefix}.EDTA.pass.list"
    else
        echo "EDTA did not produce a pass.list file"
    fi

    if [ -f "${mod_file_name}.EDTA.anno/${mod_file_name}.out" ]; then
        mv \\
            "${mod_file_name}.EDTA.anno/${mod_file_name}.out" \\
            "${prefix}.EDTA.out"
    else
        echo "EDTA did not produce an out file"
    fi

    if [ -f "${mod_file_name}.EDTA.TEanno.gff3" ]; then
        mv \\
            "${mod_file_name}.EDTA.TEanno.gff3" \\
            "${prefix}.EDTA.TEanno.gff3"
    else
        echo "EDTA did not produce a TEanno gff3 file"
    fi

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        EDTA: \$(EDTA.pl -h | awk ' /##### Extensive/ {print \$7}')
    END_VERSIONS
    """

    stub:
    def args            = task.ext.args     ?: ''
    def prefix          = task.ext.prefix   ?: "${meta.id}"
    // The stub creates the optional outputs only when args contains "--anno 1".
    // NOTE(review): the script block moves pass.list whenever EDTA wrote it,
    // regardless of --anno; confirm that gating it here is intended.
    def touch_pass_list = args.contains("--anno 1") ? "touch ${prefix}.EDTA.pass.list"  : ''
    def touch_out_file  = args.contains("--anno 1") ? "touch ${prefix}.EDTA.out"        : ''
    def touch_te_anno   = args.contains("--anno 1") ? "touch ${prefix}.EDTA.TEanno.gff3": ''
    """
    touch "${prefix}.log"
    touch "${prefix}.EDTA.TElib.fa"
    $touch_pass_list
    $touch_out_file
    $touch_te_anno

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        EDTA: \$(EDTA.pl -h | awk ' /##### Extensive/ {print \$7}')
    END_VERSIONS
    """
}
82 changes: 82 additions & 0 deletions modules/gallvp/edta/edta/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module metadata for the EDTA_EDTA process (modules/gallvp/edta/edta/main.nf).
name: "edta_edta"
description: Extensive de-novo TE Annotator (EDTA)
keywords:
- genome
- repeat
- annotation
- transposable-elements
tools:
- "edta":
description: Extensive de-novo TE Annotator (EDTA)
homepage: "https://github.com/oushujun/EDTA"
documentation: "https://github.com/oushujun/EDTA"
tool_dev_url: "https://github.com/oushujun/EDTA"
doi: "10.1186/s13059-019-1905-y"
licence: ["GPL v3"]
# Inputs are listed in the order the process declares them. In main.nf, cds,
# curatedlib, rmout and exclude are forwarded to EDTA.pl (as --cds,
# --curatedlib, --rmout and --exclude) only when a non-empty value is given;
# the module tests pass [] for all four.
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'test' ]`
- fasta:
type: file
description: Genome fasta file
pattern: "*.{fsa,fa,fasta}"
- cds:
type: file
description: |
A FASTA file containing the coding sequence (no introns, UTRs, nor TEs)
of this genome or its close relative
pattern: "*.{fsa,fa,fasta}"
- curatedlib:
type: file
description: |
A curated library to keep consistent naming and classification for known TEs
pattern: "*.liban"
- rmout:
type: file
description: |
Homology-based TE annotation instead of using the EDTA library for masking in
RepeatMasker .out format
pattern: "*.out"
- exclude:
type: file
description: Exclude regions (bed format) from TE masking in the MAKER.masked output
pattern: "*.bed"
# log, te_lib_fasta and versions are required outputs of the process;
# pass_list, out_file and te_anno_gff3 are declared `optional: true` in main.nf.
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'test' ]`
- log:
type: file
description: Log emitted by EDTA
pattern: "*.log"
- te_lib_fasta:
type: file
description: A non-redundant TE library in fasta format
pattern: "*.EDTA.TElib.fa"
- pass_list:
type: file
description: A summary table of intact LTR-RTs with coordinate and structural information
pattern: "*.EDTA.pass.list"
- out_file:
type: file
description: RepeatMasker annotation of all LTR sequences in the genome
pattern: "*.EDTA.out"
- te_anno_gff3:
type: file
description: A gff3 file containing both structurally intact and fragmented TE annotations
pattern: "*.EDTA.TEanno.gff3"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@GallVp"
maintainers:
- "@GallVp"
84 changes: 84 additions & 0 deletions modules/gallvp/edta/edta/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// nf-test suite for the EDTA_EDTA process defined in ../main.nf.
// It contains one real run on a gzipped test genome and one stub run.
nextflow_process {

name "Test Process EDTA_EDTA"
script "../main.nf"
process "EDTA_EDTA"
config "./nextflow.config"

// Tags used to select this suite; the gunzip tag reflects the GUNZIP
// dependency used in the setup block of the first test.
tag "modules"
tag "modules_gallvp"
tag "edta"
tag "edta/edta"
tag "modules/nf-core/gunzip"

// Real run: GUNZIP decompresses the test genome, then EDTA_EDTA runs with all
// four optional inputs left empty.
test("actinidia_chinensis-genome_1_fasta_gz") {

setup {
// NOTE(review): GUNZIP is loaded from modules/nf-core/gunzip rather than
// modules/gallvp/gunzip; confirm this path exists in the repository.
run("GUNZIP") {
script "../../../../../modules/nf-core/gunzip"

process {
"""
input[0] = [
[ id:'test' ],
file(params.test_data['actinidia_chinensis']['genome']['genome_1_fasta_gz'], checkIfExists: true)
]
"""
}
}
}

when {
process {
"""
input[0] = GUNZIP.out.gunzip
input[1] = []
input[2] = []
input[3] = []
input[4] = []
"""
}
}

// Only `versions` is compared against the snapshot. The TE library and pass
// list are checked for expected substrings, and out_file / te_anno_gff3 are
// expected to be empty channels in this configuration.
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out.versions).match("versions") },
{ assert path(process.out.te_lib_fasta[0][1]).text.contains('LTR/Copia') },
{ assert path(process.out.pass_list[0][1]).text.contains('Copia') },
{ assert process.out.out_file == [] },
{ assert process.out.te_anno_gff3 == [] }
)
}

}

// Stub run: executes the process with the -stub option and snapshots the
// complete set of output channels.
test("stub") {

options "-stub"

when {
process {
"""
input[0] = [
[ id:'test' ], // meta map
file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
]
input[1] = []
input[2] = []
input[3] = []
input[4] = []
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

}
81 changes: 81 additions & 0 deletions modules/gallvp/edta/edta/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
{
"versions": {
"content": [
[
"versions.yml:md5,f9e6b414c1eb81520a9fdbb15f797405"
]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
"timestamp": "2024-05-10T14:43:10.298103"
},
"stub": {
"content": [
{
"0": [
[
{
"id": "test"
},
"test.log:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"1": [
[
{
"id": "test"
},
"test.EDTA.TElib.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"2": [

],
"3": [

],
"4": [

],
"5": [
"versions.yml:md5,f9e6b414c1eb81520a9fdbb15f797405"
],
"log": [
[
{
"id": "test"
},
"test.log:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"out_file": [

],
"pass_list": [

],
"te_anno_gff3": [

],
"te_lib_fasta": [
[
{
"id": "test"
},
"test.EDTA.TElib.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"versions": [
"versions.yml:md5,f9e6b414c1eb81520a9fdbb15f797405"
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
"timestamp": "2024-05-10T14:43:16.561778"
}
}
Loading

0 comments on commit f9f884b

Please sign in to comment.