-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #14 from GallVp/fasta_edta_lai
Added fasta_edta_lai
- Loading branch information
Showing
15 changed files
with
788 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
// Runs EDTA.pl on a genome FASTA and renames its results to `${prefix}`-based names.
//
// Inputs:
//   meta, fasta                      sample map and the genome passed to `--genome`
//   cds, curatedlib, rmout, exclude  optional files; each one is forwarded as the
//                                    matching EDTA.pl option only when it is non-empty
// Outputs:
//   log, te_lib_fasta                     always expected
//   pass_list, out_file, te_anno_gff3     optional; emitted only if EDTA produced them
//   versions                              versions.yml
process EDTA_EDTA {
    tag "$meta.id"
    label 'process_high'

    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/edta:2.1.0--hdfd78af_1':
        'biocontainers/edta:2.1.0--hdfd78af_1' }"

    input:
    tuple val(meta), path(fasta)
    path cds
    path curatedlib
    path rmout
    path exclude

    output:
    tuple val(meta), path('*.log')                      , emit: log
    tuple val(meta), path('*.EDTA.TElib.fa')            , emit: te_lib_fasta
    tuple val(meta), path('*.EDTA.pass.list')           , emit: pass_list       , optional: true
    tuple val(meta), path('*.EDTA.out')                 , emit: out_file        , optional: true
    tuple val(meta), path('*.EDTA.TEanno.gff3')         , emit: te_anno_gff3    , optional: true
    path "versions.yml"                                 , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args            = task.ext.args     ?: ''
    def prefix          = task.ext.prefix   ?: "${meta.id}"
    // Results are looked up under the `<fasta>.mod` prefix and renamed to `${prefix}.*`
    def mod_file_name   = "${fasta}.mod"
    // Each optional input becomes a command-line flag only when a file was supplied
    def cds_file        = cds               ? "--cds $cds"                  : ''
    def curatedlib_file = curatedlib        ? "--curatedlib $curatedlib"    : ''
    def rmout_file      = rmout             ? "--rmout $rmout"              : ''
    def exclude_file    = exclude           ? "--exclude $exclude"          : ''
    // The optional results are renamed with explicit if/else blocks rather than
    // `test && mv || echo`: with the chained form a failing `mv` would fall through
    // to `echo`, exit 0, and silently drop the optional output.
    // The version is the 7th field of the `EDTA.pl -h` line matching "##### Extensive".
    """
    EDTA.pl \\
        --genome $fasta \\
        --threads $task.cpus \\
        $cds_file \\
        $curatedlib_file \\
        $rmout_file \\
        $exclude_file \\
        $args \\
        &> >(tee "${prefix}.log" 2>&1)
    mv \\
        "${mod_file_name}.EDTA.TElib.fa" \\
        "${prefix}.EDTA.TElib.fa"
    if [ -f "${mod_file_name}.EDTA.raw/LTR/${mod_file_name}.pass.list" ]; then
        mv \\
            "${mod_file_name}.EDTA.raw/LTR/${mod_file_name}.pass.list" \\
            "${prefix}.EDTA.pass.list"
    else
        echo "EDTA did not produce a pass.list file"
    fi
    if [ -f "${mod_file_name}.EDTA.anno/${mod_file_name}.out" ]; then
        mv \\
            "${mod_file_name}.EDTA.anno/${mod_file_name}.out" \\
            "${prefix}.EDTA.out"
    else
        echo "EDTA did not produce an out file"
    fi
    if [ -f "${mod_file_name}.EDTA.TEanno.gff3" ]; then
        mv \\
            "${mod_file_name}.EDTA.TEanno.gff3" \\
            "${prefix}.EDTA.TEanno.gff3"
    else
        echo "EDTA did not produce a TEanno gff3 file"
    fi
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        EDTA: \$(EDTA.pl -h | awk ' /##### Extensive/ {print \$7}')
    END_VERSIONS
    """

    stub:
    def args            = task.ext.args     ?: ''
    def prefix          = task.ext.prefix   ?: "${meta.id}"
    // The three optional outputs are only touched when `--anno 1` appears in ext.args
    def touch_pass_list = args.contains("--anno 1") ? "touch ${prefix}.EDTA.pass.list"  : ''
    def touch_out_file  = args.contains("--anno 1") ? "touch ${prefix}.EDTA.out"        : ''
    def touch_te_anno   = args.contains("--anno 1") ? "touch ${prefix}.EDTA.TEanno.gff3": ''
    """
    touch "${prefix}.log"
    touch "${prefix}.EDTA.TElib.fa"
    $touch_pass_list
    $touch_out_file
    $touch_te_anno
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        EDTA: \$(EDTA.pl -h | awk ' /##### Extensive/ {print \$7}')
    END_VERSIONS
    """
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
--- | ||
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json | ||
name: "edta_edta" | ||
description: Extensive de-novo TE Annotator (EDTA) | ||
keywords: | ||
- genome | ||
- repeat | ||
- annotation | ||
- transposable-elements | ||
tools: | ||
- "edta": | ||
description: Extensive de-novo TE Annotator (EDTA) | ||
homepage: "https://github.com/oushujun/EDTA" | ||
documentation: "https://github.com/oushujun/EDTA" | ||
tool_dev_url: "https://github.com/oushujun/EDTA" | ||
doi: "10.1186/s13059-019-1905-y" | ||
licence: ["GPL v3"] | ||
input: | ||
- meta: | ||
type: map | ||
description: | | ||
Groovy Map containing sample information | ||
e.g. `[ id:'test' ]` | ||
- fasta: | ||
type: file | ||
description: Genome fasta file | ||
pattern: "*.{fsa,fa,fasta}" | ||
- cds: | ||
type: file | ||
description: | | ||
A FASTA file containing the coding sequence (no introns, UTRs, nor TEs) | ||
of this genome or its close relative | ||
pattern: "*.{fsa,fa,fasta}" | ||
- curatedlib: | ||
type: file | ||
description: | | ||
A curated library to keep consistent naming and classification for known TEs | ||
pattern: "*.liban" | ||
- rmout: | ||
type: file | ||
description: | | ||
Homology-based TE annotation in RepeatMasker .out format, used for masking | ||
instead of the EDTA library | ||
pattern: "*.out" | ||
- exclude: | ||
type: file | ||
description: Exclude regions (bed format) from TE masking in the MAKER.masked output | ||
pattern: "*.bed" | ||
output: | ||
- meta: | ||
type: map | ||
description: | | ||
Groovy Map containing sample information | ||
e.g. `[ id:'test' ]` | ||
- log: | ||
type: file | ||
description: Log emitted by EDTA | ||
pattern: "*.log" | ||
- te_lib_fasta: | ||
type: file | ||
description: A non-redundant TE library in fasta format | ||
pattern: "*.EDTA.TElib.fa" | ||
- pass_list: | ||
type: file | ||
description: A summary table of intact LTR-RTs with coordinate and structural information | ||
pattern: "*.EDTA.pass.list" | ||
- out_file: | ||
type: file | ||
description: RepeatMasker annotation of all LTR sequences in the genome | ||
pattern: "*.EDTA.out" | ||
- te_anno_gff3: | ||
type: file | ||
description: A gff3 file containing both structurally intact and fragmented TE annotations | ||
pattern: "*.EDTA.TEanno.gff3" | ||
- versions: | ||
type: file | ||
description: File containing software versions | ||
pattern: "versions.yml" | ||
authors: | ||
- "@GallVp" | ||
maintainers: | ||
- "@GallVp" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
// nf-test suite for the EDTA_EDTA process defined in ../main.nf. | ||
// Contains one run on real test data and one stub run. | ||
nextflow_process { | ||
|
||
name "Test Process EDTA_EDTA" | ||
script "../main.nf" | ||
process "EDTA_EDTA" | ||
config "./nextflow.config" | ||
|
||
tag "modules" | ||
tag "modules_gallvp" | ||
tag "edta" | ||
tag "edta/edta" | ||
tag "modules/nf-core/gunzip" | ||
|
||
// Real run on the gunzipped actinidia_chinensis genome with all optional inputs empty. | ||
test("actinidia_chinensis-genome_1_fasta_gz") { | ||
|
||
// GUNZIP runs first; its `gunzip` output channel is used as input[0] of EDTA_EDTA below. | ||
setup { | ||
run("GUNZIP") { | ||
script "../../../../../modules/nf-core/gunzip" | ||
|
||
process { | ||
""" | ||
input[0] = [ | ||
[ id:'test' ], | ||
file(params.test_data['actinidia_chinensis']['genome']['genome_1_fasta_gz'], checkIfExists: true) | ||
] | ||
""" | ||
} | ||
} | ||
} | ||
|
||
when { | ||
process { | ||
""" | ||
input[0] = GUNZIP.out.gunzip | ||
input[1] = [] | ||
input[2] = [] | ||
input[3] = [] | ||
input[4] = [] | ||
""" | ||
} | ||
} | ||
|
||
// Snapshots versions.yml, checks the TE library and pass list by content, | ||
// and expects the out_file and te_anno_gff3 channels to be empty. | ||
then { | ||
assertAll( | ||
{ assert process.success }, | ||
{ assert snapshot(process.out.versions).match("versions") }, | ||
{ assert path(process.out.te_lib_fasta[0][1]).text.contains('LTR/Copia') }, | ||
{ assert path(process.out.pass_list[0][1]).text.contains('Copia') }, | ||
{ assert process.out.out_file == [] }, | ||
{ assert process.out.te_anno_gff3 == [] } | ||
) | ||
} | ||
|
||
} | ||
|
||
// Stub run (-stub) on the homo_sapiens genome; every output channel is snapshotted. | ||
test("stub") { | ||
|
||
options "-stub" | ||
|
||
when { | ||
process { | ||
""" | ||
input[0] = [ | ||
[ id:'test' ], // meta map | ||
file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) | ||
] | ||
input[1] = [] | ||
input[2] = [] | ||
input[3] = [] | ||
input[4] = [] | ||
""" | ||
} | ||
} | ||
|
||
then { | ||
assertAll( | ||
{ assert process.success }, | ||
{ assert snapshot(process.out).match() } | ||
) | ||
} | ||
|
||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
{ | ||
"versions": { | ||
"content": [ | ||
[ | ||
"versions.yml:md5,f9e6b414c1eb81520a9fdbb15f797405" | ||
] | ||
], | ||
"meta": { | ||
"nf-test": "0.8.4", | ||
"nextflow": "23.10.1" | ||
}, | ||
"timestamp": "2024-05-10T14:43:10.298103" | ||
}, | ||
"stub": { | ||
"content": [ | ||
{ | ||
"0": [ | ||
[ | ||
{ | ||
"id": "test" | ||
}, | ||
"test.log:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
] | ||
], | ||
"1": [ | ||
[ | ||
{ | ||
"id": "test" | ||
}, | ||
"test.EDTA.TElib.fa:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
] | ||
], | ||
"2": [ | ||
|
||
], | ||
"3": [ | ||
|
||
], | ||
"4": [ | ||
|
||
], | ||
"5": [ | ||
"versions.yml:md5,f9e6b414c1eb81520a9fdbb15f797405" | ||
], | ||
"log": [ | ||
[ | ||
{ | ||
"id": "test" | ||
}, | ||
"test.log:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
] | ||
], | ||
"out_file": [ | ||
|
||
], | ||
"pass_list": [ | ||
|
||
], | ||
"te_anno_gff3": [ | ||
|
||
], | ||
"te_lib_fasta": [ | ||
[ | ||
{ | ||
"id": "test" | ||
}, | ||
"test.EDTA.TElib.fa:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
] | ||
], | ||
"versions": [ | ||
"versions.yml:md5,f9e6b414c1eb81520a9fdbb15f797405" | ||
] | ||
} | ||
], | ||
"meta": { | ||
"nf-test": "0.8.4", | ||
"nextflow": "23.10.1" | ||
}, | ||
"timestamp": "2024-05-10T14:43:16.561778" | ||
} | ||
} |
Oops, something went wrong.