Skip to content

Commit

Permalink
Check for duplicated sequence IDs in mergeOutput when merging FASTA files
Browse files: browse the repository at this point in the history
  • Loading branch information
trvinh committed Jun 6, 2024
1 parent 3893e66 commit e9340cf
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 5 deletions.
12 changes: 8 additions & 4 deletions fdog/mergeOutput.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import argparse
import yaml
from pkg_resources import get_distribution
from Bio import SeqIO

def createConfigPP(phyloprofile, domains_0, ex_fasta, directory, out):
settings = dict(
Expand Down Expand Up @@ -57,6 +58,7 @@ def main():
ex_fasta = None
lines_seen = set()
lines_seen_2 = set()
fa_seq_id = set()
for infile in ldir(directory):
if infile.endswith('.phyloprofile') and not infile == out + '.phyloprofile':
if not phyloprofile:
Expand Down Expand Up @@ -91,10 +93,12 @@ def main():
if not ex_fasta:
ex_fasta = out + '.extended.fa'
ex_fasta_out = open(ex_fasta, 'w')
with open(directory + '/' + infile, 'r') as reader:
lines = reader.readlines()
for line in lines:
ex_fasta_out.write(line)
inSeq = SeqIO.to_dict((SeqIO.parse(open(directory + '/' + infile), 'fasta')))
for seq in inSeq:
if not seq in fa_seq_id:
ex_fasta_out.write('>%s\n%s\n' % (seq, inSeq[seq].seq))
fa_seq_id.add(seq)

if phyloprofile:
phyloprofile_out.close()
if domains_0:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

setup(
name="fdog",
version="0.1.31",
version="0.1.32",
python_requires='>=3.7.0',
description="Feature-aware Directed OrtholoG search tool",
long_description=long_description,
Expand Down

0 comments on commit e9340cf

Please sign in to comment.