Skip to content

Commit 48372cf

Browse files
committed
added warning for no hmm hit
1 parent a8541f8 commit 48372cf

File tree

2 files changed

+107
-103
lines changed

2 files changed

+107
-103
lines changed

fdog/libs/orthosearch.py

+106-102
Original file line numberDiff line numberDiff line change
@@ -78,111 +78,115 @@ def hamstr(args):
7878
refspec_seqs = fasta_fn.read_fasta(refspec_fa)
7979
search_seqs = fasta_fn.read_fasta(search_fa)
8080
### (3) Do re-blast search for each hmm hit against refspec
81-
for hmm_hit in hmm_hits:
82-
if not hmm_hit == seed_id: # only if search taxon == refspec
83-
hmm_hit_fa = '%s/hmm_%s_%s_%s.fa' % (
84-
outpath, seqName, search_taxon, hmm_hit)
85-
with open(hmm_hit_fa, 'w') as hmm_fa_out:
86-
hmm_fa_out.write('>%s\n%s' % (hmm_hit, search_seqs.fetch(hmm_hit)))
87-
blast_xml = blast_fn.do_blastsearch(
88-
hmm_hit_fa, refspec_db, evalBlast = evalBlast, lowComplexityFilter = lowComplexityFilter)
89-
blast_out = blast_fn.parse_blast_xml(blast_xml)
90-
output_fn.print_debug(debug, 'BLAST hits', blast_out)
91-
if noCleanup == False:
92-
os.remove(hmm_hit_fa)
93-
### (4) check reciprocity
94-
### (4a) if refspec_seq_id == best blast hit
95-
if len(blast_out['hits'].keys()) > 0:
96-
best_blast_hit = list(blast_out['hits'].keys())[0]
97-
if best_blast_hit == hmm_hit and len(blast_out['hits'].keys()) > 1:
98-
best_blast_hit = list(blast_out['hits'].keys())[1]
99-
if seed_id == best_blast_hit:
100-
output_fn.print_stdout(
101-
silentOff,
102-
'%s accepted (best blast hit is ref)' % (blast_out['query']))
103-
ortho_candi[hmm_hit] = search_seqs.fetch(hmm_hit)
104-
continue
105-
else:
106-
### (4b) else, check for co-ortholog ref
107-
if checkCoorthologsRefOff == False:
108-
aln_fa = '%s/blast_%s_%s_%s_%s_%s.fa' % (
109-
outpath, seqName, seed_id, search_taxon,
110-
hmm_hit, best_blast_hit)
111-
with open(aln_fa, 'w') as aln_fa_out:
112-
aln_fa_out.write(
113-
'>%s\n%s\n>%s\n%s\n>%s\n%s' % (
114-
seed_id, refspec_seqs.fetch(seed_id),
115-
hmm_hit, search_seqs.fetch(hmm_hit),
116-
best_blast_hit, refspec_seqs.fetch(best_blast_hit)
117-
)
118-
)
119-
fasta_fn.remove_dup(aln_fa)
120-
aln_seq = align_fn.do_align(aligner, aln_fa)
121-
output_fn.print_debug(
122-
debug, 'Alignment for checking co-ortholog ref', aln_seq)
123-
br_dist = align_fn.calc_Kimura_dist(aln_seq, best_blast_hit, seed_id, debug)
124-
bh_dist = align_fn.calc_Kimura_dist(aln_seq, best_blast_hit, hmm_hit, debug)
125-
output_fn.print_debug(
126-
debug, 'Check if distance blast_vs_ref < blast_vs_hmm',
127-
'd_br = %s; d_bh = %s' % (br_dist, bh_dist))
128-
if noCleanup == False:
129-
os.remove(aln_fa)
130-
if br_dist == bh_dist == 0 or br_dist < bh_dist:
131-
output_fn.print_stdout(
132-
silentOff,
133-
'%s accepted (best blast hit is co-ortholog to ref)'
134-
% (blast_out['query'])
135-
)
136-
ortho_candi[hmm_hit] = search_seqs.fetch(hmm_hit)
137-
continue
138-
### (5) check co-ortholog if more than 1 HMM hits are accepted
139-
if len(ortho_candi) == 0:
81+
if len(hmm_hits) == 0:
14082
output_fn.print_stdout(
141-
silentOff, 'WARNING: Reciprocity not fulfulled! No ortholog found!')
83+
silentOff, 'WARNING: No HMM hit found!')
14284
else:
143-
best_ortho = list(ortho_candi.keys())[0]
144-
if not best_ortho == seed_id:
145-
ortho_final = fasta_fn.add_seq_to_dict(
146-
ortho_final, '%s|%s|%s|1' % (seqName, search_taxon, best_ortho),
147-
ortho_candi[best_ortho])
148-
if rep == False:
149-
if len(ortho_candi) > 1:
150-
aln_co_fa = '%s/coortho_%s_%s.fa' % (
151-
outpath, seqName, search_taxon)
152-
with open(aln_co_fa, 'w') as aln_co_fa_out:
153-
aln_co_fa_out.write(('>%s\n%s\n') %
154-
(seed_id, refspec_seqs.fetch(seed_id)))
155-
for cand in ortho_candi:
156-
aln_co_fa_out.write(('>%s\n%s\n') %
157-
(cand, ortho_candi[cand]))
158-
aln_co_seq = align_fn.do_align(aligner, aln_co_fa)
159-
output_fn.print_debug(
160-
debug, 'Alignment for checking co-orthologs', aln_co_seq)
85+
for hmm_hit in hmm_hits:
86+
if not hmm_hit == seed_id: # only if search taxon == refspec
87+
hmm_hit_fa = '%s/hmm_%s_%s_%s.fa' % (
88+
outpath, seqName, search_taxon, hmm_hit)
89+
with open(hmm_hit_fa, 'w') as hmm_fa_out:
90+
hmm_fa_out.write('>%s\n%s' % (hmm_hit, search_seqs.fetch(hmm_hit)))
91+
blast_xml = blast_fn.do_blastsearch(
92+
hmm_hit_fa, refspec_db, evalBlast = evalBlast, lowComplexityFilter = lowComplexityFilter)
93+
blast_out = blast_fn.parse_blast_xml(blast_xml)
94+
output_fn.print_debug(debug, 'BLAST hits', blast_out)
16195
if noCleanup == False:
162-
os.remove(aln_co_fa)
163-
best_dist = align_fn.calc_Kimura_dist(
164-
aln_co_seq, seed_id, best_ortho, debug)
165-
for cand in ortho_candi:
166-
if not cand == best_ortho:
167-
candi_dist = align_fn.calc_Kimura_dist(
168-
aln_co_seq, best_ortho, cand, debug)
169-
output_fn.print_debug(
170-
debug,
171-
'Check if distance bestHmm_vs_ref > '
172-
+ 'other_vs_bestHmm',
173-
'd_best = %s; d_other = %s'
174-
% (best_dist, candi_dist))
175-
if candi_dist < best_dist:
176-
if not cand == seed_id:
177-
ortho_final = fasta_fn.add_seq_to_dict(
178-
ortho_final,
179-
'%s|%s|%s|0' \
180-
% (seqName, search_taxon, cand),
181-
ortho_candi[cand])
182-
output_fn.print_stdout(
183-
silentOff,
184-
'=> %s orthologs found: %s'
185-
% (len(ortho_final), list(ortho_final.keys())))
96+
os.remove(hmm_hit_fa)
97+
### (4) check reciprocity
98+
### (4a) if refspec_seq_id == best blast hit
99+
if len(blast_out['hits'].keys()) > 0:
100+
best_blast_hit = list(blast_out['hits'].keys())[0]
101+
if best_blast_hit == hmm_hit and len(blast_out['hits'].keys()) > 1:
102+
best_blast_hit = list(blast_out['hits'].keys())[1]
103+
if seed_id == best_blast_hit:
104+
output_fn.print_stdout(
105+
silentOff,
106+
'%s accepted (best blast hit is ref)' % (blast_out['query']))
107+
ortho_candi[hmm_hit] = search_seqs.fetch(hmm_hit)
108+
continue
109+
else:
110+
### (4b) else, check for co-ortholog ref
111+
if checkCoorthologsRefOff == False:
112+
aln_fa = '%s/blast_%s_%s_%s_%s_%s.fa' % (
113+
outpath, seqName, seed_id, search_taxon,
114+
hmm_hit, best_blast_hit)
115+
with open(aln_fa, 'w') as aln_fa_out:
116+
aln_fa_out.write(
117+
'>%s\n%s\n>%s\n%s\n>%s\n%s' % (
118+
seed_id, refspec_seqs.fetch(seed_id),
119+
hmm_hit, search_seqs.fetch(hmm_hit),
120+
best_blast_hit, refspec_seqs.fetch(best_blast_hit)
121+
)
122+
)
123+
fasta_fn.remove_dup(aln_fa)
124+
aln_seq = align_fn.do_align(aligner, aln_fa)
125+
output_fn.print_debug(
126+
debug, 'Alignment for checking co-ortholog ref', aln_seq)
127+
br_dist = align_fn.calc_Kimura_dist(aln_seq, best_blast_hit, seed_id, debug)
128+
bh_dist = align_fn.calc_Kimura_dist(aln_seq, best_blast_hit, hmm_hit, debug)
129+
output_fn.print_debug(
130+
debug, 'Check if distance blast_vs_ref < blast_vs_hmm',
131+
'd_br = %s; d_bh = %s' % (br_dist, bh_dist))
132+
if noCleanup == False:
133+
os.remove(aln_fa)
134+
if br_dist == bh_dist == 0 or br_dist < bh_dist:
135+
output_fn.print_stdout(
136+
silentOff,
137+
'%s accepted (best blast hit is co-ortholog to ref)'
138+
% (blast_out['query'])
139+
)
140+
ortho_candi[hmm_hit] = search_seqs.fetch(hmm_hit)
141+
continue
142+
### (5) check co-ortholog if more than 1 HMM hits are accepted
143+
if len(ortho_candi) == 0:
144+
output_fn.print_stdout(
145+
silentOff, 'WARNING: Reciprocity not fulfulled! No ortholog found!')
146+
else:
147+
best_ortho = list(ortho_candi.keys())[0]
148+
if not best_ortho == seed_id:
149+
ortho_final = fasta_fn.add_seq_to_dict(
150+
ortho_final, '%s|%s|%s|1' % (seqName, search_taxon, best_ortho),
151+
ortho_candi[best_ortho])
152+
if rep == False:
153+
if len(ortho_candi) > 1:
154+
aln_co_fa = '%s/coortho_%s_%s.fa' % (
155+
outpath, seqName, search_taxon)
156+
with open(aln_co_fa, 'w') as aln_co_fa_out:
157+
aln_co_fa_out.write(('>%s\n%s\n') %
158+
(seed_id, refspec_seqs.fetch(seed_id)))
159+
for cand in ortho_candi:
160+
aln_co_fa_out.write(('>%s\n%s\n') %
161+
(cand, ortho_candi[cand]))
162+
aln_co_seq = align_fn.do_align(aligner, aln_co_fa)
163+
output_fn.print_debug(
164+
debug, 'Alignment for checking co-orthologs', aln_co_seq)
165+
if noCleanup == False:
166+
os.remove(aln_co_fa)
167+
best_dist = align_fn.calc_Kimura_dist(
168+
aln_co_seq, seed_id, best_ortho, debug)
169+
for cand in ortho_candi:
170+
if not cand == best_ortho:
171+
candi_dist = align_fn.calc_Kimura_dist(
172+
aln_co_seq, best_ortho, cand, debug)
173+
output_fn.print_debug(
174+
debug,
175+
'Check if distance bestHmm_vs_ref > '
176+
+ 'other_vs_bestHmm',
177+
'd_best = %s; d_other = %s'
178+
% (best_dist, candi_dist))
179+
if candi_dist < best_dist:
180+
if not cand == seed_id:
181+
ortho_final = fasta_fn.add_seq_to_dict(
182+
ortho_final,
183+
'%s|%s|%s|0' \
184+
% (seqName, search_taxon, cand),
185+
ortho_candi[cand])
186+
output_fn.print_stdout(
187+
silentOff,
188+
'=> %s orthologs found: %s'
189+
% (len(ortho_final), list(ortho_final.keys())))
186190
return(ortho_final)
187191

188192

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626

2727
setup(
2828
name="fdog",
29-
version="0.1.16",
29+
version="0.1.17",
3030
python_requires='>=3.7.0',
3131
description="Feature-aware Directed OrtholoG search tool",
3232
long_description=long_description,

0 commit comments

Comments
 (0)