@@ -78,111 +78,115 @@ def hamstr(args):
78
78
refspec_seqs = fasta_fn .read_fasta (refspec_fa )
79
79
search_seqs = fasta_fn .read_fasta (search_fa )
80
80
### (3) Do re-blast search for each hmm hit against refspec
81
- for hmm_hit in hmm_hits :
82
- if not hmm_hit == seed_id : # only if search taxon == refspec
83
- hmm_hit_fa = '%s/hmm_%s_%s_%s.fa' % (
84
- outpath , seqName , search_taxon , hmm_hit )
85
- with open (hmm_hit_fa , 'w' ) as hmm_fa_out :
86
- hmm_fa_out .write ('>%s\n %s' % (hmm_hit , search_seqs .fetch (hmm_hit )))
87
- blast_xml = blast_fn .do_blastsearch (
88
- hmm_hit_fa , refspec_db , evalBlast = evalBlast , lowComplexityFilter = lowComplexityFilter )
89
- blast_out = blast_fn .parse_blast_xml (blast_xml )
90
- output_fn .print_debug (debug , 'BLAST hits' , blast_out )
91
- if noCleanup == False :
92
- os .remove (hmm_hit_fa )
93
- ### (4) check reciprocity
94
- ### (4a) if refspec_seq_id == best blast hit
95
- if len (blast_out ['hits' ].keys ()) > 0 :
96
- best_blast_hit = list (blast_out ['hits' ].keys ())[0 ]
97
- if best_blast_hit == hmm_hit and len (blast_out ['hits' ].keys ()) > 1 :
98
- best_blast_hit = list (blast_out ['hits' ].keys ())[1 ]
99
- if seed_id == best_blast_hit :
100
- output_fn .print_stdout (
101
- silentOff ,
102
- '%s accepted (best blast hit is ref)' % (blast_out ['query' ]))
103
- ortho_candi [hmm_hit ] = search_seqs .fetch (hmm_hit )
104
- continue
105
- else :
106
- ### (4b) else, check for co-ortholog ref
107
- if checkCoorthologsRefOff == False :
108
- aln_fa = '%s/blast_%s_%s_%s_%s_%s.fa' % (
109
- outpath , seqName , seed_id , search_taxon ,
110
- hmm_hit , best_blast_hit )
111
- with open (aln_fa , 'w' ) as aln_fa_out :
112
- aln_fa_out .write (
113
- '>%s\n %s\n >%s\n %s\n >%s\n %s' % (
114
- seed_id , refspec_seqs .fetch (seed_id ),
115
- hmm_hit , search_seqs .fetch (hmm_hit ),
116
- best_blast_hit , refspec_seqs .fetch (best_blast_hit )
117
- )
118
- )
119
- fasta_fn .remove_dup (aln_fa )
120
- aln_seq = align_fn .do_align (aligner , aln_fa )
121
- output_fn .print_debug (
122
- debug , 'Alignment for checking co-ortholog ref' , aln_seq )
123
- br_dist = align_fn .calc_Kimura_dist (aln_seq , best_blast_hit , seed_id , debug )
124
- bh_dist = align_fn .calc_Kimura_dist (aln_seq , best_blast_hit , hmm_hit , debug )
125
- output_fn .print_debug (
126
- debug , 'Check if distance blast_vs_ref < blast_vs_hmm' ,
127
- 'd_br = %s; d_bh = %s' % (br_dist , bh_dist ))
128
- if noCleanup == False :
129
- os .remove (aln_fa )
130
- if br_dist == bh_dist == 0 or br_dist < bh_dist :
131
- output_fn .print_stdout (
132
- silentOff ,
133
- '%s accepted (best blast hit is co-ortholog to ref)'
134
- % (blast_out ['query' ])
135
- )
136
- ortho_candi [hmm_hit ] = search_seqs .fetch (hmm_hit )
137
- continue
138
- ### (5) check co-ortholog if more than 1 HMM hits are accepted
139
- if len (ortho_candi ) == 0 :
81
+ if len (hmm_hits ) == 0 :
140
82
output_fn .print_stdout (
141
- silentOff , 'WARNING: Reciprocity not fulfulled! No ortholog found!' )
83
+ silentOff , 'WARNING: No HMM hit found!' )
142
84
else :
143
- best_ortho = list (ortho_candi .keys ())[0 ]
144
- if not best_ortho == seed_id :
145
- ortho_final = fasta_fn .add_seq_to_dict (
146
- ortho_final , '%s|%s|%s|1' % (seqName , search_taxon , best_ortho ),
147
- ortho_candi [best_ortho ])
148
- if rep == False :
149
- if len (ortho_candi ) > 1 :
150
- aln_co_fa = '%s/coortho_%s_%s.fa' % (
151
- outpath , seqName , search_taxon )
152
- with open (aln_co_fa , 'w' ) as aln_co_fa_out :
153
- aln_co_fa_out .write (('>%s\n %s\n ' ) %
154
- (seed_id , refspec_seqs .fetch (seed_id )))
155
- for cand in ortho_candi :
156
- aln_co_fa_out .write (('>%s\n %s\n ' ) %
157
- (cand , ortho_candi [cand ]))
158
- aln_co_seq = align_fn .do_align (aligner , aln_co_fa )
159
- output_fn .print_debug (
160
- debug , 'Alignment for checking co-orthologs' , aln_co_seq )
85
+ for hmm_hit in hmm_hits :
86
+ if not hmm_hit == seed_id : # only if search taxon == refspec
87
+ hmm_hit_fa = '%s/hmm_%s_%s_%s.fa' % (
88
+ outpath , seqName , search_taxon , hmm_hit )
89
+ with open (hmm_hit_fa , 'w' ) as hmm_fa_out :
90
+ hmm_fa_out .write ('>%s\n %s' % (hmm_hit , search_seqs .fetch (hmm_hit )))
91
+ blast_xml = blast_fn .do_blastsearch (
92
+ hmm_hit_fa , refspec_db , evalBlast = evalBlast , lowComplexityFilter = lowComplexityFilter )
93
+ blast_out = blast_fn .parse_blast_xml (blast_xml )
94
+ output_fn .print_debug (debug , 'BLAST hits' , blast_out )
161
95
if noCleanup == False :
162
- os .remove (aln_co_fa )
163
- best_dist = align_fn .calc_Kimura_dist (
164
- aln_co_seq , seed_id , best_ortho , debug )
165
- for cand in ortho_candi :
166
- if not cand == best_ortho :
167
- candi_dist = align_fn .calc_Kimura_dist (
168
- aln_co_seq , best_ortho , cand , debug )
169
- output_fn .print_debug (
170
- debug ,
171
- 'Check if distance bestHmm_vs_ref > '
172
- + 'other_vs_bestHmm' ,
173
- 'd_best = %s; d_other = %s'
174
- % (best_dist , candi_dist ))
175
- if candi_dist < best_dist :
176
- if not cand == seed_id :
177
- ortho_final = fasta_fn .add_seq_to_dict (
178
- ortho_final ,
179
- '%s|%s|%s|0' \
180
- % (seqName , search_taxon , cand ),
181
- ortho_candi [cand ])
182
- output_fn .print_stdout (
183
- silentOff ,
184
- '=> %s orthologs found: %s'
185
- % (len (ortho_final ), list (ortho_final .keys ())))
96
+ os .remove (hmm_hit_fa )
97
+ ### (4) check reciprocity
98
+ ### (4a) if refspec_seq_id == best blast hit
99
+ if len (blast_out ['hits' ].keys ()) > 0 :
100
+ best_blast_hit = list (blast_out ['hits' ].keys ())[0 ]
101
+ if best_blast_hit == hmm_hit and len (blast_out ['hits' ].keys ()) > 1 :
102
+ best_blast_hit = list (blast_out ['hits' ].keys ())[1 ]
103
+ if seed_id == best_blast_hit :
104
+ output_fn .print_stdout (
105
+ silentOff ,
106
+ '%s accepted (best blast hit is ref)' % (blast_out ['query' ]))
107
+ ortho_candi [hmm_hit ] = search_seqs .fetch (hmm_hit )
108
+ continue
109
+ else :
110
+ ### (4b) else, check for co-ortholog ref
111
+ if checkCoorthologsRefOff == False :
112
+ aln_fa = '%s/blast_%s_%s_%s_%s_%s.fa' % (
113
+ outpath , seqName , seed_id , search_taxon ,
114
+ hmm_hit , best_blast_hit )
115
+ with open (aln_fa , 'w' ) as aln_fa_out :
116
+ aln_fa_out .write (
117
+ '>%s\n %s\n >%s\n %s\n >%s\n %s' % (
118
+ seed_id , refspec_seqs .fetch (seed_id ),
119
+ hmm_hit , search_seqs .fetch (hmm_hit ),
120
+ best_blast_hit , refspec_seqs .fetch (best_blast_hit )
121
+ )
122
+ )
123
+ fasta_fn .remove_dup (aln_fa )
124
+ aln_seq = align_fn .do_align (aligner , aln_fa )
125
+ output_fn .print_debug (
126
+ debug , 'Alignment for checking co-ortholog ref' , aln_seq )
127
+ br_dist = align_fn .calc_Kimura_dist (aln_seq , best_blast_hit , seed_id , debug )
128
+ bh_dist = align_fn .calc_Kimura_dist (aln_seq , best_blast_hit , hmm_hit , debug )
129
+ output_fn .print_debug (
130
+ debug , 'Check if distance blast_vs_ref < blast_vs_hmm' ,
131
+ 'd_br = %s; d_bh = %s' % (br_dist , bh_dist ))
132
+ if noCleanup == False :
133
+ os .remove (aln_fa )
134
+ if br_dist == bh_dist == 0 or br_dist < bh_dist :
135
+ output_fn .print_stdout (
136
+ silentOff ,
137
+ '%s accepted (best blast hit is co-ortholog to ref)'
138
+ % (blast_out ['query' ])
139
+ )
140
+ ortho_candi [hmm_hit ] = search_seqs .fetch (hmm_hit )
141
+ continue
142
+ ### (5) check co-ortholog if more than 1 HMM hits are accepted
143
+ if len (ortho_candi ) == 0 :
144
+ output_fn .print_stdout (
145
+ silentOff , 'WARNING: Reciprocity not fulfulled! No ortholog found!' )
146
+ else :
147
+ best_ortho = list (ortho_candi .keys ())[0 ]
148
+ if not best_ortho == seed_id :
149
+ ortho_final = fasta_fn .add_seq_to_dict (
150
+ ortho_final , '%s|%s|%s|1' % (seqName , search_taxon , best_ortho ),
151
+ ortho_candi [best_ortho ])
152
+ if rep == False :
153
+ if len (ortho_candi ) > 1 :
154
+ aln_co_fa = '%s/coortho_%s_%s.fa' % (
155
+ outpath , seqName , search_taxon )
156
+ with open (aln_co_fa , 'w' ) as aln_co_fa_out :
157
+ aln_co_fa_out .write (('>%s\n %s\n ' ) %
158
+ (seed_id , refspec_seqs .fetch (seed_id )))
159
+ for cand in ortho_candi :
160
+ aln_co_fa_out .write (('>%s\n %s\n ' ) %
161
+ (cand , ortho_candi [cand ]))
162
+ aln_co_seq = align_fn .do_align (aligner , aln_co_fa )
163
+ output_fn .print_debug (
164
+ debug , 'Alignment for checking co-orthologs' , aln_co_seq )
165
+ if noCleanup == False :
166
+ os .remove (aln_co_fa )
167
+ best_dist = align_fn .calc_Kimura_dist (
168
+ aln_co_seq , seed_id , best_ortho , debug )
169
+ for cand in ortho_candi :
170
+ if not cand == best_ortho :
171
+ candi_dist = align_fn .calc_Kimura_dist (
172
+ aln_co_seq , best_ortho , cand , debug )
173
+ output_fn .print_debug (
174
+ debug ,
175
+ 'Check if distance bestHmm_vs_ref > '
176
+ + 'other_vs_bestHmm' ,
177
+ 'd_best = %s; d_other = %s'
178
+ % (best_dist , candi_dist ))
179
+ if candi_dist < best_dist :
180
+ if not cand == seed_id :
181
+ ortho_final = fasta_fn .add_seq_to_dict (
182
+ ortho_final ,
183
+ '%s|%s|%s|0' \
184
+ % (seqName , search_taxon , cand ),
185
+ ortho_candi [cand ])
186
+ output_fn .print_stdout (
187
+ silentOff ,
188
+ '=> %s orthologs found: %s'
189
+ % (len (ortho_final ), list (ortho_final .keys ())))
186
190
return (ortho_final )
187
191
188
192
0 commit comments