Skip to content

Commit 5cb0946

Browse files
committed
added all searchTaxa to phyloprofile
1 parent 3e0859b commit 5cb0946

File tree

5 files changed

+60
-7
lines changed

5 files changed

+60
-7
lines changed

fdog/libs/output.py

+22
Original file line numberDiff line numberDiff line change
@@ -90,3 +90,25 @@ def hamstr_2_profile(fa_file):
9090
for id in list(fa.keys()):
9191
tmp = id.split('|')
9292
pp.write('%s\tncbi%s\t%s\n' % (tmp[0], tmp[1].split('@')[1], id))
93+
94+
95+
def add_all_taxa(pp_file, searchTaxa):
    """Append placeholder rows for search taxa absent from a phyloprofile file.

    Each entry of the comma-separated ``searchTaxa`` string is looked up as a
    plain substring anywhere in ``pp_file``. For every entry that is not
    found, one row is appended that reuses the gene ID (first tab-separated
    column) of the first data row, uses ``ncbi<ID>`` as second column (where
    ``<ID>`` is the second ``@``-separated field of the taxon name) and fills
    the remaining columns with ``NA``. The template row decides the layout:
    exactly five tab-separated columns gives three ``NA`` fields, anything
    else gives one.

    Nothing is written when ``pp_file`` does not exist, when it has no data
    row (rows starting with ``geneID`` and blank rows are ignored), or when
    no taxon is missing.

    Args:
        pp_file: path of the phyloprofile file to extend in place.
        searchTaxa: comma-separated taxon names, e.g. ``NAME@ncbiID@version``.

    Raises:
        IndexError: if a missing taxon name contains no ``@``.
    """
    # Guard first: looking up taxa requires opening the file, so the
    # existence check has to come before any search, not after it.
    if not os.path.exists(pp_file):
        return

    # Read the file once; the same text serves both the taxon lookup and
    # the search for the template row.
    with open(pp_file, 'r') as handle:
        content = handle.read()

    # missing_taxa = [ncbi_id]
    missing_taxa = [
        taxon.split('@')[1]
        for taxon in searchTaxa.split(',')
        if taxon not in content
    ]
    if not missing_taxa:
        return

    # The first non-header, non-blank row provides the gene ID and the
    # column layout for the placeholder rows.
    template = next(
        (row for row in content.splitlines()
         if row.strip() and not row.startswith('geneID')),
        None,
    )
    if template is None:
        return

    fields = template.split('\t')
    first_gene = fields[0]
    filler = 'NA\tNA\tNA' if len(fields) == 5 else 'NA'

    with open(pp_file, 'a') as pp:
        # Do not glue the first placeholder onto an unterminated last row.
        if not content.endswith('\n'):
            pp.write('\n')
        pp.writelines(
            f'{first_gene}\tncbi{ncbi_id}\t{filler}\n'
            for ncbi_id in missing_taxa
        )

fdog/libs/zzz.py

+13
Original file line numberDiff line numberDiff line change
@@ -181,3 +181,16 @@ def query_yes_no(question, default='yes'):
181181
else:
182182
sys.stdout.write('Please respond with "yes" or "no" '
183183
'(or "y" or "n").\n')
184+
185+
186+
def search_string_in_file(file, string):
    """Tell whether ``string`` occurs as a substring of any line in ``file``.

    The file is scanned line by line and scanning stops at the first match.

    Return 1 if a line contains ``string``, 0 otherwise.
    """
    with open(file, 'r') as handle:
        found = any(string in text for text in handle)
    return int(found)

fdog/runMulti.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,7 @@ def main():
320320

321321
begin = time.time()
322322
##### Check and group parameters
323-
print('Preparing & Checking...')
323+
print('##### PREPARING & CHECKING #####')
324324
(inFol, hmmpath, corepath, searchpath, annopath) = prepare_fn.check_input(
325325
[inFol, refspec, outpath, hmmpath,
326326
corepath, searchpath, annopath, pathFile])
@@ -356,7 +356,7 @@ def main():
356356

357357
##### DO CORE COMPILATION
358358
if reuseCore == False:
359-
print('Starting compiling core orthologs...')
359+
print('##### COMPILING CORE ORTHOLOG GROUPS #####')
360360
start = time.time()
361361
coreArgs = [minDist, maxDist, coreSize, coreTaxa, distDeviation,
362362
alnStrategy, fasOff]
@@ -382,7 +382,7 @@ def main():
382382
##### DO ORTHOLOG SEARCH USING HMM (HAMSTR)
383383
finalFa = '%s/%s.extended.fa' % (outpath, jobName)
384384
if not coreOnly:
385-
print('Searching orthologs...')
385+
print('##### SEARCHING ORTHOLOGS #####')
386386
start = time.time()
387387
if not os.path.exists(finalFa) or force == True:
388388
### get list of search taxa
@@ -430,6 +430,7 @@ def main():
430430

431431
##### DO FINAL FAS CALCULATION
432432
if not fasOff:
433+
print('##### CALCULATING FAS SCORES #####')
433434
try:
434435
fasVersion = subprocess.run(['fas.run --version'], shell = True, capture_output = True, check = True)
435436
except:
@@ -443,6 +444,13 @@ def main():
443444
else:
444445
output_fn.hamstr_2_profile(finalFa)
445446

447+
##### ADD ALL SEARCH TAXA INTO PhyloProfile OUTPUT
448+
pp_file = f'{outpath}/{jobName}.phyloprofile'
449+
if not searchTaxa:
450+
tmp = general_fn.read_dir(searchpath)
451+
searchTaxa = ','.join(tmp)
452+
output_fn.add_all_taxa(pp_file, searchTaxa)
453+
446454
end = time.time()
447455
print('==> fdogs.run finished in ' + '{:5.3f}s'.format(end - begin))
448456

fdog/runSingle.py

+13-3
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from pkg_resources import get_distribution
2424
import time
2525

26+
import fdog.libs.zzz as general_fn
2627
import fdog.libs.preparation as prepare_fn
2728
import fdog.libs.orthosearch as ortho_fn
2829
import fdog.libs.corecompile as core_fn
@@ -189,7 +190,7 @@ def main():
189190

190191
begin = time.time()
191192
##### Check and group parameters
192-
print('Preparing & Checking...')
193+
print('##### PREPARING & CHECKING #####')
193194
if seqFile == 'infile.fa':
194195
fdogPath = os.path.realpath(__file__).replace('/runSingle.py','')
195196
seqFile = '%s/data/infile.fa' % fdogPath
@@ -224,7 +225,7 @@ def main():
224225
seed_id = prepare_fn.get_seed_id_from_fa(core_fa, refspec)
225226
else:
226227
seed_id = prepare_fn.identify_seed_id(seqFile, refspec, corepath, debug, silentOff)
227-
print('Identified seed ID: %s' % seed_id)
228+
print('==> Identified seed ID: %s' % seed_id)
228229

229230
##### DO CORE COMPILATION
230231
# start = time.time()
@@ -234,7 +235,7 @@ def main():
234235
lowComplexityFilter, evalHmmer/10, coreHitLimit,
235236
scoreCutoff, aligner] # rep = True; e-value cutoff is 10x more stringent than from ortho search
236237
otherCoreArgs = [cpus, debugCore, silentOff, noCleanup, force, append]
237-
print('Compiling core set for %s' % seqName)
238+
print('##### COMPILING CORE SET FOR %s #####' % seqName)
238239
core_runtime = core_fn.run_compile_core([seqFile, seqName, refspec, seed_id, reuseCore,
239240
forceCore, coreArgs, pathArgs, orthoCoreArgs, otherCoreArgs, debug])
240241
print('==> Core compilation finished in %s' % core_runtime[1])
@@ -243,6 +244,7 @@ def main():
243244
##### DO ORTHOLOG SEARCH USING CORE HMM (HAMSTR)
244245
if not coreOnly:
245246
start = time.time()
247+
print('##### SEARCHING ORTHOLOGS #####')
246248
# check existing output
247249
finalOutfile = '%s/%s.extended.fa' % (outpath, seqName)
248250
finalOutfile = os.path.abspath(finalOutfile)
@@ -278,6 +280,7 @@ def main():
278280

279281
##### DO FINAL FAS CALCULATION
280282
if not fasOff:
283+
print('##### CALCULATING FAS SCORES #####')
281284
try:
282285
fasVersion = subprocess.run(['fas.run --version'], shell = True, capture_output = True, check = True)
283286
except:
@@ -290,6 +293,13 @@ def main():
290293
else:
291294
output_fn.hamstr_2_profile(finalOutfile)
292295

296+
##### ADD ALL SEARCH TAXA INTO PhyloProfile OUTPUT
297+
pp_file = f'{outpath}/{seqName}.phyloprofile'
298+
if not searchTaxa:
299+
tmp = general_fn.read_dir(searchpath)
300+
searchTaxa = ','.join(tmp)
301+
output_fn.add_all_taxa(pp_file, searchTaxa)
302+
293303
end = time.time()
294304
print('==> fdog.run finished in ' + '{:5.3f}s'.format(end - begin))
295305

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626

2727
setup(
2828
name="fdog",
29-
version="0.1.19",
29+
version="0.1.20",
3030
python_requires='>=3.7.0',
3131
description="Feature-aware Directed OrtholoG search tool",
3232
long_description=long_description,

0 commit comments

Comments
 (0)