Skip to content

Commit 2405698

Browse files
committed
save core jobs for fdogs.run to file
1 parent e728252 commit 2405698

File tree

2 files changed

+31
-11
lines changed

2 files changed

+31
-11
lines changed

fdog/libs/zzz.py

+13
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import urllib.request
2323
import yaml
2424
import time
25+
import pickle
2526

2627

2728
##### GENERAL FUNCTIONS FOR FILES, FOLDERS AND GENERAL VARIABLES #####
@@ -146,3 +147,15 @@ def join_2lists(first_list, second_list):
146147
in_second_but_not_in_first = in_second - in_first
147148
out = first_list + list(in_second_but_not_in_first)
148149
return(out)
150+
151+
152+
def save_pyobj(obj, out_file):
    """Save a python object to out_file.

    The object is pickled into a temporary file located next to
    ``out_file`` and only moved onto ``out_file`` once it has been written
    completely. An interrupted or failed dump therefore never leaves a
    truncated, non-empty file at ``out_file`` (which a later
    "exists and is not empty" check would mistake for a valid one).

    Args:
        obj: any picklable python object.
        out_file: path of the file to create; replaced if it already exists.

    Raises:
        OSError: if the temporary file cannot be written or moved.
        Exception: whatever ``pickle.dump`` raises for an unpicklable
            object (e.g. ``pickle.PicklingError``); ``out_file`` is left
            untouched in that case.
    """
    # Function-scope import so this block does not depend on the module's
    # import section.
    import os

    # Same directory as the target so os.replace stays on one filesystem;
    # the pid keeps parallel runs from sharing a temporary file.
    tmp_file = '%s.%d.tmp' % (out_file, os.getpid())
    try:
        with open(tmp_file, 'wb') as obj_out:
            pickle.dump(obj, obj_out)
        os.replace(tmp_file, out_file)
    except BaseException:
        # Do not leave a half-written temporary file behind.
        if os.path.exists(tmp_file):
            os.remove(tmp_file)
        raise
156+
157+
158+
def read_pyobj_file(in_file):
    """Read a python object from a pickle file.

    Args:
        in_file: path of a file holding one pickled object.

    Returns:
        The first object unpickled from ``in_file``.

    Raises:
        OSError: if ``in_file`` cannot be opened.
        Exception: whatever ``pickle.load`` raises for an empty or corrupt
            file (e.g. ``EOFError``, ``pickle.UnpicklingError``).
    """
    # NOTE(review): unpickling can execute arbitrary code. Only call this
    # on files written by a trusted process, never on untrusted input.
    with open(in_file, 'rb') as obj_file:
        return pickle.load(obj_file)

fdog/runMulti.py

+18-11
Original file line numberDiff line numberDiff line change
@@ -57,23 +57,29 @@ def get_seed_name(seedFile):
5757
return(seqName)
5858

5959

60-
def compile_core(core_options, other_options, seeds, inFol, cpus, outpath, silentOff):
60+
def compile_core(core_options, other_options, seeds, inFol, cpus, outpath, silentOff, jobName):
6161
core_compilation_jobs = []
6262
(coreArgs, orthoCoreArgs, otherCoreArgs) = core_options
6363
(refspec, reuseCore, forceCore, pathArgs, debug) = other_options
6464
(outpath, hmmpath, corepath, searchpath, annopath) = pathArgs
6565
begin = time.time()
6666
print('Preparing core compilation jobs...')
67-
for seed in seeds:
68-
seqFile = ('%s/%s' % (inFol, seed))
69-
seqName = get_seed_name(seed)
70-
if not os.path.exists('%s/core_orthologs/%s/hmm_dir/%s.hmm' % (outpath, seqName, seqName)) or forceCore == True:
71-
seed_id = prepare_fn.identify_seed_id(seqFile, refspec, corepath, debug, silentOff)
72-
core_compilation_jobs.append([seqFile, seqName, refspec, seed_id,
73-
reuseCore, forceCore, coreArgs, pathArgs, orthoCoreArgs,
74-
otherCoreArgs, debug])
67+
core_job_file = '%s/%s_core_jobs.list' % (outpath, jobName)
68+
if os.path.exists(core_job_file) and os.stat(core_job_file).st_size > 0:
69+
print('... file contains jobs found (%s)' % core_job_file)
70+
core_compilation_jobs = general_fn.read_pyobj_file(core_job_file)
71+
else:
72+
for seed in seeds:
73+
seqFile = ('%s/%s' % (inFol, seed))
74+
seqName = get_seed_name(seed)
75+
if not os.path.exists('%s/core_orthologs/%s/hmm_dir/%s.hmm' % (outpath, seqName, seqName)) or forceCore == True:
76+
seed_id = prepare_fn.identify_seed_id(seqFile, refspec, corepath, debug, silentOff)
77+
core_compilation_jobs.append([seqFile, seqName, refspec, seed_id,
78+
reuseCore, forceCore, coreArgs, pathArgs, orthoCoreArgs,
79+
otherCoreArgs, debug])
80+
general_fn.save_pyobj(core_compilation_jobs, core_job_file)
7581
end = time.time()
76-
print('==> Preparing finished in %s\n' % '{:5.3f}s'.format(end - begin))
82+
print('==> %s jobs will be run. Preparing finished in %s' % (len(core_compilation_jobs), '{:5.3f}s'.format(end - begin)))
7783
if len(core_compilation_jobs) > 0:
7884
pool = mp.Pool(cpus)
7985
core_runtime = []
@@ -95,6 +101,7 @@ def search_ortholog(options, seeds, inFol, cpu, outpath):
95101
begin = time.time()
96102
seqFile = [inFol + '/' + seed]
97103
seqName = get_seed_name(seed)
104+
print('... %s' % seqName)
98105
if not os.path.exists('%s/%s.extended.fa' % (outpath, seqName)) or force == True:
99106
hamstr_out = ortho_fn.run_hamstr([seqName, refspec, pathArgs, orthoArgs, otherArgs])
100107
output_fn.write_hamstr(hamstr_out, outpath, seqName, force, append)
@@ -323,7 +330,7 @@ def main():
323330
otherCoreArgs = [cpus, debugCore, silentOff, noCleanup, force, append]
324331
core_options = [coreArgs, orthoCoreArgs, otherCoreArgs]
325332
other_options = [refspec, reuseCore, forceCore, pathArgs, debug]
326-
core_runtime = compile_core(core_options, other_options, seeds, inFol, cpus, outpath, silentOff)
333+
core_runtime = compile_core(core_options, other_options, seeds, inFol, cpus, outpath, silentOff, jobName)
327334
end = time.time()
328335
multi_core_time = '{:5.3f}'.format(end-start)
329336
print('==> Core compilation finished in %ss\n' % multi_core_time)

0 commit comments

Comments
 (0)