32
32
SOFTWARE.
33
33
"""
34
34
35
- tool_version = "0.6.0 "
35
+ tool_version = "0.5 "
36
36
welcome = """\
37
37
........................
38
38
* comBGC v.{version} *
61
61
these can be:
62
62
- antiSMASH: <sample name>.gbk and (optional) knownclusterblast/ directory
63
63
- DeepBGC: <sample name>.bgc.tsv
64
- - GECCO: <sample name>.clusters.tsv
65
- Note: Please provide files from a single sample only. If you would like to
66
- summarize multiple samples, please see the --antismash_multiple_samples flag.""" ,
64
+ - GECCO: <sample name>.clusters.tsv""" ,
67
65
)
68
66
parser .add_argument (
69
67
"-o" ,
75
73
type = str ,
76
74
default = "." ,
77
75
)
78
- parser .add_argument (
79
- "-a" ,
80
- "--antismash_multiple_samples" ,
81
- metavar = "PATH" ,
82
- dest = "antismash_multiple_samples" ,
83
- nargs = "?" ,
84
- help = """directory of antiSMASH output. Should contain subfolders (one per
85
- sample). Can only be used if --input is not specified.""" ,
86
- type = str ,
87
- )
88
76
parser .add_argument ("-vv" , "--verbose" , help = "increase output verbosity" , action = "store_true" )
89
77
parser .add_argument ("-v" , "--version" , help = "show version number and exit" , action = "store_true" )
90
78
93
81
94
82
# Assign input arguments to variables
95
83
input = args .input
96
- dir_antismash = args .antismash_multiple_samples
97
84
outdir = args .outdir
98
85
verbose = args .verbose
99
86
version = args .version
124
111
elif path .endswith ("knownclusterblast/" ):
125
112
input_antismash .append (path )
126
113
127
- if input and dir_antismash :
128
- exit (
129
- "The flags --input and --antismash_multiple_samples are mutually exclusive.\n Please use only one of them (or see --help for how to use)."
130
- )
131
-
132
114
# Make sure that at least one input argument is given
133
- if not (input_antismash or input_gecco or input_deepbgc or dir_antismash ):
115
+ if not (input_antismash or input_gecco or input_deepbgc ):
134
116
exit ("Please specify at least one input file (i.e. output from antismash, deepbgc, or gecco) or see --help" )
135
117
136
118
########################
137
119
# ANTISMASH FUNCTIONS
138
120
########################
139
121
140
122
141
- def prepare_multisample_input_antismash (antismash_dir ):
142
- """
143
- Prepare string of input paths of a given antiSMASH output folder (with sample subdirectories)
144
- """
145
- sample_paths = []
146
- for root , subdirs , files in os .walk (antismash_dir ):
147
- antismash_file = "/" .join ([root , "index.html" ])
148
- if os .path .exists (antismash_file ):
149
- sample = root .split ("/" )[- 1 ]
150
- gbk_path = "/" .join ([root , sample ]) + ".gbk"
151
- kkb_path = "/" .join ([root , "knownclusterblast" ])
152
- if os .path .exists (kkb_path ):
153
- sample_paths .append ([gbk_path , kkb_path ])
154
- else :
155
- sample_paths .append ([gbk_path ])
156
- return sample_paths
157
-
158
-
159
123
def parse_knownclusterblast (kcb_file_path ):
160
124
"""
161
125
Extract MIBiG IDs from knownclusterblast TXT file.
@@ -184,6 +148,9 @@ def antismash_workflow(antismash_paths):
184
148
- Return data frame with aggregated info.
185
149
"""
186
150
151
+ if verbose :
152
+ print ("\n Parsing antiSMASH files\n ... " , end = "" )
153
+
187
154
antismash_sum_cols = [
188
155
"Sample_ID" ,
189
156
"Prediction_tool" ,
@@ -219,9 +186,6 @@ def antismash_workflow(antismash_paths):
219
186
220
187
# Aggregate information
221
188
Sample_ID = gbk_path .split ("/" )[- 1 ].split (".gbk" )[- 2 ] # Assuming file name equals sample name
222
- if verbose :
223
- print ("\n Parsing antiSMASH file(s): " + Sample_ID + "\n ... " , end = "" )
224
-
225
189
with open (gbk_path ) as gbk :
226
190
for record in SeqIO .parse (gbk , "genbank" ): # GBK records are contigs in this case
227
191
# Initiate variables per contig
@@ -550,13 +514,7 @@ def gecco_workflow(gecco_paths):
550
514
########################
551
515
552
516
if __name__ == "__main__" :
553
- if input_antismash :
554
- tools = {"antiSMASH" : input_antismash , "deepBGC" : input_deepbgc , "GECCO" : input_gecco }
555
- elif dir_antismash :
556
- tools = {"antiSMASH" : dir_antismash }
557
- else :
558
- tools = {"deepBGC" : input_deepbgc , "GECCO" : input_gecco }
559
-
517
+ tools = {"antiSMASH" : input_antismash , "deepBGC" : input_deepbgc , "GECCO" : input_gecco }
560
518
tools_provided = {}
561
519
562
520
for tool in tools .keys ():
@@ -574,13 +532,7 @@ def gecco_workflow(gecco_paths):
574
532
575
533
for tool in tools_provided .keys ():
576
534
if tool == "antiSMASH" :
577
- if dir_antismash :
578
- antismash_paths = prepare_multisample_input_antismash (dir_antismash )
579
- for input_antismash in antismash_paths :
580
- summary_antismash_temp = antismash_workflow (input_antismash )
581
- summary_antismash = pd .concat ([summary_antismash , summary_antismash_temp ])
582
- else :
583
- summary_antismash = antismash_workflow (input_antismash )
535
+ summary_antismash = antismash_workflow (input_antismash )
584
536
elif tool == "deepBGC" :
585
537
summary_deepbgc = deepbgc_workflow (input_deepbgc )
586
538
elif tool == "GECCO" :
0 commit comments