Skip to content

Commit c955fac

Browse files
authored
Merge pull request #261 from nf-core/revert-258-combgc_new_input_param
Revert "ComBGC: Add functionality to screen whole sample directory (antismash)"
2 parents bc76195 + f6b2e37 commit c955fac

File tree

2 files changed

+8
-57
lines changed

2 files changed

+8
-57
lines changed

CHANGELOG.md

-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1313
- [#251](https://github.com/nf-core/funcscan/pull/251) Added annotation tool: Pyrodigal. (by @jasmezz)
1414
- [#252](https://github.com/nf-core/funcscan/pull/252) Added a new parameter `-arg_rgi_savejson` that saves the file `<samplename>.json` in the RGI directory. The default output for RGI is now only `<samplename>.txt`. (by @darcy220606)
1515
- [#253](https://github.com/nf-core/funcscan/pull/253) Updated Prodigal to have compressed output files. (by @jasmezz)
16-
- [#258](https://github.com/nf-core/funcscan/pull/258) Added comBGC function to screen whole directory of antiSMASH output (one subfolder per sample). (by @jasmezz)
1716

1817
### `Fixed`
1918

bin/comBGC.py

+8-56
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
SOFTWARE.
3333
"""
3434

35-
tool_version = "0.6.0"
35+
tool_version = "0.5"
3636
welcome = """\
3737
........................
3838
* comBGC v.{version} *
@@ -61,9 +61,7 @@
6161
these can be:
6262
- antiSMASH: <sample name>.gbk and (optional) knownclusterblast/ directory
6363
- DeepBGC: <sample name>.bgc.tsv
64-
- GECCO: <sample name>.clusters.tsv
65-
Note: Please provide files from a single sample only. If you would like to
66-
summarize multiple samples, please see the --antismash_multiple_samples flag.""",
64+
- GECCO: <sample name>.clusters.tsv""",
6765
)
6866
parser.add_argument(
6967
"-o",
@@ -75,16 +73,6 @@
7573
type=str,
7674
default=".",
7775
)
78-
parser.add_argument(
79-
"-a",
80-
"--antismash_multiple_samples",
81-
metavar="PATH",
82-
dest="antismash_multiple_samples",
83-
nargs="?",
84-
help="""directory of antiSMASH output. Should contain subfolders (one per
85-
sample). Can only be used if --input is not specified.""",
86-
type=str,
87-
)
8876
parser.add_argument("-vv", "--verbose", help="increase output verbosity", action="store_true")
8977
parser.add_argument("-v", "--version", help="show version number and exit", action="store_true")
9078

@@ -93,7 +81,6 @@
9381

9482
# Assign input arguments to variables
9583
input = args.input
96-
dir_antismash = args.antismash_multiple_samples
9784
outdir = args.outdir
9885
verbose = args.verbose
9986
version = args.version
@@ -124,38 +111,15 @@
124111
elif path.endswith("knownclusterblast/"):
125112
input_antismash.append(path)
126113

127-
if input and dir_antismash:
128-
exit(
129-
"The flags --input and --antismash_multiple_samples are mutually exclusive.\nPlease use only one of them (or see --help for how to use)."
130-
)
131-
132114
# Make sure that at least one input argument is given
133-
if not (input_antismash or input_gecco or input_deepbgc or dir_antismash):
115+
if not (input_antismash or input_gecco or input_deepbgc):
134116
exit("Please specify at least one input file (i.e. output from antismash, deepbgc, or gecco) or see --help")
135117

136118
########################
137119
# ANTISMASH FUNCTIONS
138120
########################
139121

140122

141-
def prepare_multisample_input_antismash(antismash_dir):
142-
"""
143-
Prepare string of input paths of a given antiSMASH output folder (with sample subdirectories)
144-
"""
145-
sample_paths = []
146-
for root, subdirs, files in os.walk(antismash_dir):
147-
antismash_file = "/".join([root, "index.html"])
148-
if os.path.exists(antismash_file):
149-
sample = root.split("/")[-1]
150-
gbk_path = "/".join([root, sample]) + ".gbk"
151-
kkb_path = "/".join([root, "knownclusterblast"])
152-
if os.path.exists(kkb_path):
153-
sample_paths.append([gbk_path, kkb_path])
154-
else:
155-
sample_paths.append([gbk_path])
156-
return sample_paths
157-
158-
159123
def parse_knownclusterblast(kcb_file_path):
160124
"""
161125
Extract MIBiG IDs from knownclusterblast TXT file.
@@ -184,6 +148,9 @@ def antismash_workflow(antismash_paths):
184148
- Return data frame with aggregated info.
185149
"""
186150

151+
if verbose:
152+
print("\nParsing antiSMASH files\n... ", end="")
153+
187154
antismash_sum_cols = [
188155
"Sample_ID",
189156
"Prediction_tool",
@@ -219,9 +186,6 @@ def antismash_workflow(antismash_paths):
219186

220187
# Aggregate information
221188
Sample_ID = gbk_path.split("/")[-1].split(".gbk")[-2] # Assuming file name equals sample name
222-
if verbose:
223-
print("\nParsing antiSMASH file(s): " + Sample_ID + "\n... ", end="")
224-
225189
with open(gbk_path) as gbk:
226190
for record in SeqIO.parse(gbk, "genbank"): # GBK records are contigs in this case
227191
# Initiate variables per contig
@@ -550,13 +514,7 @@ def gecco_workflow(gecco_paths):
550514
########################
551515

552516
if __name__ == "__main__":
553-
if input_antismash:
554-
tools = {"antiSMASH": input_antismash, "deepBGC": input_deepbgc, "GECCO": input_gecco}
555-
elif dir_antismash:
556-
tools = {"antiSMASH": dir_antismash}
557-
else:
558-
tools = {"deepBGC": input_deepbgc, "GECCO": input_gecco}
559-
517+
tools = {"antiSMASH": input_antismash, "deepBGC": input_deepbgc, "GECCO": input_gecco}
560518
tools_provided = {}
561519

562520
for tool in tools.keys():
@@ -574,13 +532,7 @@ def gecco_workflow(gecco_paths):
574532

575533
for tool in tools_provided.keys():
576534
if tool == "antiSMASH":
577-
if dir_antismash:
578-
antismash_paths = prepare_multisample_input_antismash(dir_antismash)
579-
for input_antismash in antismash_paths:
580-
summary_antismash_temp = antismash_workflow(input_antismash)
581-
summary_antismash = pd.concat([summary_antismash, summary_antismash_temp])
582-
else:
583-
summary_antismash = antismash_workflow(input_antismash)
535+
summary_antismash = antismash_workflow(input_antismash)
584536
elif tool == "deepBGC":
585537
summary_deepbgc = deepbgc_workflow(input_deepbgc)
586538
elif tool == "GECCO":

0 commit comments

Comments
 (0)