Skip to content

Commit 47c5b19

Browse files
authored
Merge pull request #27 from cov-lineages/dev
Dev
2 parents d4e505d + 8ad38a1 commit 47c5b19

File tree

6 files changed

+97
-43
lines changed

6 files changed

+97
-43
lines changed

scorpio/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
_program = "scorpio"
2-
__version__ = "0.3.9"
2+
__version__ = "0.3.11"

scorpio/__main__.py

Lines changed: 39 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import argparse
44
import sys
55
import os
6+
import logging
67

78
import constellations
89

@@ -22,20 +23,18 @@ def main(sysargs = sys.argv[1:]):
2223
parser.add_argument("-cv", "--constellations-version", action='version', version=f"constellations {constellations.__version__}", help="show constellation's version number and exit")
2324

2425
subparsers = parser.add_subparsers(
25-
title="Available subcommands", help="", metavar=""
26+
title="Available subcommands", help="", metavar="", dest='command'
2627
)
2728
# _______________________________ common _________________________________#
2829
common = argparse.ArgumentParser(prog=_program, add_help=False)
2930

3031
io_group = common.add_argument_group('Input/output options')
31-
io_group.add_argument("-i", "--input", dest="input", required=True, help="Primary input file")
32+
io_group.add_argument("-i", "--input", dest="input", required=False, help="Primary input file")
3233
io_group.add_argument("-m", "--metadata", dest="metadata", required=False, help="CSV of associated metadata")
33-
3434
io_group.add_argument("-o", "--output", dest="output", required=False, help="Output file or path")
3535
io_group.add_argument("-p", "--prefix", dest="prefix", required=False, help="Output prefix. Default: scorpio")
3636
io_group.add_argument("--log-file", dest="log_file", metavar='<filename>', required=False,
3737
help="Log file to use (otherwise uses stdout)")
38-
io_group.add_argument("--config", action="store", help="Input config file", dest="config")
3938

4039
constellation_group = common.add_argument_group('Constellation options')
4140
constellation_group.add_argument("-c", "--constellations", dest="constellations", required=False, nargs='+',
@@ -51,9 +50,6 @@ def main(sysargs = sys.argv[1:]):
5150
help="Extra mutations to type")
5251

5352
misc_group = common.add_argument_group('Misc options')
54-
misc_group.add_argument('--tempdir', action="store",
55-
help="Specify where you want the temporary stuff to go Default: $TMPDIR")
56-
misc_group.add_argument("--no-temp", action="store_true", help="Output all intermediate files")
5753
misc_group.add_argument("--verbose", action="store_true", help="Print lots of stuff to screen")
5854
misc_group.add_argument("--dry-run", dest="dry_run", action="store_true", help="Quit after checking constellations and variants are AOK")
5955
misc_group.add_argument('-t', '--threads', action='store', dest="threads", type=int, help="Number of threads")
@@ -161,7 +157,20 @@ def main(sysargs = sys.argv[1:]):
161157

162158
subparser_define.set_defaults(func=scorpio.subcommands.define.run)
163159

160+
# _______________________________ list __________________________________#
161+
162+
subparser_list = subparsers.add_parser(
163+
"list",
164+
parents=[common],
165+
help="Lists the constellations installed that would be typed/classified with the provided input options",
166+
)
167+
subparser_list.add_argument(
168+
'--reference-json', dest="reference_json", help='JSON file containing keys "genome" with reference sequence '
169+
'and "proteins", "features" or "genes" with features of interest'
170+
' and their coordinates'
171+
)
164172

173+
subparser_list.set_defaults(func=scorpio.subcommands.list.run)
165174
# _________________________________________________________________________#
166175

167176
args = parser.parse_args()
@@ -194,6 +203,20 @@ def main(sysargs = sys.argv[1:]):
194203
if not os.path.exists(args.prefix):
195204
os.mkdir(args.prefix)
196205

206+
## format logging
207+
format = '%(levelname)s: %(message)s'
208+
if args.verbose:
209+
level = logging.DEBUG
210+
elif args.command == 'list':
211+
level = logging.ERROR
212+
else:
213+
level = logging.INFO
214+
215+
if args.log_file:
216+
logging.basicConfig(filename=args.log_file, level=level, format=format)
217+
else:
218+
logging.basicConfig(level=level, format=format)
219+
197220
if not args.reference_json or not args.constellations:
198221
constellations_dir = constellations.__path__[0]
199222
reference_json = args.reference_json
@@ -202,7 +225,7 @@ def main(sysargs = sys.argv[1:]):
202225
constellation_subdirs = ["data", "definitions"]
203226
for dir in constellation_subdirs:
204227
data_dir = os.path.join(constellations_dir, dir)
205-
print(f"Looking in {data_dir} for data files...")
228+
logging.info(f"Looking in {data_dir} for data files...")
206229
for r, d, f in os.walk(data_dir):
207230
for fn in f:
208231
if fn == "SARS-CoV-2.json":
@@ -214,22 +237,21 @@ def main(sysargs = sys.argv[1:]):
214237
elif not args.pangolin and fn.endswith(".csv"):
215238
list_constellation_files.append(os.path.join(r, fn))
216239
if (not args.reference_json and reference_json == "") or (not args.constellations and list_constellation_files == []):
217-
print(sfunk.cyan(
218-
"""Please either provide a reference JSON and constellation definition file, or check your environment
219-
to make sure that constellations has been properly installed."""))
240+
logging.warning("""Please either provide a reference JSON and constellation definition file, or check your environment
241+
to make sure that constellations has been properly installed.""")
220242
sys.exit(-1)
221243
if not args.reference_json:
222244
args.reference_json = reference_json
223-
print("Found reference %s" %args.reference_json)
245+
logging.info("Found reference %s" %args.reference_json)
224246
if not args.constellations:
225247
args.constellations = list_constellation_files
226-
print("Found constellations:")
248+
logging.info("Found constellations:")
227249
for c in args.constellations:
228-
print(c)
229-
print("\n")
250+
logging.info(c)
251+
logging.info("\n")
230252

231-
if "call_all" in args and args.call_all and args.long:
232-
print("Cannot provide long format summary file with multiple calls, ignoring --long\n")
253+
if "call_all" in args and args.call_all and args.long and args.verbose:
254+
logging.info("Cannot provide long format summary file with multiple calls, ignoring --long\n")
233255

234256
if "append_genotypes" in args and args.append_genotypes and not args.ref_char:
235257
args.ref_char = None

scorpio/scripts/extract_definitions.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import operator
66
import argparse
77
import json
8+
import logging
89
from Bio.Seq import Seq
910
from operator import itemgetter
1011

@@ -65,12 +66,12 @@ def get_group_dict(in_variants, group_column, index_column, subset):
6566
if subset and row[group_column] not in subset:
6667
continue
6768
if row[index_column] in group_dict:
68-
print("%s is a duplicate in group CSV, keeping first")
69+
logging.warning("%s is a duplicate in group CSV, keeping first")
6970
else:
7071
group_dict[row[index_column]] = row[group_column]
7172
groups.add(row[group_column])
7273

73-
print("Found", len(groups), "groups")
74+
logging.info("Found %d groups" % len(groups))
7475

7576
return group_dict
7677

@@ -245,14 +246,14 @@ def extract_definitions(in_variants, in_groups, group_column, index_column, refe
245246
with open(in_variants, 'r', newline = '') as csv_in:
246247
reader = csv.DictReader(csv_in, delimiter=",", quotechar='\"', dialect = "unix")
247248
if index_column not in reader.fieldnames:
248-
print("Index column %s not found in %s" % (index_column, in_variants))
249+
logging.warning("Index column %s not found in %s" % (index_column, in_variants))
249250

250251
if "nucleotide_variants" in reader.fieldnames:
251252
var_column = "nucleotide_variants"
252253
elif "nucleotide_mutations" in reader.fieldnames:
253254
var_column = "nucleotide_mutations"
254255
else:
255-
print("No nucleotide_variants or nucleotide_mutations columns found")
256+
logging.warning("No nucleotide_variants or nucleotide_mutations columns found")
256257
sys.exit(-1)
257258

258259
for row in reader:
@@ -270,7 +271,7 @@ def extract_definitions(in_variants, in_groups, group_column, index_column, refe
270271
for lineage in outgroup_dict[group_dict[index]]:
271272
update_var_dict(outgroup_var_dict, lineage, variants)
272273
else:
273-
print("Index column or variants column not in row", row)
274+
logging.warning("Index column or variants column not in row", row)
274275

275276
#print("outgroup_var_dict", outgroup_var_dict)
276277
#print("var_dict", var_dict)

scorpio/scripts/type_constellations.py

Lines changed: 40 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import sys
88
import json
99
import re
10+
import logging
1011

1112
if sys.version_info[0] < 3:
1213
raise Exception("Python 3 or a more recent version is required.")
@@ -54,7 +55,7 @@ def load_feature_coordinates(reference_json):
5455
features_dict[name] = (start, end, json_dict[feature][item]["gene"])
5556
else:
5657
features_dict[name] = (start, end)
57-
print("Found reference feature %s with coordinates" % name, features_dict[name])
58+
logging.info("Found reference feature %s with coordinates" % name, features_dict[name])
5859
if len(features_dict) == 0:
5960
sys.stderr.write("No features (keys \"genes\", \"proteins\" or \"features\" ) provided in JSON %s " %
6061
reference_json)
@@ -148,7 +149,7 @@ def variant_to_variant_record(l, refseq, features_dict):
148149
else:
149150
info["ref_start"] = info["pos"]
150151
info["name"] = l
151-
print("Warning: found variant of type insertion, which will be ignored during typing")
152+
logging.warning("Warning: found variant of type insertion, which will be ignored during typing")
152153
elif lsplit[0] in ["snp", "nuc"]:
153154
info = {"name": l, "type": "snp"}
154155
m = re.match(r'(?P<ref_allele>[ACGTUN]+)(?P<ref_start>\d+)(?P<alt_allele>[AGCTUN]*)', l[4:])
@@ -174,7 +175,7 @@ def variant_to_variant_record(l, refseq, features_dict):
174175
m = re.match(r'[aa:]*(?P<cds>\w+):(?P<ref_allele>[a-zA-Z-*]+)(?P<aa_pos>\d+)(?P<alt_allele>[a-zA-Z-*]*)', l)
175176
if not m:
176177
sys.stderr.write("Warning: couldn't parse the following string: %s - ignoring\n" % l)
177-
# sys.exit(1)
178+
sys.exit(1)
178179
return info
179180

180181
info = m.groupdict()
@@ -255,7 +256,8 @@ def parse_json_in(refseq, features_dict, variants_file, constellation_names=None
255256
if constellation_names and name not in constellation_names:
256257
return variant_list, name, rules, mrca_lineage, incompatible_lineage_calls
257258

258-
print("\nParsing constellation JSON file %s" % variants_file)
259+
logging.info("\n")
260+
logging.info("Parsing constellation JSON file %s" % variants_file)
259261

260262
if "sites" in json_dict:
261263
for site in json_dict["sites"]:
@@ -287,7 +289,8 @@ def parse_csv_in(refseq, features_dict, variants_file, constellation_names=None)
287289
if constellation_names and name not in constellation_names:
288290
return variant_list, name, compulsory
289291

290-
print("\nParsing constellation CSV file %s" % variants_file)
292+
logging.info("\n")
293+
logging.info("Parsing constellation CSV file %s" % variants_file)
291294

292295
csv_in = open("%s" % variants_file, 'r')
293296
reader = csv.DictReader(csv_in, delimiter=",")
@@ -299,7 +302,7 @@ def parse_csv_in(refseq, features_dict, variants_file, constellation_names=None)
299302

300303
if "id" not in reader.fieldnames:
301304
csv_in.close()
302-
print("Warning: CSV headerline does not contain 'id': %s - ignoring" % reader.fieldnames)
305+
logging.info("Warning: CSV headerline does not contain 'id': %s - ignoring" % reader.fieldnames)
303306
return variant_list, name, compulsory
304307

305308
for row in reader:
@@ -331,8 +334,8 @@ def parse_textfile_in(refseq, features_dict, variants_file, constellation_names=
331334
name = parse_name_from_file(variants_file)
332335
if constellation_names and name not in constellation_names:
333336
return variant_list, name
334-
335-
print("\nParsing constellation text file %s" % variants_file)
337+
logging.info("\n")
338+
logging.info("Parsing constellation text file %s" % variants_file)
336339

337340
with open("%s" % variants_file, "r") as f:
338341
for line in f:
@@ -380,7 +383,8 @@ def parse_variants_in(refseq, features_dict, variants_file, constellation_names=
380383

381384

382385
def parse_mutations_in(mutations_file):
383-
print("\nParsing mutations file %s" % mutations_file)
386+
logging.info("\n")
387+
logging.info("Parsing mutations file %s" % mutations_file)
384388

385389
mutations_list = []
386390
with open("%s" % mutations_file, "r") as f:
@@ -390,7 +394,7 @@ def parse_mutations_in(mutations_file):
390394
if l.startswith('id'):
391395
continue
392396
mutations_list.append(l)
393-
print("Found %d mutations" % len(mutations_list))
397+
logging.info("Found %d mutations" % len(mutations_list))
394398
return mutations_list
395399

396400

@@ -472,7 +476,7 @@ def call_variant_from_fasta(record_seq, var, ins_char="?", oth_char=None, codon=
472476
if len(query) % 3 != 0:
473477
query = query_allele.replace("-","N")
474478
if len(query) % 3 != 0:
475-
print("Warning: while typing variant %s (before,ref,after) = (%s,%s,%s) found sequence with query allele %s treated as %s. Handling by adding Ns which will result in ambiguous calls" %(var["name"], var["before"], var["ref_allele"], var["after"], query_allele, query))
479+
logging.warning("Warning: while typing variant %s (before,ref,after) = (%s,%s,%s) found sequence with query allele %s treated as %s. Handling by adding Ns which will result in ambiguous calls" %(var["name"], var["before"], var["ref_allele"], var["after"], query_allele, query))
476480
query_allele = query
477481
while len(query_allele) % 3 != 0:
478482
query_allele += "N"
@@ -548,7 +552,7 @@ def counts_follow_rules(counts, rules):
548552
else:
549553
counts["rules"] += 1
550554
else:
551-
print("Warning: Ignoring rule %s:%s" % (rule, str(rules[rule])))
555+
logging.warning("Warning: Ignoring rule %s:%s" % (rule, str(rules[rule])))
552556
return is_rule_follower
553557

554558
def count_and_classify(record_seq, variant_list, rules):
@@ -607,10 +611,10 @@ def type_constellations(in_fasta, list_constellation_files, constellation_names,
607611
continue
608612
if len(variants) > 0:
609613
constellation_dict[constellation] = variants
610-
print("Found file %s for constellation %s containing %i variants" % (
614+
logging.info("Found file %s for constellation %s containing %i variants" % (
611615
constellation_file, constellation, len([v["name"] for v in variants])))
612616
else:
613-
print("Warning: %s is not a valid constellation file - ignoring" % constellation_file)
617+
logging.warning("Warning: %s is not a valid constellation file - ignoring" % constellation_file)
614618
if mutations_list:
615619
new_mutations_list = []
616620
for entry in mutations_list:
@@ -619,7 +623,7 @@ def type_constellations(in_fasta, list_constellation_files, constellation_names,
619623
else:
620624
new_mutations_list.append(entry)
621625
mutations_list = new_mutations_list
622-
print("Typing provided mutations %s" % ",".join(mutations_list))
626+
logging.info("Typing provided mutations %s" % ",".join(mutations_list))
623627
mutation_variants = parse_mutations(reference_seq, features_dict, mutations_list)
624628
if len(constellation_dict) == 1 and "mutations" not in constellation_dict:
625629
constellation = list(constellation_dict)[0]
@@ -697,19 +701,19 @@ def classify_constellations(in_fasta, list_constellation_files, constellation_na
697701
if constellation_names and constellation not in constellation_names:
698702
continue
699703
if not rules:
700-
print("Warning: No rules provided to classify %s - ignoring" % constellation)
704+
logging.warning("Warning: No rules provided to classify %s - ignoring" % constellation)
701705
continue
702706
else:
703707
rule_dict[constellation] = rules
704708
if len(variants) > 0:
705709
constellation_dict[constellation] = variants
706-
print("Found file %s for constellation %s containing %i variants" % (
710+
logging.info("Found file %s for constellation %s containing %i variants" % (
707711
constellation_file, constellation, len([v["name"] for v in variants])))
708-
print("Rules", rule_dict[constellation])
712+
logging.info("Rules %s" %rule_dict[constellation])
709713
mrca_lineage_dict[constellation] = mrca_lineage
710714
incompatible_dict[constellation] = incompatible_lineage_calls
711715
else:
712-
print("Warning: %s is not a valid constellation file - ignoring" % constellation_file)
716+
logging.warning("Warning: %s is not a valid constellation file - ignoring" % constellation_file)
713717

714718
if mutations_list:
715719
new_mutations_list = []
@@ -803,6 +807,23 @@ def classify_constellations(in_fasta, list_constellation_files, constellation_na
803807
counts_out[constellation].close()
804808

805809

810+
def list_constellations(list_constellation_files, constellation_names, reference_json, label=None):
    """Print one name per usable constellation file, newline-separated.

    The reference sequence and feature coordinates are loaded from
    ``reference_json`` and every entry of ``list_constellation_files`` is
    parsed with ``parse_variants_in``. A file contributes a line only when:

    * the parsed constellation name is truthy,
    * ``constellation_names`` is empty/None or contains that name, and
    * at least one variant was parsed from it.

    For a contributing file the MRCA lineage is reported when it is truthy;
    otherwise the constellation name itself is reported.

    :param list_constellation_files: iterable of constellation files to parse
    :param constellation_names: optional collection restricting which
        constellation names are listed
    :param reference_json: reference JSON handed to ``load_feature_coordinates``
    :param label: forwarded unchanged to ``parse_variants_in``
    :return: None; the result is written to stdout with ``print``
    """
    reference_seq, features_dict = load_feature_coordinates(reference_json)

    names_to_report = []
    for path in list_constellation_files:
        parsed = parse_variants_in(reference_seq, features_dict, path,
                                   constellation_names, label=label)
        # Only the name, the variants and the MRCA lineage are needed here;
        # the third and fifth fields of the result are deliberately unused.
        constellation, variants, _unused_third, mrca_lineage, _unused_fifth = parsed

        excluded = (not constellation) or (
            constellation_names and constellation not in constellation_names)
        if excluded or len(variants) == 0:
            continue

        # Prefer the MRCA lineage as the displayed name when one is defined.
        names_to_report.append(mrca_lineage if mrca_lineage else constellation)

    print("\n".join(names_to_report))
826+
806827
def parse_args():
807828
parser = argparse.ArgumentParser(description="""Type an alignment at specific sites and classify with a barcode.""",
808829
formatter_class=argparse.RawTextHelpFormatter)

scorpio/subcommands/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#!/usr/bin/env python3
22

3-
__all__ = ["classify", "haplotype", "report", "define"]
3+
__all__ = ["classify", "haplotype", "report", "define", "list"]
44

55
from scorpio.subcommands import *

scorpio/subcommands/list.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#!/usr/bin/env python3
2+
3+
from scorpio.scripts.type_constellations import *
4+
5+
6+
def run(options):
    """Entry point of the ``list`` subcommand.

    Reads ``constellations``, ``names``, ``reference_json`` and ``label``
    from the parsed command-line ``options`` and passes them, in that order,
    to ``list_constellations``. Nothing is returned.
    """
    constellation_files = options.constellations
    wanted_names = options.names
    reference = options.reference_json
    label = options.label
    list_constellations(constellation_files, wanted_names, reference, label)

0 commit comments

Comments
 (0)