7
7
import sys
8
8
import json
9
9
import re
10
+ import logging
10
11
11
12
if sys .version_info [0 ] < 3 :
12
13
raise Exception ("Python 3 or a more recent version is required." )
@@ -54,7 +55,7 @@ def load_feature_coordinates(reference_json):
54
55
features_dict [name ] = (start , end , json_dict [feature ][item ]["gene" ])
55
56
else :
56
57
features_dict [name ] = (start , end )
57
- print ("Found reference feature %s with coordinates" % name , features_dict [name ])
58
+ logging . info ("Found reference feature %s with coordinates" % name , features_dict [name ])
58
59
if len (features_dict ) == 0 :
59
60
sys .stderr .write ("No features (keys \" genes\" , \" proteins\" or \" features\" ) provided in JSON %s " %
60
61
reference_json )
@@ -148,7 +149,7 @@ def variant_to_variant_record(l, refseq, features_dict):
148
149
else :
149
150
info ["ref_start" ] = info ["pos" ]
150
151
info ["name" ] = l
151
- print ("Warning: found variant of type insertion, which will be ignored during typing" )
152
+ logging . warning ("Warning: found variant of type insertion, which will be ignored during typing" )
152
153
elif lsplit [0 ] in ["snp" , "nuc" ]:
153
154
info = {"name" : l , "type" : "snp" }
154
155
m = re .match (r'(?P<ref_allele>[ACGTUN]+)(?P<ref_start>\d+)(?P<alt_allele>[AGCTUN]*)' , l [4 :])
@@ -174,7 +175,7 @@ def variant_to_variant_record(l, refseq, features_dict):
174
175
m = re .match (r'[aa:]*(?P<cds>\w+):(?P<ref_allele>[a-zA-Z-*]+)(?P<aa_pos>\d+)(?P<alt_allele>[a-zA-Z-*]*)' , l )
175
176
if not m :
176
177
sys .stderr .write ("Warning: couldn't parse the following string: %s - ignoring\n " % l )
177
- # sys.exit(1)
178
+ sys .exit (1 )
178
179
return info
179
180
180
181
info = m .groupdict ()
@@ -255,7 +256,8 @@ def parse_json_in(refseq, features_dict, variants_file, constellation_names=None
255
256
if constellation_names and name not in constellation_names :
256
257
return variant_list , name , rules , mrca_lineage , incompatible_lineage_calls
257
258
258
- print ("\n Parsing constellation JSON file %s" % variants_file )
259
+ logging .info ("\n " )
260
+ logging .info ("Parsing constellation JSON file %s" % variants_file )
259
261
260
262
if "sites" in json_dict :
261
263
for site in json_dict ["sites" ]:
@@ -287,7 +289,8 @@ def parse_csv_in(refseq, features_dict, variants_file, constellation_names=None)
287
289
if constellation_names and name not in constellation_names :
288
290
return variant_list , name , compulsory
289
291
290
- print ("\n Parsing constellation CSV file %s" % variants_file )
292
+ logging .info ("\n " )
293
+ logging .info ("Parsing constellation CSV file %s" % variants_file )
291
294
292
295
csv_in = open ("%s" % variants_file , 'r' )
293
296
reader = csv .DictReader (csv_in , delimiter = "," )
@@ -299,7 +302,7 @@ def parse_csv_in(refseq, features_dict, variants_file, constellation_names=None)
299
302
300
303
if "id" not in reader .fieldnames :
301
304
csv_in .close ()
302
- print ("Warning: CSV headerline does not contain 'id': %s - ignoring" % reader .fieldnames )
305
+ logging . info ("Warning: CSV headerline does not contain 'id': %s - ignoring" % reader .fieldnames )
303
306
return variant_list , name , compulsory
304
307
305
308
for row in reader :
@@ -331,8 +334,8 @@ def parse_textfile_in(refseq, features_dict, variants_file, constellation_names=
331
334
name = parse_name_from_file (variants_file )
332
335
if constellation_names and name not in constellation_names :
333
336
return variant_list , name
334
-
335
- print ( " \n Parsing constellation text file %s" % variants_file )
337
+ logging . info ( " \n " )
338
+ logging . info ( "Parsing constellation text file %s" % variants_file )
336
339
337
340
with open ("%s" % variants_file , "r" ) as f :
338
341
for line in f :
@@ -380,7 +383,8 @@ def parse_variants_in(refseq, features_dict, variants_file, constellation_names=
380
383
381
384
382
385
def parse_mutations_in (mutations_file ):
383
- print ("\n Parsing mutations file %s" % mutations_file )
386
+ logging .info ("\n " )
387
+ logging .info ("Parsing mutations file %s" % mutations_file )
384
388
385
389
mutations_list = []
386
390
with open ("%s" % mutations_file , "r" ) as f :
@@ -390,7 +394,7 @@ def parse_mutations_in(mutations_file):
390
394
if l .startswith ('id' ):
391
395
continue
392
396
mutations_list .append (l )
393
- print ("Found %d mutations" % len (mutations_list ))
397
+ logging . info ("Found %d mutations" % len (mutations_list ))
394
398
return mutations_list
395
399
396
400
@@ -472,7 +476,7 @@ def call_variant_from_fasta(record_seq, var, ins_char="?", oth_char=None, codon=
472
476
if len (query ) % 3 != 0 :
473
477
query = query_allele .replace ("-" ,"N" )
474
478
if len (query ) % 3 != 0 :
475
- print ("Warning: while typing variant %s (before,ref,after) = (%s,%s,%s) found sequence with query allele %s treated as %s. Handling by adding Ns which will result in ambiguous calls" % (var ["name" ], var ["before" ], var ["ref_allele" ], var ["after" ], query_allele , query ))
479
+ logging . warning ("Warning: while typing variant %s (before,ref,after) = (%s,%s,%s) found sequence with query allele %s treated as %s. Handling by adding Ns which will result in ambiguous calls" % (var ["name" ], var ["before" ], var ["ref_allele" ], var ["after" ], query_allele , query ))
476
480
query_allele = query
477
481
while len (query_allele ) % 3 != 0 :
478
482
query_allele += "N"
@@ -548,7 +552,7 @@ def counts_follow_rules(counts, rules):
548
552
else :
549
553
counts ["rules" ] += 1
550
554
else :
551
- print ("Warning: Ignoring rule %s:%s" % (rule , str (rules [rule ])))
555
+ logging . warning ("Warning: Ignoring rule %s:%s" % (rule , str (rules [rule ])))
552
556
return is_rule_follower
553
557
554
558
def count_and_classify (record_seq , variant_list , rules ):
@@ -607,10 +611,10 @@ def type_constellations(in_fasta, list_constellation_files, constellation_names,
607
611
continue
608
612
if len (variants ) > 0 :
609
613
constellation_dict [constellation ] = variants
610
- print ("Found file %s for constellation %s containing %i variants" % (
614
+ logging . info ("Found file %s for constellation %s containing %i variants" % (
611
615
constellation_file , constellation , len ([v ["name" ] for v in variants ])))
612
616
else :
613
- print ("Warning: %s is not a valid constellation file - ignoring" % constellation_file )
617
+ logging . warning ("Warning: %s is not a valid constellation file - ignoring" % constellation_file )
614
618
if mutations_list :
615
619
new_mutations_list = []
616
620
for entry in mutations_list :
@@ -619,7 +623,7 @@ def type_constellations(in_fasta, list_constellation_files, constellation_names,
619
623
else :
620
624
new_mutations_list .append (entry )
621
625
mutations_list = new_mutations_list
622
- print ("Typing provided mutations %s" % "," .join (mutations_list ))
626
+ logging . info ("Typing provided mutations %s" % "," .join (mutations_list ))
623
627
mutation_variants = parse_mutations (reference_seq , features_dict , mutations_list )
624
628
if len (constellation_dict ) == 1 and "mutations" not in constellation_dict :
625
629
constellation = list (constellation_dict )[0 ]
@@ -697,19 +701,19 @@ def classify_constellations(in_fasta, list_constellation_files, constellation_na
697
701
if constellation_names and constellation not in constellation_names :
698
702
continue
699
703
if not rules :
700
- print ("Warning: No rules provided to classify %s - ignoring" % constellation )
704
+ logging . warning ("Warning: No rules provided to classify %s - ignoring" % constellation )
701
705
continue
702
706
else :
703
707
rule_dict [constellation ] = rules
704
708
if len (variants ) > 0 :
705
709
constellation_dict [constellation ] = variants
706
- print ("Found file %s for constellation %s containing %i variants" % (
710
+ logging . info ("Found file %s for constellation %s containing %i variants" % (
707
711
constellation_file , constellation , len ([v ["name" ] for v in variants ])))
708
- print ("Rules" , rule_dict [constellation ])
712
+ logging . info ("Rules %s" % rule_dict [constellation ])
709
713
mrca_lineage_dict [constellation ] = mrca_lineage
710
714
incompatible_dict [constellation ] = incompatible_lineage_calls
711
715
else :
712
- print ("Warning: %s is not a valid constellation file - ignoring" % constellation_file )
716
+ logging . warning ("Warning: %s is not a valid constellation file - ignoring" % constellation_file )
713
717
714
718
if mutations_list :
715
719
new_mutations_list = []
@@ -803,6 +807,23 @@ def classify_constellations(in_fasta, list_constellation_files, constellation_na
803
807
counts_out [constellation ].close ()
804
808
805
809
810
def list_constellations(list_constellation_files, constellation_names, reference_json, label=None):
    """Print the name of every usable constellation, one per line.

    The reference is loaded with ``load_feature_coordinates`` and each file in
    ``list_constellation_files`` is parsed with ``parse_variants_in``.  A file
    contributes nothing when:

    * parsing yields no constellation name,
    * ``constellation_names`` is non-empty and the name is not in it, or
    * parsing yields no variants.

    For every remaining file the MRCA lineage is listed when one was parsed,
    otherwise the constellation name itself is listed.

    Args:
        list_constellation_files: iterable of constellation file paths.
        constellation_names: optional collection of names to restrict to;
            a falsy value disables the filter.
        reference_json: reference JSON passed to ``load_feature_coordinates``.
        label: forwarded unchanged to ``parse_variants_in``.

    Returns:
        None.  The names are written to stdout joined by newlines.
    """
    # NOTE(review): statement nesting was reconstructed from a diff view that
    # had lost its indentation -- confirm the final print runs once, after
    # the loop, rather than once per file.
    reference_seq, features_dict = load_feature_coordinates(reference_json)

    list_of_constellations = []
    for constellation_file in list_constellation_files:
        # The third and fifth values are not needed for listing.
        constellation, variants, _ignore, mrca_lineage, _incompatible_lineage_calls = parse_variants_in(
            reference_seq, features_dict, constellation_file, constellation_names, label=label)

        if not constellation:
            continue
        if constellation_names and constellation not in constellation_names:
            continue
        if len(variants) == 0:
            continue

        # Prefer the MRCA lineage as the display name when one is available.
        list_of_constellations.append(mrca_lineage if mrca_lineage else constellation)

    print("\n".join(list_of_constellations))
826
+
806
827
def parse_args ():
807
828
parser = argparse .ArgumentParser (description = """Type an alignment at specific sites and classify with a barcode.""" ,
808
829
formatter_class = argparse .RawTextHelpFormatter )
0 commit comments