@@ -295,7 +295,10 @@ def parse_json_in(refseq, features_dict, variants_file, constellation_names=None
295
295
variant_list .append (record )
296
296
297
297
if "rules" in json_dict :
298
- rules = json_dict ["rules" ]
298
+ if type (json_dict ["rules" ]) == dict and "default" in json_dict ["rules" ]:
299
+ rules = json_dict ["rules" ]
300
+ else :
301
+ rules = {"default" : json_dict ["rules" ]}
299
302
300
303
in_json .close ()
301
304
sorted_variants = sorted (variant_list , key = lambda x : int (x ["ref_start" ]))
@@ -346,9 +349,9 @@ def parse_csv_in(refseq, features_dict, variants_file, constellation_names=None,
346
349
csv_in .close ()
347
350
rules = None
348
351
if len (compulsory ) > 0 :
349
- rules = {}
352
+ rules = {"default" : {} }
350
353
for var in compulsory :
351
- rules [var ] = "alt"
354
+ rules ["default" ][ var ] = "alt"
352
355
sorted_variants = sorted (variant_list , key = lambda x : int (x ["ref_start" ]))
353
356
return sorted_variants , name , rules
354
357
@@ -570,24 +573,24 @@ def var_follows_rules(call, rule):
570
573
else :
571
574
return call == rule_call
572
575
573
- def counts_follow_rules (counts , rules ):
576
+ def counts_follow_rules (counts , rules , key ):
574
577
# rules allowed include "max_ref", "min_alt", "min_snp_alt"
575
578
is_rule_follower = True
576
579
notes = []
577
- for rule in rules :
580
+ for rule in rules [ key ] :
578
581
if ":" in rule :
579
582
continue
580
583
elif str (rule ).startswith ("min" ) or str (rule ).startswith ("max" ):
581
584
rule_parts = rule .split ("_" )
582
585
if len (rule_parts ) <= 1 :
583
586
continue
584
587
elif len (rule_parts ) == 2 :
585
- if rule_parts [0 ] == "min" and counts [rule_parts [1 ]] < rules [rule ]:
588
+ if rule_parts [0 ] == "min" and counts [rule_parts [1 ]] < rules [key ][ rule ]:
586
589
is_rule_follower = False
587
- elif rule_parts [0 ] == "max" and counts [rule_parts [1 ]] > rules [rule ]:
590
+ elif rule_parts [0 ] == "max" and counts [rule_parts [1 ]] > rules [key ][ rule ]:
588
591
is_rule_follower = False
589
592
else :
590
- counts ["rules" ] += 1
593
+ counts ["rules" ][ key ] += 1
591
594
elif len (rule_parts ) == 3 :
592
595
part = None
593
596
if rule_parts [1 ] in ["substitution" , "snp" ]:
@@ -596,24 +599,27 @@ def counts_follow_rules(counts, rules):
596
599
part = "indel"
597
600
if not part :
598
601
is_rule_follower = False
599
- elif rule_parts [0 ] == "min" and counts [part ][rule_parts [2 ]] < rules [rule ]:
602
+ elif rule_parts [0 ] == "min" and counts [part ][rule_parts [2 ]] < rules [key ][ rule ]:
600
603
is_rule_follower = False
601
- notes .append ("%s_%s_count=%i is less than %i" % (part , rule_parts [2 ], counts [part ][rule_parts [2 ]], rules [rule ]))
602
- elif rule_parts [0 ] == "max" and counts [part ][rule_parts [2 ]] > rules [rule ]:
604
+ notes .append ("%s_%s_count=%i is less than %i" % (part , rule_parts [2 ], counts [part ][rule_parts [2 ]], rules [key ][ rule ]))
605
+ elif rule_parts [0 ] == "max" and counts [part ][rule_parts [2 ]] > rules [key ][ rule ]:
603
606
is_rule_follower = False
604
- notes .append ("%s_%s_count=%i is more than %i" % (part , rule_parts [2 ], counts [part ][rule_parts [2 ]], rules [rule ]))
607
+ notes .append ("%s_%s_count=%i is more than %i" % (part , rule_parts [2 ], counts [part ][rule_parts [2 ]], rules [key ][ rule ]))
605
608
else :
606
- counts ["rules" ] += 1
609
+ counts ["rules" ][ key ] += 1
607
610
else :
608
- logging .warning ("Warning: Ignoring rule %s:%s" % (rule , str (rules [rule ])))
611
+ logging .warning ("Warning: Ignoring rule %s:%s" % (rule , str (rules [key ][ rule ])))
609
612
return is_rule_follower , ";" .join (notes )
610
613
611
614
def count_and_classify (record_seq , variant_list , rules ):
612
615
assert rules is not None
613
- counts = {'ref' : 0 , 'alt' : 0 , 'ambig' : 0 , 'oth' : 0 , 'rules' : 0 ,
616
+ counts = {'ref' : 0 , 'alt' : 0 , 'ambig' : 0 , 'oth' : 0 , 'rules' : {} ,
614
617
'substitution' : {'ref' : 0 , 'alt' : 0 , 'ambig' : 0 , 'oth' : 0 },
615
618
'indel' : {'ref' : 0 , 'alt' : 0 , 'ambig' : 0 , 'oth' : 0 }}
616
- is_rule_follower = True
619
+ is_rule_follower_dict = {}
620
+ for key in rules :
621
+ is_rule_follower_dict [key ] = True
622
+ counts ["rules" ][key ] = 0
617
623
618
624
for var in variant_list :
619
625
call , query_allele = call_variant_from_fasta (record_seq , var )
@@ -623,20 +629,27 @@ def count_and_classify(record_seq, variant_list, rules):
623
629
counts ["substitution" ][call ] += 1
624
630
elif var ['type' ] in ["ins" , "del" ]:
625
631
counts ["indel" ][call ] += 1
626
- if var ["name" ] in rules :
627
- if var_follows_rules (call , rules [var ["name" ]]):
628
- counts ['rules' ] += 1
629
- elif is_rule_follower :
630
- is_rule_follower = False
632
+ for key in rules :
633
+ if var ["name" ] in rules [key ]:
634
+ if var_follows_rules (call , rules [key ][var ["name" ]]):
635
+ counts ['rules' ][key ] += 1
636
+ elif is_rule_follower_dict [key ]:
637
+ is_rule_follower_dict [key ] = False
631
638
632
639
counts ['support' ] = round (counts ['alt' ]/ float (counts ['alt' ] + counts ['ref' ] + counts ['ambig' ] + counts ['oth' ]),4 )
633
640
counts ['conflict' ] = round (counts ['ref' ] / float (counts ['alt' ] + counts ['ref' ] + counts ['ambig' ] + counts ['oth' ]),4 )
634
641
635
- if not is_rule_follower :
636
- return counts , False , ""
637
- else :
638
- call , note = counts_follow_rules (counts , rules )
639
- return counts , call , note
642
+ for key in rules :
643
+ if not is_rule_follower_dict [key ]:
644
+ continue
645
+ else :
646
+ call , note = counts_follow_rules (counts , rules , key )
647
+ if call :
648
+ counts ["rules" ] = counts ["rules" ][key ]
649
+ call = key
650
+ return counts , call , note
651
+ counts ["rules" ] = counts ["rules" ]["default" ]
652
+ return counts , False , ""
640
653
641
654
642
655
def generate_barcode (record_seq , variant_list , ref_char = None , ins_char = "?" , oth_char = "X" ,constellation_count_dict = None ):
@@ -920,7 +933,15 @@ def combine_counts_call_notes(counts1, call1, note1, counts2, call2, note2):
920
933
counts [key ] = counts1 [key ] + counts2 [key ]
921
934
counts ['support' ] = round (counts ['alt' ] / float (counts ['alt' ] + counts ['ref' ] + counts ['ambig' ] + counts ['oth' ]), 4 )
922
935
counts ['conflict' ] = round (counts ['ref' ] / float (counts ['alt' ] + counts ['ref' ] + counts ['ambig' ] + counts ['oth' ]), 4 )
923
- call = call1 and call2
936
+ if not call1 or not call2 :
937
+ call = False
938
+ elif call1 == call2 :
939
+ call = call1
940
+ elif call1 == "default" :
941
+ call = call2
942
+ else :
943
+ call = call1
944
+
924
945
note = note1
925
946
if note != "" and note2 != "" :
926
947
note += ";" + note2
@@ -989,10 +1010,12 @@ def classify_constellations(in_fasta, list_constellation_files, constellation_na
989
1010
best_support = 0
990
1011
best_conflict = 1
991
1012
best_counts = None
1013
+ best_call = False
992
1014
scores = {}
993
1015
children = {}
994
1016
for constellation in constellation_dict :
995
1017
constellation_name = name_dict [constellation ]
1018
+ logging .debug ("Consider constellation %s" % constellation_name )
996
1019
parents = []
997
1020
if not constellation_name :
998
1021
continue
@@ -1015,20 +1038,25 @@ def classify_constellations(in_fasta, list_constellation_files, constellation_na
1015
1038
children [parent ].append (constellation )
1016
1039
1017
1040
if call :
1041
+ logging .debug ("Have call for %s" % constellation_name )
1018
1042
if call_all :
1043
+ if call != "default" :
1044
+ constellation_name = "%s %s" % (call , constellation_name )
1019
1045
lineages .append (constellation_name )
1020
1046
names .append (constellation )
1021
1047
elif constellation in children and best_constellation in children [constellation ]:
1022
- continue
1048
+ logging . debug ( "Ignore as parent of best constellation" )
1023
1049
elif (not best_constellation ) \
1024
1050
or (counts ['support' ] > best_support ) \
1025
1051
or (counts ['support' ] == best_support and counts ['conflict' ] < best_conflict )\
1026
1052
or (counts ['support' ] == best_support and counts ['conflict' ] == best_conflict and counts ['rules' ] > best_counts ["rules" ])\
1027
1053
or (best_constellation in parents ):
1028
1054
best_constellation = constellation
1055
+ logging .debug ("Set best constellation %s" % best_constellation )
1029
1056
best_support = counts ['support' ]
1030
1057
best_conflict = counts ['conflict' ]
1031
1058
best_counts = counts
1059
+ best_call = call
1032
1060
1033
1061
if interspersion :
1034
1062
if counts ["alt" ] > 1 :
@@ -1042,7 +1070,11 @@ def classify_constellations(in_fasta, list_constellation_files, constellation_na
1042
1070
counts ['oth' ], counts ['rules' ], counts ['support' ],
1043
1071
counts ['conflict' ], call , constellation , note ))
1044
1072
if not call_all and best_constellation :
1045
- lineages .append (name_dict [best_constellation ])
1073
+ if best_call != "default" :
1074
+ best_constellation_name = "%s %s" % (best_call , name_dict [best_constellation ])
1075
+ else :
1076
+ best_constellation_name = name_dict [best_constellation ]
1077
+ lineages .append (best_constellation_name )
1046
1078
names .append (best_constellation )
1047
1079
1048
1080
out_entries = [record .id , "|" .join (lineages ), "|" .join ([mrca_lineage_dict [n ] for n in names ])]
0 commit comments