Skip to content

Commit b702621

Browse files
authored
Merge pull request #20 from cov-lineages/dev
merge in dev
2 parents 9c03472 + 100146b commit b702621

File tree

6 files changed: 41 additions (+41) and 14 deletions (-14).

6 files changed: 41 additions (+41) and 14 deletions (-14).

scorpio/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,2 @@
11
_program = "scorpio"
2-
__version__ = "0.3.5"
2+
__version__ = "0.3.7"

scorpio/__main__.py

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -99,13 +99,17 @@ def main(sysargs = sys.argv[1:]):
9999
' and their coordinates'
100100
)
101101
subparser_haplotype.add_argument(
102-
"--ref-char", dest="ref_char", default='-', required=False,
102+
"--ref-char", dest="ref_char", required=False,
103103
help="Symbol to use to represent reference allele"
104104
)
105105
subparser_haplotype.add_argument(
106106
"--output-counts", dest="output_counts", action="store_true",
107107
help="Save a file per constellation of ref, alt and other counts"
108108
)
109+
subparser_haplotype.add_argument(
110+
"--append-genotypes", dest="append_genotypes", action="store_true",
111+
help="Output a column per variant with the call"
112+
)
109113
subparser_haplotype.set_defaults(func=scorpio.subcommands.haplotype.run)
110114

111115
# _______________________________ report __________________________________#
@@ -224,6 +228,14 @@ def main(sysargs = sys.argv[1:]):
224228
if "call_all" in args and args.call_all and args.long:
225229
print("Cannot provide long format summary file with multiple calls, ignoring --long\n")
226230

231+
if "append_genotypes" in args and args.append_genotypes and not args.ref_char:
232+
args.ref_char = None
233+
elif "ref_char" in args and not args.ref_char:
234+
args.ref_char = '-'
235+
236+
if "append_genotypes" in args and args.append_genotypes and not args.output_counts:
237+
args.output_counts = True
238+
227239
"""
228240
Exit with help menu if no args supplied
229241
"""

scorpio/scripts/extract_definitions.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@ def translate_if_possible(nuc_start, nuc_ref, nuc_alt, feature_dict, reference_s
124124
#print(query_seq[nuc_start-5: nuc_end+5])
125125

126126
for feature in feature_dict:
127+
if len(feature_dict[feature]) > 2:
128+
continue # ignore nsp definitions
127129
if feature_dict[feature][0] <= nuc_start <= feature_dict[feature][1]:
128130
start, end = nuc_start, nuc_end
129131
while (start - feature_dict[feature][0]) % 3 != 0:

scorpio/scripts/type_constellations.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -552,18 +552,20 @@ def generate_barcode(record_seq, variant_list, ref_char=None, ins_char="?", oth_
552552
counts['support'] = round(counts['alt'] / float(counts['alt'] + counts['ref'] + counts['ambig'] + counts['oth']), 4)
553553
counts['conflict'] = round(counts['ref'] / float(counts['alt'] + counts['ref'] + counts['ambig'] + counts['oth']), 4)
554554

555-
return ''.join(barcode_list), counts
555+
return barcode_list, counts
556556

557557

558558
def type_constellations(in_fasta, list_constellation_files, constellation_names, out_csv, reference_json, ref_char=None,
559-
output_counts=False, label=None):
559+
output_counts=False, label=None, append_genotypes=False):
560560
reference_seq, features_dict = load_feature_coordinates(reference_json)
561561

562562
constellation_dict = {}
563563
for constellation_file in list_constellation_files:
564564
constellation, variants, ignore, mrca_lineage, incompatible_lineage_calls = parse_variants_in(reference_seq, features_dict, constellation_file, label=label)
565565
if not constellation:
566566
continue
567+
if constellation_names and constellation not in constellation_names:
568+
continue
567569
if len(variants) > 0:
568570
constellation_dict[constellation] = variants
569571
print("Found file %s for constellation %s containing %i variants" % (
@@ -579,7 +581,10 @@ def type_constellations(in_fasta, list_constellation_files, constellation_names,
579581
for constellation in constellation_dict:
580582
clean_name = re.sub("[^a-zA-Z0-9_\-.]", "_", constellation)
581583
counts_out[constellation] = open("%s.%s_counts.csv" % (out_csv.replace(".csv", ""), clean_name), "w")
582-
counts_out[constellation].write("query,ref_count,alt_count,ambig_count,other_count,support,conflict\n")
584+
columns = ["query,ref_count,alt_count,ambig_count,other_count,support,conflict"]
585+
if append_genotypes:
586+
columns.extend([var["name"] for var in constellation_dict[constellation]])
587+
counts_out[constellation].write("%s\n" % ','.join(columns))
583588

584589
with open(in_fasta, "r") as f:
585590
for record in SeqIO.parse(f, "fasta"):
@@ -590,11 +595,14 @@ def type_constellations(in_fasta, list_constellation_files, constellation_names,
590595

591596
out_list = [record.id]
592597
for constellation in constellation_dict:
593-
barcode, counts = generate_barcode(record.seq, constellation_dict[constellation], ref_char)
598+
barcode_list, counts = generate_barcode(record.seq, constellation_dict[constellation], ref_char)
594599
if output_counts:
595-
counts_out[constellation].write("%s,%i,%i,%i,%i,%f,%f\n" % (record.id, counts['ref'], counts['alt'],
596-
counts['ambig'], counts['oth'], counts['support'], counts['conflict']))
597-
out_list.append(barcode)
600+
columns = ["%s,%i,%i,%i,%i,%f,%f" % (record.id, counts['ref'], counts['alt'],
601+
counts['ambig'], counts['oth'], counts['support'], counts['conflict'])]
602+
if append_genotypes:
603+
columns.extend(barcode_list)
604+
counts_out[constellation].write("%s\n" % ','.join(columns))
605+
out_list.append(''.join(barcode_list))
598606

599607
variants_out.write("%s\n" % ",".join(out_list))
600608

scorpio/subcommands/haplotype.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,5 @@ def run(options):
1111
options.reference_json,
1212
options.ref_char,
1313
options.output_counts,
14-
options.label)
14+
options.label,
15+
options.append_genotypes)

scorpio/tests/type_constellations_test.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -276,20 +276,24 @@ def test_generate_barcode():
276276
{"ref": 0, "alt": 1, "ambig": 0, "oth": 5, "support": 0.1667, "conflict": 0.0}]
277277

278278
for i in range(len(seqs)):
279-
barcode, counts = generate_barcode(seqs[i], variants, ref_char="-", ins_char="?", oth_char="X")
279+
barcode_list, counts = generate_barcode(seqs[i], variants, ref_char="-", ins_char="?", oth_char="X")
280+
barcode = ''.join(barcode_list)
280281
print(i, barcode, counts)
281282
assert barcode == expect_barcode_dash[i]
282283
assert counts == expect_counts[i]
283284

284-
barcode, counts = generate_barcode(seqs[i], variants, ref_char=None, ins_char="?", oth_char="X")
285+
barcode_list, counts = generate_barcode(seqs[i], variants, ref_char=None, ins_char="?", oth_char="X")
286+
barcode = ''.join(barcode_list)
285287
print(i, barcode, counts)
286288
assert barcode == expect_barcode_ref[i]
287289

288-
barcode, counts = generate_barcode(seqs[i], variants, ref_char=None, ins_char="?", oth_char=None)
290+
barcode_list, counts = generate_barcode(seqs[i], variants, ref_char=None, ins_char="?", oth_char=None)
291+
barcode = ''.join(barcode_list)
289292
print(i, barcode, counts)
290293
assert barcode == expect_barcode_ref_oth[i]
291294

292-
barcode, counts = generate_barcode(seqs[i], variants, ref_char="-", ins_char="$", oth_char="X")
295+
barcode_list, counts = generate_barcode(seqs[i], variants, ref_char="-", ins_char="$", oth_char="X")
296+
barcode = ''.join(barcode_list)
293297
print(i, barcode, counts)
294298
assert barcode == expect_barcode_ins[i]
295299

0 commit comments

Comments
 (0)