Skip to content

Commit 31ef170

Browse files
committed
Merge branch 'dev'
2 parents dd77fed + f48d876 commit 31ef170

12 files changed

+111
-140
lines changed

bin/multisample_plot.py

+16-55
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#!/usr/bin/env python3
1+
#! /usr/bin/env python3
22
# /// script
33
# requires-python = ">=3.11"
44
# dependencies = [
@@ -36,8 +36,6 @@
3636
from __future__ import annotations
3737

3838
import argparse
39-
import json
40-
import os
4139
from math import log10
4240
from pathlib import Path
4341

@@ -49,6 +47,8 @@
4947
geom_line,
5048
ggplot,
5149
ggsave,
50+
guide_legend,
51+
guides,
5252
labs,
5353
theme_minimal,
5454
)
@@ -81,13 +81,6 @@ def parse_command_line_args() -> argparse.Namespace:
8181
required=True,
8282
help="Directory to scan for BED files.",
8383
)
84-
parser.add_argument(
85-
"--sample_lookup",
86-
"-s",
87-
type=Path,
88-
required=True,
89-
help="JSON-formatted sample lookup, where the keys are the barcodes, and the values are the sample ID to be associated with each barcode.",
90-
)
9184
parser.add_argument(
9285
"--min_coverage",
9386
"-m",
@@ -104,37 +97,7 @@ def parse_command_line_args() -> argparse.Namespace:
10497
return parser.parse_args()
10598

10699

107-
def read_sample_lookup(file_path: str) -> dict[str, str]:
108-
"""
109-
Reads a JSON file and returns its content.
110-
111-
This function opens the specified JSON file, loads its content into a Python object,
112-
prints the content to the console, and then returns the loaded data.
113-
114-
Parameters:
115-
file_path (str): The path to the JSON file that contains the data to be read.
116-
117-
Returns:
118-
data (dict or list): The content of the JSON file as a Python dictionary or list,
119-
depending on the JSON structure.
120-
121-
Raises:
122-
FileNotFoundError: If the specified file does not exist.
123-
json.JSONDecodeError: If the file is not a valid JSON format or is empty.
124-
125-
Example:
126-
>>> data = read_sample_lookup('path/to/data.json')
127-
>>> print(data)
128-
[ "item1", "item2", "item3" ]
129-
"""
130-
assert Path(
131-
file_path,
132-
).is_file(), f"The provided file path {file_path} does not exist."
133-
with Path(file_path).open("r", encoding="utf8") as file:
134-
return json.load(file)
135-
136-
137-
def accumulate_cov_dfs(directory: str, sample_lookup: dict[str, str]) -> pl.DataFrame:
100+
def accumulate_cov_dfs(directory: str, sample_list: list[str]) -> pl.DataFrame:
138101
"""
139102
Accumulate and concatenate multiple CSV files from a specified directory into a single Polars DataFrame.
140103
@@ -179,28 +142,21 @@ def accumulate_cov_dfs(directory: str, sample_lookup: dict[str, str]) -> pl.Data
179142
directory,
180143
).is_dir(), f"The provided input directory {directory} does not exist."
181144

182-
bed_list = []
183-
bc_list = []
184-
for filename in os.listdir(directory):
185-
f = Path(directory) / Path(filename)
186-
if not Path(f).is_file() and not filename.endswith(".bed"):
187-
continue
188-
barcode = filename.split(".")[0]
189-
bed_list.append(f)
190-
bc_list.append(barcode)
145+
sample_lookup = {
146+
sample_id: Path(directory) / Path(f"{sample_id}.per-base.bed")
147+
for sample_id in sample_list
148+
}
191149

192150
df_list = []
193-
for bed_file, barcode in zip(bed_list, bc_list):
194-
if barcode not in sample_lookup:
195-
continue
151+
for sample_id, bed_file in sample_lookup.items():
196152
bc_df = (
197153
pl.read_csv(
198154
bed_file,
199155
separator="\t",
200156
has_header=False,
201157
new_columns=["chromosome", "start", "stop", "coverage"],
202158
)
203-
.with_columns(sample=pl.lit(sample_lookup[barcode]))
159+
.with_columns(sample=pl.lit(sample_id))
204160
.with_columns(
205161
pl.int_ranges(start=pl.col("start"), end=pl.col("stop")).alias(
206162
"position",
@@ -267,6 +223,7 @@ def plot_log_coverages(
267223
)
268224
+ facet_wrap("~chromosome", scales="free_x")
269225
+ theme_minimal()
226+
+ guides(color=guide_legend(ncol=3))
270227
)
271228

272229

@@ -326,6 +283,7 @@ def plot_coverages(all_barcodes: pl.DataFrame, min_desired_depth: int = 20) -> g
326283
)
327284
+ facet_wrap("~chromosome", scales="free_x")
328285
+ theme_minimal()
286+
+ guides(color=guide_legend(ncol=3))
329287
)
330288

331289

@@ -359,7 +317,10 @@ def main() -> None:
359317
"""
360318
args = parse_command_line_args()
361319
min_desired_depth = args.min_coverage
362-
sample_list = read_sample_lookup(args.sample_lookup)
320+
sample_list = [
321+
path.name.replace(".per-base.bed", "")
322+
for path in Path(args.input_dir).glob("*.per-base.bed")
323+
]
363324
sample_dataframe = accumulate_cov_dfs(args.input_dir, sample_list)
364325

365326
if args.log:

bin/resplice_primers.py

+23-30
Original file line numberDiff line numberDiff line change
@@ -458,13 +458,11 @@ def resolve_primer_names(
458458
# current iteration is handling, and which old primer names should be in which
459459
# order for a join downstream.
460460
new_primer_pairs: list[tuple[str, str]] = []
461-
handled_pairs: list[list[int]] = []
462461
old_primer_pairs: list[tuple[str, str]] = []
463-
combo_ticker = 0
464462

465463
# loop through both primers, where primer1 is from the "deficit primer" list, and
466464
# primer2 is from the "excess primer" list.
467-
for old_fwd_primer, old_rev_primer in all_possible_pairs:
465+
for i, (old_fwd_primer, old_rev_primer) in enumerate(all_possible_pairs):
468466
# pull of the last element, delimited by hyphen, on both primer names
469467
fwd_final_element = old_fwd_primer.split(idx_delim)[idx_position]
470468
rev_final_element = old_rev_primer.split(idx_delim)[idx_position]
@@ -482,30 +480,18 @@ def resolve_primer_names(
482480
)
483481
sys.exit(1)
484482

485-
# figure out which combination of primers is being handled and check
486-
# whether it has already been handled
487-
primer1_index = int(fwd_final_element)
488-
primer2_index = int(rev_final_element)
489-
current_pair = sorted((primer1_index, primer2_index))
490-
if current_pair in handled_pairs:
491-
continue
492-
493-
# now that we know we're working with a previously unhandled pairing, increment
494-
# the combo ticker by one
495-
combo_ticker += 1
496-
497483
# use f-strings to construct new names that make the combinations explicit
498-
new_fwd_primer = f"{old_fwd_primer}_splice{combo_ticker}"
499-
new_rev_primer = f"{old_rev_primer}_splice{combo_ticker}"
484+
new_fwd_primer = (
485+
old_fwd_primer.replace(f"-{fwd_final_element}", "") + f"_splice{i + 1}"
486+
)
487+
new_rev_primer = (
488+
old_rev_primer.replace(f"-{rev_final_element}", "") + f"_splice{i + 1}"
489+
)
500490

501491
# continue accumulating old and new primer pair lists
502492
old_primer_pairs.append((old_fwd_primer, old_rev_primer))
503493
new_primer_pairs.append((new_fwd_primer, new_rev_primer))
504494

505-
# now that we know nothing has gone awry, add this pair to the accumulating list
506-
# of handled primer pairs
507-
handled_pairs.append(current_pair)
508-
509495
# flatten the tuples at each position of the pair lists with two comprehensions
510496
# to make it explicit to the reader that forward primers come before reverse primers
511497
# in the flattened list. These comprehensions handle the old primer names.
@@ -565,12 +551,12 @@ def resplice_primers(
565551
logger.debug(
566552
f"Pair of primers within the amplicon {amplicon} detected: {pair_labels}. No resplicing will be needed here, though double check that the necessary forward and reverse suffixes, {fwd_suffix} and {rev_suffix}, are present.",
567553
)
568-
assert any(
569-
fwd_suffix in primer for primer in pair_labels
570-
), f"The forward suffix {fwd_suffix} is missing in the provided primer pairs: {pair_labels}. Aborting."
571-
assert any(
572-
rev_suffix in primer for primer in pair_labels
573-
), f"The reverse suffix {rev_suffix} is missing in the provided primer pairs: {pair_labels}. Aborting."
554+
assert any(fwd_suffix in primer for primer in pair_labels), (
555+
f"The forward suffix {fwd_suffix} is missing in the provided primer pairs: {pair_labels}. Aborting."
556+
)
557+
assert any(rev_suffix in primer for primer in pair_labels), (
558+
f"The reverse suffix {rev_suffix} is missing in the provided primer pairs: {pair_labels}. Aborting."
559+
)
574560

575561
# if so, all is well. Append the primers into the growing list of correct
576562
# dataframes and move onto the next amplicon
@@ -607,8 +593,10 @@ def resplice_primers(
607593
# current design this should be impossible
608594
assert len(old_primer_names) == len(
609595
new_primer_names,
610-
), f"Insufficient number of replacement names ({new_primer_names}) generated \
596+
), (
597+
f"Insufficient number of replacement names ({new_primer_names}) generated \
611598
for partition for amplicon {amplicon}: {primer_df}"
599+
)
612600

613601
# run a join on the old primer names to bring in the new primer names in their
614602
# proper locations
@@ -669,8 +657,13 @@ def finalize_primer_pairings(
669657
for primer in df.select("NAME").to_series().to_list()
670658
if rev_suffix in primer
671659
]
672-
if len(fwd_keepers) > 0 and len(rev_keepers) > 0:
673-
final_frames.append(df)
660+
if len(fwd_keepers) == 0 or len(rev_keepers) == 0:
661+
logger.warning(
662+
"Incorrect splicing occurred for the following sets of primers. The amplicon they are derived from will be skipped:\n{fwd_keepers}\n{rev_keepers}",
663+
)
664+
continue
665+
666+
final_frames.append(df)
674667

675668
return pl.concat(final_frames)
676669

bin/split_primer_combos.py

+3
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,9 @@ def main() -> None:
104104

105105
for df in bed_dfs:
106106
splicing = df.select("NAME").unique().item()
107+
assert (
108+
len(df) == 2 # noqa: PLR2004
109+
), f"Problematic splicing occurred with {splicing}"
107110
df.write_csv(
108111
file=f"{splicing}.bed",
109112
separator="\t",

main.nf

-6
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,6 @@ workflow {
6767
Channel.fromPath( params.snpEff_config ) :
6868
Channel.empty()
6969

70-
ch_sample_lookup = params.sample_lookup ?
71-
Channel.fromPath( params.sample_lookup ) :
72-
Channel.empty()
73-
7470
// decide whether to run the ont or the illumina workflow
7571
if ( params.platform == "ont" ) {
7672

@@ -79,7 +75,6 @@ workflow {
7975
ch_refseq,
8076
ch_ref_gbk,
8177
ch_snpeff_config,
82-
ch_sample_lookup
8378
)
8479

8580
} else if ( params.platform == "illumina" ) {
@@ -89,7 +84,6 @@ workflow {
8984
ch_refseq,
9085
ch_ref_gbk,
9186
ch_snpeff_config,
92-
ch_sample_lookup
9387
)
9488

9589
} else {

0 commit comments

Comments
 (0)