Add a docstring to fix_dataframe and switch the sources of a couple of dependencies

nrminor · nrminor · commit 1fe92c9ad005 · 2024-09-30T15:18:44.000-05:00
diff --git a/bin/multisample_plot.py b/bin/multisample_plot.py
@@ -194,6 +194,30 @@ def accumulate_cov_dfs(directory: str, sample_lookup: dict[str, str]) -> pl.Data
 
 
 def fix_dataframe(df: pl.DataFrame) -> pl.DataFrame:
+    """
+    Fix and preprocess the input DataFrame for analysis.
+
+    This function takes a Polars DataFrame containing coverage data and performs
+    the following operations:
+    1. Converts the DataFrame to a list of dictionaries.
+    2. Iterates through the rows, adding new rows where there are large
+       jumps in coverage (greater than 50).
+    3. Creates a new row with zero coverage between segments where large
+       jumps occur.
+
+    Parameters:
+    df (pl.DataFrame): Input DataFrame containing coverage data.
+                       Expected columns: 'start', 'stop', 'coverage',
+                       'sample', and 'chromosome'.
+
+    Returns:
+    pl.DataFrame: A new DataFrame with additional rows inserted to
+                  represent gaps in coverage.
+
+    Note:
+    The function assumes that the input DataFrame is sorted by position
+    within each chromosome and sample.
+    """
     rows = df.to_dicts()
     new_rows = []
     for i in range(len(rows) - 1):
diff --git a/pyproject.toml b/pyproject.toml
@@ -44,6 +44,8 @@ dependencies = [
     "numpy",
     "scipy",
     "paramiko",
+    "plotnine",
+    "loguru",
 ]
 
 [build-system]
@@ -82,7 +84,6 @@ pyyaml = ">=5.4.1,<5.5"
 rasusa = ">=2.0.0,<2.1"
 python-edlib = ">=1.3.9,<1.4"
 mosdepth = ">=0.3.8,<0.4"
-plotnine = ">=0.13.6,<0.14"
 pyarrow = ">=16.1.0,<16.2"
 rust = ">=1.77.2,<1.80"
 csvtk = ">=0.30.0,<0.31"