Skip to content

Commit 1fe92c9

Browse files
committed
Adding a docstring to fix_dataframe and switching the sources of a couple of dependencies
1 parent 06c0e80 commit 1fe92c9

File tree

2 files changed

+26
-1
lines changed

2 files changed

+26
-1
lines changed

bin/multisample_plot.py

+24
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,30 @@ def accumulate_cov_dfs(directory: str, sample_lookup: dict[str, str]) -> pl.Data
194194

195195

196196
def fix_dataframe(df: pl.DataFrame) -> pl.DataFrame:
197+
"""
198+
Fix and preprocess the input DataFrame for analysis.
199+
200+
This function takes a Polars DataFrame containing coverage data and performs
201+
the following operations:
202+
1. Converts the DataFrame to a list of dictionaries.
203+
2. Iterates through the rows, adding new rows where there are large
204+
jumps in coverage (greater than 50).
205+
3. Creates a new row with zero coverage between segments where large
206+
jumps occur.
207+
208+
Parameters:
209+
df (pl.DataFrame): Input DataFrame containing coverage data.
210+
Expected columns: 'start', 'stop', 'coverage',
211+
'sample', and 'chromosome'.
212+
213+
Returns:
214+
pl.DataFrame: A new DataFrame with additional rows inserted to
215+
represent gaps in coverage.
216+
217+
Note:
218+
The function assumes that the input DataFrame is sorted by position
219+
within each chromosome and sample.
220+
"""
197221
rows = df.to_dicts()
198222
new_rows = []
199223
for i in range(len(rows) - 1):

pyproject.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ dependencies = [
4444
"numpy",
4545
"scipy",
4646
"paramiko",
47+
"plotnine",
48+
"loguru",
4749
]
4850

4951
[build-system]
@@ -82,7 +84,6 @@ pyyaml = ">=5.4.1,<5.5"
8284
rasusa = ">=2.0.0,<2.1"
8385
python-edlib = ">=1.3.9,<1.4"
8486
mosdepth = ">=0.3.8,<0.4"
85-
plotnine = ">=0.13.6,<0.14"
8687
pyarrow = ">=16.1.0,<16.2"
8788
rust = ">=1.77.2,<1.80"
8889
csvtk = ">=0.30.0,<0.31"

0 commit comments

Comments
 (0)