Skip to content

Commit d71671a

Browse files
committed
formatting tweaks post-linter tweak, adding new growth data files, refs #61, #66
1 parent 96c6990 commit d71671a

File tree

9 files changed

+81
-21261
lines changed

9 files changed

+81
-21261
lines changed

growthviz-data/.DS_Store

-6 KB
Binary file not shown.
973 KB
Binary file not shown.
398 KB
Binary file not shown.

growthviz-data/ext/swaps.csv

-21,195
This file was deleted.

growthviz/charts.py

+33-31
Original file line numberDiff line numberDiff line change
@@ -45,19 +45,20 @@ def weight_distr(df, mode):
4545
Create charts with overall and outlier weight distributions (included values only)
4646
4747
Parameters:
48-
df: (DataFrame) with subjid, param, measurement, age, sex, clean_value, clean_cat, include,
49-
category, colors, patterns, and sort_order columns
50-
mode: (str) indicates how many of the weights you want to use. If set to 'high', the function
51-
will only use weights above a certain threshold. Otherwise, it displays all the weights.
48+
df: (DataFrame) with subjid, param, measurement, age, sex, clean_value, and
49+
include columns
50+
mode: (str) indicates how many of the weights you want to use. If set to 'high',
51+
the function will only use weights above a certain threshold. Otherwise, it
52+
displays all the weights.
5253
"""
53-
wgt_grp = df[(df["param"] == "WEIGHTKG") & (df["include"] == True)]
54+
wgt_grp = df[(df["param"] == "WEIGHTKG") & (df["include"] is True)]
5455
if mode == "high":
5556
wgt_grp = wgt_grp.loc[wgt_grp["measurement"] >= 135]
5657
plt.title("Weights At or Above 135kg")
5758
else:
5859
plt.title("All Weights")
5960
if len(wgt_grp.index) == 0:
60-
print("No included observations with weight (kg) >= 135")
61+
print("No included observations with weight (kg) >= 135.")
6162
plt.close()
6263
else:
6364
round_col = wgt_grp.apply(
@@ -66,7 +67,7 @@ def weight_distr(df, mode):
6667
wgt_grp = wgt_grp.assign(round_weight=round_col.values)
6768
wgt_grp_sum = wgt_grp.groupby("round_weight")["subjid"].count().reset_index()
6869
plt.rcParams["figure.figsize"] = [7, 5]
69-
wgt_grp_sum_plot = plt.bar(wgt_grp_sum["round_weight"], wgt_grp_sum["subjid"])
70+
plt.bar(wgt_grp_sum["round_weight"], wgt_grp_sum["subjid"])
7071
# Ensure there is some breadth to the x-axis in case of just a few observations
7172
if wgt_grp["measurement"].max() - wgt_grp["measurement"].min() < 10:
7273
plt.xlim(wgt_grp["measurement"].min() - 5, wgt_grp["measurement"].max() + 5)
@@ -78,11 +79,12 @@ def weight_distr(df, mode):
7879

7980
def make_age_charts(df, mode):
8081
"""
81-
Creates a chart with the age ranges in the dataset. Counts the number of subjids in each range.
82+
Creates a chart with the age ranges in the dataset. Counts the number of subjids in
83+
each range.
8284
8385
Parameters:
84-
df: (DataFrame) with subjid, param, measurement, age, sex, clean_value, clean_cat, include,
85-
category, colors, patterns, and sort_order columns
86+
df: (DataFrame) with subjid, param, measurement, age, sex, clean_value, clean_cat,
87+
include, category, colors, patterns, and sort_order columns
8688
mode: (str) indicates whether you want the adults or pediatrics values.
8789
"""
8890
obs_grp = df
@@ -94,8 +96,8 @@ def make_age_charts(df, mode):
9496
else:
9597
raise Exception("Valid modes are 'adults' and 'pediatrics'")
9698

97-
# Adds label, color, pattern and sort order columns to the dataframe based on the age of each
98-
# row in the dataframe
99+
# Adds label, color, pattern and sort order columns to the dataframe based on the
100+
# age of each row in the dataframe
99101
def add_categories_to_frame(df_data, df_reference):
100102
categories = []
101103
colors = []
@@ -119,8 +121,8 @@ def add_categories_to_frame(df_data, df_reference):
119121
# Call the categorizing function on the data
120122
obs_grp = add_categories_to_frame(obs_grp, label_frame)
121123

122-
# Groups the new dataframe by category, sort order, colors and patterns. It then counts the
123-
# number of subject ids in each group and sorts the values by sort order.
124+
# Groups the new dataframe by category, sort order, colors and patterns. It then
125+
# counts the number of subject ids in each group and sorts the values by sort order.
124126
obs_grp = (
125127
obs_grp.groupby(["category", "sort_order", "colors", "patterns"])["subjid"]
126128
.count()
@@ -214,14 +216,14 @@ def overlap_view_adults(
214216
xmin = math.floor(individual.age.min())
215217
xmax = math.ceil(individual.age.max())
216218
selected_param_plot.set_xlim(xmin, xmax)
217-
if include_carry_forward == True:
219+
if include_carry_forward is True:
218220
carry_forward = selected_param[
219221
selected_param.clean_value == "Exclude-Carried-Forward"
220222
]
221223
selected_param_plot.scatter(
222224
x=carry_forward.age, y=carry_forward.measurement, c="c", marker="^"
223225
)
224-
if include_percentiles == True:
226+
if include_percentiles is True:
225227
if param == "WEIGHTKG":
226228
percentile_df = wt_df
227229
elif param == "BMI":
@@ -287,7 +289,7 @@ def overlap_view_adults_show(
287289
"""
288290
Wraps overlap_view_adults with plt.show().
289291
"""
290-
plot = overlap_view_adults(
292+
overlap_view_adults(
291293
obs_df,
292294
subjid,
293295
param,
@@ -345,14 +347,14 @@ def overlap_view_pediatrics(
345347
c="r",
346348
marker="x",
347349
)
348-
if include_carry_forward == True:
350+
if include_carry_forward is True:
349351
carry_forward = selected_param[
350352
selected_param.clean_value == "Exclude-Carried-Forward"
351353
]
352354
selected_param_plot.scatter(
353355
x=carry_forward.age, y=carry_forward.measurement, c="c", marker="^"
354356
)
355-
if include_percentiles == True:
357+
if include_percentiles is True:
356358
percentile_df = wt_df if param == "WEIGHTKG" else ht_df
357359
percentile_window = percentile_df.loc[
358360
(percentile_df.Sex == individual.sex.min())
@@ -374,7 +376,7 @@ def overlap_view_pediatrics_show(
374376
"""
375377
Wraps overlap_view_pediatrics with plt.show().
376378
"""
377-
plot = overlap_view_pediatrics(
379+
overlap_view_pediatrics(
378380
obs_df, subjid, param, include_carry_forward, include_percentiles, wt_df, ht_df
379381
)
380382
plt.show()
@@ -442,7 +444,7 @@ def overlap_view_double_pediatrics(
442444
ax2.set_ylabel(
443445
"weight (kg)", color=color_secondary
444446
) # we already handled the x-label with ax1
445-
if include_percentiles == True:
447+
if include_percentiles is True:
446448
percentile_window = wt_df.loc[wt_df.Sex == individual.sex.min()]
447449
ax2.plot(percentile_window.age, percentile_window.P5, color="lightblue")
448450
ax2.plot(
@@ -476,21 +478,21 @@ def overlap_view_double_pediatrics(
476478
)
477479
ax1.plot(percentile_window_ht.age, percentile_window_ht.P95, color="pink")
478480

479-
if show_all_measurements == True:
481+
if show_all_measurements is True:
480482
ax1.plot(height["age"], height["measurement"], color=color, label="stature")
481483
ax2.plot(
482484
weight["age"], weight["measurement"], color=color_secondary, label="weight"
483485
)
484486

485-
if show_excluded_values == True:
487+
if show_excluded_values is True:
486488
ax1.scatter(
487489
excluded_height.age, excluded_height.measurement, c="black", marker="x"
488490
)
489491
ax2.scatter(
490492
excluded_weight.age, excluded_weight.measurement, c="black", marker="x"
491493
)
492494

493-
if show_trajectory_with_exclusions == True:
495+
if show_trajectory_with_exclusions is True:
494496
ax1.plot(
495497
included_height["age"],
496498
included_height["measurement"],
@@ -509,7 +511,7 @@ def overlap_view_double_pediatrics(
509511

510512
fig.tight_layout() # otherwise the right y-label is slightly clipped
511513

512-
if include_carry_forward == True:
514+
if include_carry_forward is True:
513515
carry_forward_height = height[height.clean_value == "Exclude-Carried-Forward"]
514516
carry_forward_weight = weight[weight.clean_value == "Exclude-Carried-Forward"]
515517
ax1.scatter(
@@ -594,7 +596,7 @@ def five_by_five_view(obs_df, subjids, param, wt_df, ht_df, bmi_df, linestyle):
594596
for x in range(nrows):
595597
try:
596598
subjid = subjids[x * 5 + y]
597-
except IndexError as ie:
599+
except IndexError:
598600
# No more subjects to render
599601
break
600602
individual = obs_df[obs_df.subjid == subjid]
@@ -758,15 +760,15 @@ def top_ten(
758760
in the notebook.
759761
"""
760762
working_set = merged_df
761-
if age != None:
763+
if age is not None:
762764
working_set = working_set.loc[
763765
working_set.rounded_age.ge(age[0]) & working_set.rounded_age.le(age[1])
764766
]
765-
if sex != None:
767+
if sex is not None:
766768
working_set = working_set[working_set.sex == sex]
767-
if wexclusion != None:
769+
if wexclusion is not None:
768770
working_set = working_set[working_set.weight_cat.isin(wexclusion)]
769-
if hexclusion != None:
771+
if hexclusion is not None:
770772
working_set = working_set[working_set.height_cat.isin(hexclusion)]
771773
# if order == 'largest':
772774
# working_set = working_set.nlargest(10, field)
@@ -796,7 +798,7 @@ def top_ten(
796798
"BMIz",
797799
]
798800
]
799-
if out == None:
801+
if out is None:
800802
return working_set
801803
else:
802804
out.clear_output()

growthviz/compare.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ def subject_stats_comparison(combined_df):
130130
for rn in combined_df.run_name.unique():
131131
total_subjects = combined_df[combined_df.run_name == rn].subjid.nunique()
132132
only_exclusions = combined_df[
133-
(combined_df.run_name == rn) & (combined_df.include == False)
133+
(combined_df.run_name == rn) & (combined_df.include is False)
134134
]
135135
percent_with_exclusion = (
136136
only_exclusions.subjid.nunique() / total_subjects

growthviz/processdata.py

+44-32
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,15 @@
1-
from IPython.display import FileLinks
2-
import matplotlib as mpl
3-
import matplotlib.pyplot as plt
41
import numpy as np
52
import pandas as pd
3+
from IPython.display import FileLinks
64

75

86
def setup_individual_obs_df(obs_df):
97
"""
108
Standardizes adults and pediatrics files for clean processing in GrowthViz notebooks
119
1210
Parameters:
13-
obs_df: (DataFrame) with subjid, sex, age, measurement, param and clean_value columns
11+
obs_df: (DataFrame) with subjid, sex, age, measurement, param and clean_value
12+
columns
1413
1514
Returns:
1615
DataFrame with updated columns
@@ -135,16 +134,23 @@ def setup_percentiles_pediatrics(percentiles_file):
135134

136135
def keep_age_range(df, mode):
137136
"""
138-
Returns specified age range
137+
Returns specified age range, removing extraneous columns as well
139138
140139
Parameters:
141-
df: (DataFrame) with subjid, param, measurement, age, sex, clean_value, clean_cat, include,
142-
category, colors, patterns, and sort_order columns
143-
mode: (str) indicates whether you want the "adults" (18-80) or "pediatrics" (0-25) values
140+
df: (DataFrame) with subjid, param, measurement, age, sex, clean_value, and
141+
include columns
142+
mode: (str) indicates whether you want the "adults" (18-80) or "pediatrics" (0-25)
143+
values
144144
145145
Returns:
146146
DataFrame with filtered ages, unchanged if invalid mode is specified
147147
"""
148+
# Note: this is a side effect; just the simplest place to remove these
149+
cols_to_drop = []
150+
for extra_col in ["clean_cat", "category", "colors", "patterns", "sort_order"]:
151+
if extra_col in df.columns:
152+
cols_to_drop.append(extra_col)
153+
df = df.drop(columns=cols_to_drop)
148154
if mode == "adults":
149155
return df[df["age"].between(18, 80, inclusive="both")]
150156
elif mode == "pediatrics":
@@ -158,7 +164,8 @@ def setup_merged_df(obs_df):
158164
Merges together weight and height data for calculating BMI
159165
160166
Parameters:
161-
obs_df: (DataFrame) with subjid, sex, age, measurement, param and clean_value columns
167+
obs_df: (DataFrame) with subjid, sex, age, measurement, param and clean_value
168+
columns
162169
163170
Returns:
164171
DataFrame with merged data
@@ -206,7 +213,8 @@ def setup_merged_df(obs_df):
206213

207214
def exclusion_information(obs):
208215
"""
209-
Provides a count and percentage of growthcleanr categories by measurement type (param).
216+
Provides a count and percentage of growthcleanr categories by measurement type
217+
(param).
210218
211219
Parameters:
212220
obs: a DataFrame, in the format output by setup_individual_obs_df
@@ -238,15 +246,16 @@ def exclusion_information(obs):
238246

239247
def label_incl(row):
240248
"""
241-
Categorizes BMI calculations as Include, Implausible, or unable to calculate (Only Wt or Ht)
249+
Categorizes BMI calculations as Include, Implausible, or unable to calculate (Only
250+
Wt or Ht)
242251
243252
Parameters:
244253
row: (Series) dataframe row
245254
246255
Returns:
247256
Category (str) for BMI calculation
248257
"""
249-
if row["include_both"] == True:
258+
if row["include_both"] is True:
250259
return "Include"
251260
elif (row["weight_cat"] == "Implausible") | (row["height_cat"] == "Implausible"):
252261
return "Implausible"
@@ -261,8 +270,8 @@ def setup_bmi_adults(merged_df, obs):
261270
Parameters:
262271
merged_df: (DataFrame) with subjid, bmi, include_height, include_weight, rounded_age
263272
and sex columns
264-
obs: (DataFrame) with subjid, param, measurement, age, sex, clean_value, clean_cat, include,
265-
category, colors, patterns, and sort_order columns
273+
obs: (DataFrame) with subjid, param, measurement, age, sex, clean_value, clean_cat,
274+
include, category, colors, patterns, and sort_order columns
266275
267276
Returns:
268277
DataFrame with appended values
@@ -324,15 +333,17 @@ def export_to_csv(da_locals, selection_widget, out):
324333

325334
def clean_swapped_values(merged_df):
326335
"""
327-
This function will look in a DataFrame for rows where the height_cat and weight_cat are set to
328-
"Swapped-Measurements" (or the adult equivalent). It will then swap the height and weight values
329-
for those rows, and recalculate BMIs based on these changes. It will also create two new columns:
330-
postprocess_height_cat and postprocess_weight_cat. The values for these columns are copied from
331-
the original categories except in the case where swaps are fixed when it is set to
336+
This function will look in a DataFrame for rows where the height_cat and weight_cat
337+
are set to "Swapped-Measurements" (or the adult equivalent). It will then swap the
338+
height and weight values for those rows, and recalculate BMIs based on these
339+
changes. It will also create two new columns: postprocess_height_cat and
340+
postprocess_weight_cat. The values for these columns are copied from the original
341+
categories, except for rows where a swap was fixed, which are set to
332342
"Include-Fixed-Swap".
333343
334344
Parameters:
335-
merged_df: (DataFrame) with subjid, height, weight, include_height and include_weight columns
345+
merged_df: (DataFrame) with subjid, height, weight, include_height and
346+
include_weight columns
336347
337348
Returns:
338349
The cleaned DataFrame
@@ -368,20 +379,21 @@ def clean_swapped_values(merged_df):
368379

369380
def clean_unit_errors(merged_df):
370381
"""
371-
This function will look in a DataFrame for rows where the height_cat and weight_cat are set to
372-
"Unit-Error-High" or "Unit-Error-Low". It will then multiply / divide the height and weight
373-
values to convert them. It will also create two new columns: postprocess_height_cat and
374-
postprocess_weight_cat. The values for these columns are copied from the original categories
375-
except in the case where unit errors are fixed when it is set to "Include-UH" or "Include-UL"
376-
respectively.
377-
378-
At present, the adult algorithm does not specify high or low unit errors, rather it only flags
379-
"Exclude-Adult-Unit-Errors", so this function only works with pediatrics data. If growthcleanr
380-
adds high and low designations for adult unit errors, a comparable set of conditions could be
381-
added here to accommodate adult data.
382+
This function will look in a DataFrame for rows where the height_cat and weight_cat
383+
are set to "Unit-Error-High" or "Unit-Error-Low". It will then multiply / divide
384+
the height and weight values to convert them. It will also create two new columns:
385+
postprocess_height_cat and postprocess_weight_cat. The values for these columns
386+
are copied from the original categories except in the case where unit errors are
387+
fixed, in which case they are set to "Include-UH" or "Include-UL" respectively.
388+
389+
At present, the adult algorithm does not specify high or low unit errors, rather it
390+
only flags "Exclude-Adult-Unit-Errors", so this function only works with pediatrics
391+
data. If growthcleanr adds high and low designations for adult unit errors, a
392+
comparable set of conditions could be added here to accommodate adult data.
382393
383394
Parameters:
384-
merged_df: (DataFrame) with subjid, height, weight, include_height and include_weight columns
395+
merged_df: (DataFrame) with subjid, height, weight, include_height and
396+
include_weight columns
385397
386398
Returns:
387399
The cleaned DataFrame

growthviz/sumstats.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
from IPython.display import Markdown
21
import numpy as np
2+
from IPython.display import Markdown
33

44

55
def setup_percentile_zscore_adults(percentiles_clean):
@@ -169,7 +169,7 @@ def bmi_stats(
169169
merged_stats = merged_stats.rename(
170170
columns={"std_raw": "sd_raw", "std_clean": "sd_clean"}
171171
)
172-
if out == None:
172+
if out is None:
173173
return merged_stats
174174
else:
175175
# Clear output on first update and all subsequent updates, see

requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
ipywidgets~=7.0
2+
jupyter-server<2.0.0
23
matplotlib>=3.3.4
34
pandas>=1.2.2
45
qgrid>=1.3.1

0 commit comments

Comments
 (0)