Skip to content

Commit e55d999

Browse files
committed
Updating pediatrics notebook and chart generation for new sample set; tweaked five by five to handle <25 subjects; specifying plt.show() on some chart functions to initiate chart refresh, refs #37
1 parent a1d4bf7 commit e55d999

10 files changed

+80623
-86360
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -145,3 +145,4 @@ cython_debug/
145145
*.swo
146146
*.swp
147147
.DS_Store
148+
output

GrowthViz-pediatrics.ipynb

+478-603
Large diffs are not rendered by default.

GrowthViz-pediatrics.py

+12-6
Original file line numberDiff line numberDiff line change
@@ -251,13 +251,17 @@ def handle_selection_change(_event, _widget):
251251

252252

253253
all_ids = obs['subjid'].unique()
254-
val = 47085108 if 47085108 in all_ids else np.random.choice(all_ids, size=1, replace=False)
255-
interactive(charts.overlap_view_pediatrics, obs_df=fixed(obs),
254+
val = 5450 if 5450 in all_ids else np.random.choice(all_ids, size=1, replace=False)
255+
interactive(charts.overlap_view_pediatrics_show,
256+
obs_df=fixed(obs),
256257
subjid=widgets.Dropdown(options=all_ids, value=val, description='Subject ID:', disabled=False),
257258
param=['HEIGHTCM', 'WEIGHTKG'],
258259
include_carry_forward=widgets.Checkbox(value=True,description='Include Carry Forward',disabled=False,indent=False),
259260
include_percentiles=widgets.Checkbox(value=True,description='Include Measurement Percentile Bands',disabled=False,indent=False),
260-
wt_df=fixed(wt_percentiles), ht_df=fixed(ht_percentiles), bmi_df=fixed(bmi_percentiles))
261+
wt_df=fixed(wt_percentiles),
262+
ht_df=fixed(ht_percentiles),
263+
bmi_df=fixed(bmi_percentiles)
264+
)
261265

262266

263267
# The cell below also creates a plot for an individual modeled after the [CDC paper growth charts](https://www.cdc.gov/growthcharts/data/set1clinical/cj41c021.pdf). It shows both the weight trajectory and height trajectory. The lighter bands in the diagram background represent the 5th through 95th percentile values for age and sex for the given measurement type.
@@ -268,8 +272,9 @@ def handle_selection_change(_event, _widget):
268272

269273

270274
all_ids = obs['subjid'].unique()
271-
val = 47085108 if 47085108 in all_ids else np.random.choice(all_ids, size=1, replace=False)
272-
interactive(charts.overlap_view_double_pediatrics, obs_df=fixed(obs),
275+
val = 5446 if 5446 in all_ids else np.random.choice(all_ids, size=1, replace=False)
276+
interactive(charts.overlap_view_double_pediatrics,
277+
obs_df=fixed(obs),
273278
subjid=widgets.Dropdown(options=all_ids, value=val, description='Subject ID:', disabled=False),
274279
show_all_measurements=widgets.Checkbox(value=True,description='Show All Measurements',disabled=False,indent=False),
275280
show_excluded_values=widgets.Checkbox(value=True,description='Show Excluded Values (x)',disabled=False,indent=False),
@@ -340,7 +345,8 @@ def edge25(obs, category, sort_order, param):
340345
filtered_by_cat = filtered_by_cat.nlargest(25, 'measurement')
341346
else:
342347
filtered_by_cat = filtered_by_cat.nsmallest(25, 'measurement')
343-
return charts.five_by_five_view(obs, filtered_by_cat.subjid.values, param, wt_percentiles, ht_percentiles, bmi_percentiles, 'solid')
348+
fig = charts.five_by_five_view(obs, filtered_by_cat.subjid.values, param, wt_percentiles, ht_percentiles, bmi_percentiles, 'solid')
349+
plt.show()
344350

345351
interact(edge25, obs = fixed(obs), category = obs.clean_cat.unique(),
346352
sort_order = ['largest', 'smallest'], param = ['WEIGHTKG', 'HEIGHTCM'])

README.md

+4-1
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,10 @@ If not using Anaconda, specific versions of packages can be found in `requiremen
151151

152152
By default when you reach Step 6 of the [Simple Install](#simple-install)
153153
instructions above the notebook will use sample data loaded from the `.csv`
154-
files located in the GrowthViz-master project.
154+
files located in the GrowthViz project. This is the same synthetic sample data
155+
that is packaged with
156+
[growthcleanr](https://github.com/carriedaymont/growthcleanr), cleaned and
157+
then separated into pediatric and adult sets for GrowthViz.
155158

156159
To ensure that all of the necessary example files are present, run the
157160
`check_setup.py` script.

README.pdf

50.9 KB
Binary file not shown.

growthviz-data/sample-adults-data.csv

+49,774-29,025
Large diffs are not rendered by default.

growthviz-data/sample-pediatrics-data.csv

+30,288-56,703
Large diffs are not rendered by default.

growthviz/charts.py

+60-14
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def weight_distr(df, mode):
2929
plt.ylabel("Total Patient Observations")
3030
plt.xlabel("Recorded Weight (Kg)")
3131
plt.grid()
32-
return wgt_grp_sum_plot
32+
plt.show()
3333

3434

3535
def overlap_view_adults(
@@ -215,6 +215,18 @@ def overlap_view_pediatrics(
215215
return selected_param_plot
216216

217217

218+
def overlap_view_pediatrics_show(
    obs_df, subjid, param, include_carry_forward, include_percentiles, wt_df, ht_df
):
    """
    Wraps overlap_view_pediatrics with plt.show().

    Every argument is forwarded unchanged, and in the same order, to
    overlap_view_pediatrics; see that function for what each one means.

    Parameters:
    obs_df, subjid, param, include_carry_forward, include_percentiles, wt_df,
    ht_df: passed straight through to overlap_view_pediatrics

    Returns:
    None. The value returned by overlap_view_pediatrics is not passed back to
    the caller; the chart is displayed through plt.show() instead.
    """
    # The wrapped function's return value is not used, so it is not bound to
    # a local name.
    overlap_view_pediatrics(
        obs_df, subjid, param, include_carry_forward, include_percentiles, wt_df, ht_df
    )
    plt.show()
228+
229+
218230
def overlap_view_double_pediatrics(
219231
obs_df,
220232
subjid,
@@ -359,7 +371,7 @@ def overlap_view_double_pediatrics(
359371

360372
# Reset figsize to default
361373
plt.rcParams["figure.figsize"] = [6.4, 4.8]
362-
return fig
374+
plt.show()
363375

364376

365377
def mult_obs(obs):
@@ -375,6 +387,24 @@ def mult_obs(obs):
375387
return obs[obs["any_ones"] == 0]
376388

377389

390+
def five_by_five_shape(n):
    """
    Determines shape of five by five view, allowing for fewer than 25 observations.

    Subjects are laid out five per row. Fewer than five subjects produce a
    single row with one column per subject; otherwise the grid is five columns
    wide with enough rows to hold every subject.

    Parameters:
    n: length of subject list to display; must be at least 1

    Returns:
    Dimensions of grid/subplots as (nrows, ncols)

    Raises:
    ValueError: if n is less than 1, because a grid with zero rows or zero
      columns cannot be drawn
    """
    if n < 1:
        # Without this guard n == 0 would yield (1, 0) and negative values
        # would yield shapes such as (0, 5), neither of which is a usable grid.
        raise ValueError(f"n must be at least 1 to lay out a grid, got {n}")
    full_rows, remainder = divmod(n, 5)
    if full_rows == 0:
        # Fewer than five subjects: one row, exactly as wide as needed
        return (1, remainder)
    # A partially filled final row still needs a row of its own
    return (full_rows + (1 if remainder else 0), 5)
406+
407+
378408
def five_by_five_view(obs_df, subjids, param, wt_df, ht_df, bmi_df, linestyle):
379409
"""
380410
Creates a small multiples plot showing the growth trend for 25 individuals
@@ -384,17 +414,32 @@ def five_by_five_view(obs_df, subjids, param, wt_df, ht_df, bmi_df, linestyle):
384414
subjids: A list of the ids of the individuals to be plotted
385415
param: (String) Whether to plot heights or weights. Expected values are "HEIGHTCM" or "WEIGHTKG"
386416
"""
387-
fig, ax = plt.subplots(5, 5)
388-
for y in range(5):
389-
for x in range(5):
390-
subjid = subjids[x * 5 + y]
417+
if len(subjids) == 0:
418+
print("No matching subjects found.")
419+
return
420+
nrows, ncols = five_by_five_shape(len(subjids))
421+
fig, ax = plt.subplots(nrows, ncols)
422+
for y in range(ncols):
423+
for x in range(nrows):
424+
try:
425+
subjid = subjids[x * 5 + y]
426+
except IndexError as ie:
427+
# No more subjects to render
428+
break
391429
individual = obs_df[obs_df.subjid == subjid]
392430
selected_param = individual[individual.param == param]
393-
ax[x, y].plot(selected_param.age, selected_param.measurement, marker=".")
431+
# Indexing varies by dimensionality, so simplify
432+
if nrows > 1:
433+
tgt = ax[x, y]
434+
elif len(subjids) == 1:
435+
tgt = ax
436+
else:
437+
tgt = ax[y]
438+
tgt.plot(selected_param.age, selected_param.measurement, marker=".")
394439
excluded_selected_param = selected_param[
395440
selected_param.clean_value != "Include"
396441
]
397-
ax[x, y].scatter(
442+
tgt.scatter(
398443
excluded_selected_param.age,
399444
excluded_selected_param.measurement,
400445
c="r",
@@ -411,22 +456,23 @@ def five_by_five_view(obs_df, subjids, param, wt_df, ht_df, bmi_df, linestyle):
411456
& (percentile_df.age >= math.floor(individual.age.min()))
412457
& (percentile_df.age <= math.ceil(individual.age.max()))
413458
]
414-
ax[x, y].plot(
459+
tgt.plot(
415460
percentile_window.age,
416461
percentile_window.P5,
417462
color="k",
418463
linestyle=linestyle,
419464
zorder=1,
420465
)
421-
ax[x, y].plot(
466+
tgt.plot(
422467
percentile_window.age,
423468
percentile_window.P95,
424469
color="k",
425470
linestyle=linestyle,
426471
zorder=1,
427472
)
428-
ax[x, y].set(title=subjid)
429-
fig.set_size_inches(20, 12)
473+
tgt.set(title=subjid)
474+
# Set size dynamically to average out about the same
475+
fig.set_size_inches(4 * ncols, 2.4 * nrows)
430476
return plt.tight_layout()
431477

432478

@@ -465,7 +511,7 @@ def bmi_with_percentiles(merged_df, bmi_percentiles, subjid):
465511

466512
ax[1].set(xlabel="age (y)", ylabel="BMI", title="BMI Cleaned")
467513
ax[1].grid()
468-
return plt
514+
plt.show()
469515

470516

471517
def param_with_percentiles(merged_df, subjid, param, wt_df, ht_df, bmi_df):
@@ -499,7 +545,7 @@ def param_with_percentiles(merged_df, subjid, param, wt_df, ht_df, bmi_df):
499545

500546
ax[1].set(xlabel="age (y)", ylabel="", title=(param + " Cleaned"))
501547
ax[1].grid()
502-
return plt
548+
plt.show()
503549

504550

505551
def top_ten(

growthviz/processdata.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ def setup_percentiles_pediatrics(percentiles_file):
9595
Processes pediatrics percentiles from CDC
9696
"""
9797
percentiles = pd.read_csv(
98-
f'growthviz-data/ext/{percentiles_file}',
98+
f"growthviz-data/ext/{percentiles_file}",
9999
dtype={
100100
"Agemos": float,
101101
"P5": float,
@@ -332,11 +332,9 @@ def export_to_csv(da_locals, selection_widget, out):
332332
Saves out csv file of dataframe
333333
"""
334334
df_name = selection_widget.value
335-
da_locals[df_name].to_csv(
336-
"growthviz-data/output/{}.csv".format(df_name), index=False
337-
)
335+
da_locals[df_name].to_csv("output/{}.csv".format(df_name), index=False)
338336
out.clear_output()
339-
out.append_display_data(FileLinks("growthviz-data/output"))
337+
out.append_display_data(FileLinks("output"))
340338

341339

342340
def clean_swapped_values(merged_df):

requirements.txt

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
ipywidgets
2-
matplotlib>=3.3.4
3-
pandas>=1.2.2
2+
matplotlib>=3.3.4
3+
pandas>=1.2.2
44
qgrid>=1.3.1
5-
seaborn>=0.11.1
5+
seaborn>=0.11.1

0 commit comments

Comments
 (0)