Skip to content

Commit 72c9916

Browse files
committed
Fix demos to use data frames
Closes #188
1 parent e330486 commit 72c9916

File tree

5 files changed

+52
-31
lines changed

5 files changed

+52
-31
lines changed

demo/ARM_Ch03.R

+14-6
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,14 @@
22
demo("SETUP", package = "rstanarm", verbose = FALSE, echo = FALSE, ask = FALSE)
33

44
source(paste0(ROOT, "ARM/Ch.3/kidiq.data.R"), local = DATA_ENV, verbose = FALSE)
5+
dat <- with(DATA_ENV, data.frame(kid_score, mom_hs, mom_iq))
56

67
# Estimate four contending models
7-
post1 <- stan_glm(kid_score ~ mom_hs, data = DATA_ENV,
8+
post1 <- stan_glm(kid_score ~ mom_hs, data = dat,
89
family = gaussian(link = "identity"),
910
seed = SEED, refresh = REFRESH)
1011
post2 <- update(post1, formula = kid_score ~ mom_iq)
11-
post3 <- stan_lm(kid_score ~ mom_hs + mom_iq, data = DATA_ENV,
12+
post3 <- stan_lm(kid_score ~ mom_hs + mom_iq, data = dat,
1213
prior = R2(location = 0.25, what = "mean"),
1314
seed = SEED, refresh = REFRESH)
1415
post4 <- update(post3, formula = kid_score ~ mom_hs * mom_iq,
@@ -45,15 +46,22 @@ source(paste0(ROOT, "ARM/Ch.3/kids_before1987.data.R"),
4546
local = DATA_ENV, verbose = FALSE)
4647
source(paste0(ROOT, "ARM/Ch.3/kids_after1987.data.R"),
4748
local = DATA_ENV, verbose = FALSE)
48-
post5 <- stan_lm(ppvt ~ hs + afqt, data = DATA_ENV,
49+
50+
fit_data <- with(DATA_ENV, data.frame(ppvt, hs, afqt))
51+
pred_data <- with(DATA_ENV, data.frame(ppvt_ev, hs_ev, afqt_ev))
52+
53+
post5 <- stan_lm(ppvt ~ hs + afqt, data = fit_data,
4954
prior = R2(location = 0.25, what = "mean"),
5055
seed = SEED, refresh = REFRESH)
51-
y_ev <- posterior_predict(post5, newdata =
52-
data.frame(hs = DATA_ENV$hs_ev, afqt = DATA_ENV$afqt_ev))
56+
y_ev <- posterior_predict(
57+
post5,
58+
newdata = with(pred_data, data.frame(hs = hs_ev, afqt = afqt_ev))
59+
)
5360
par(mfrow = c(1,1))
54-
hist(-sweep(y_ev, 2, STATS = DATA_ENV$ppvt_ev, FUN = "-"), prob = TRUE,
61+
hist(-sweep(y_ev, 2, STATS = pred_data$ppvt_ev, FUN = "-"), prob = TRUE,
5562
xlab = "Predictive Errors in ppvt", main = "", las = 2)
5663

64+
5765
ANSWER <- tolower(readline("Do you want to remove the objects this demo created? (y/n) "))
5866
if (ANSWER != "n") {
5967
rm(IQ_SEQ, y_nohs, y_hs, y_ev, ANSWER)

demo/ARM_Ch04.R

+12-6
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
demo("SETUP", package = "rstanarm", verbose = FALSE, echo = FALSE, ask = FALSE)
33

44
source(paste0(ROOT, "ARM/Ch.4/earnings.data.R"), local = DATA_ENV, verbose = FALSE)
5+
earnings_dat <- with(DATA_ENV, data.frame(earn, height, male))
56

67
# The stuff in sections 4.0 -- 4.3 is not very relevant
78
# Moreover, centering predictors is NOT recommended in the rstanarm package
@@ -10,16 +11,16 @@ source(paste0(ROOT, "ARM/Ch.4/earnings.data.R"), local = DATA_ENV, verbose = FAL
1011

1112
# These two models are essentially equivalent in the likelihood
1213
# But the "same" priors affect the posterior differently
13-
post1 <- stan_glm(log(earn) ~ height, data = DATA_ENV,
14+
post1 <- stan_glm(log(earn) ~ height, data = earnings_dat,
1415
family = gaussian(link = "identity"),
1516
seed = SEED, refresh = REFRESH)
16-
# post2 <- stan_glm(earn ~ height, data = DATA_ENV,
17+
# post2 <- stan_glm(earn ~ height, data = earnings_dat,
1718
# family = gaussian(link = "log"),
1819
# seed = SEED, refresh = REFRESH)
1920
# and this does not even converge
2021

2122
# These models add terms to the right-hand side
22-
post3 <- stan_lm(log(earn) ~ height + male, data = DATA_ENV,
23+
post3 <- stan_lm(log(earn) ~ height + male, data = earnings_dat,
2324
prior = R2(location = 0.3, what = "mean"),
2425
seed = SEED, refresh = REFRESH)
2526
post4 <- update(post3, formula = log(earn) ~ height * male)
@@ -49,15 +50,18 @@ boxplot(y_men, outline = FALSE, col = "red", axes = FALSE, log = "y", ylim = YLI
4950
axis(1, at = 1:ncol(y_men), labels = MEN_SEQ, las = 3)
5051

5152
# Prediction of the weight of mesquite trees
53+
DATA_ENV <- new.env()
5254
source(paste0(ROOT, "ARM/Ch.4/mesquite.data.R"), local = DATA_ENV, verbose = FALSE)
55+
tree_dat <- as.data.frame(do.call(cbind, as.list(DATA_ENV)))
56+
5357
CONTINUE1 <- tolower(readline(
5458
paste("A heads up: the next part of the demo (Predicting weight of mesquite trees )",
5559
"prints many lines \nto the console as it runs many models and compares the results",
5660
"Proceed? (y/n)")
5761
))
5862
if (CONTINUE1 != "n") {
5963
post5 <- stan_lm(weight ~ diam1 + diam2 + canopy_height + total_height +
60-
density + group, data = DATA_ENV,
64+
density + group, data = tree_dat,
6165
prior = R2(0.9), seed = SEED, refresh = REFRESH)
6266
post6 <- update(post5, formula = log(weight) ~ log(diam1) + log(diam2) +
6367
log(canopy_height) + log(total_height) + log(density) + group)
@@ -80,11 +84,13 @@ CONTINUE2 <- tolower(readline(
8084
"Proceed? (y/n)")
8185
))
8286
if (CONTINUE2 != "n") {
83-
YEARS <- as.character(seq(from = 1972, to = 2000, by = 4))
87+
YEARS <- as.character(seq(from = 1972, to = 1980, by = 4))
8488
round(digits = 2, x = sapply(YEARS, FUN = function(YEAR) {
89+
DATA_ENV <- new.env()
8590
source(paste0(ROOT, "ARM/Ch.4/nes", YEAR, ".data.R"), local = DATA_ENV, verbose = FALSE)
91+
pid_dat <- as.data.frame(do.call(cbind, as.list(DATA_ENV)))
8692
coef(stan_lm(partyid7 ~ real_ideo + I(race_adj == 1) + as.factor(age_discrete) +
87-
educ1 + gender + income, data = DATA_ENV, prior = R2(0.5),
93+
educ1 + gender + income, data = pid_dat, prior = R2(0.5),
8894
seed = SEED, refresh = 0))
8995
}))
9096
}

demo/ARM_Ch07.R

+9-5
Original file line numberDiff line numberDiff line change
@@ -2,28 +2,32 @@
22
demo("SETUP", package = "rstanarm", verbose = FALSE, echo = FALSE, ask = FALSE)
33

44
source(paste0(ROOT, "ARM/Ch.7/congress.data.R"), local = DATA_ENV, verbose = FALSE)
5+
cong_dat <- with(DATA_ENV, data.frame(incumbency_88, vote_88, vote_86))
56

67
# The stuff in sections 7.0 -- 7.2 is not very relevant
78

8-
post1 <- stan_lm(vote_88 ~ vote_86 + incumbency_88, data = DATA_ENV,
9+
post1 <- stan_lm(vote_88 ~ vote_86 + incumbency_88, data = cong_dat,
910
prior = R2(0.9, what = "mean"),
1011
seed = SEED, refresh = REFRESH)
1112
post1 # badly underfitting
1213
y_tilde <- posterior_predict(post1) # incumbency_90 is not available
1314
summary(rowSums(y_tilde > 0.5))
1415

15-
source(paste0(ROOT, "ARM/Ch.6/wells.data.R"), local = DATA_ENV, verbose = FALSE)
16-
post2 <- stan_glm(switch ~ I(dist / 100), data = DATA_ENV, family = "binomial",
16+
17+
data(wells, package = "rstanarm")
18+
wells$dist100 <- with(wells, dist / 100)
19+
post2 <- stan_glm(switch ~ dist100, data = wells, family = "binomial", iter = 100, chains = 1,
1720
seed = SEED, refresh = REFRESH)
18-
prop.table(table(c(ppd)))
21+
prop.table(table(c(posterior_predict(post2))))
22+
1923

2024
# the compound model is not good because it assumes the two errors are
2125
# independent. rstanarm will eventually support Heckman models, which
2226
# would be a better choice here.
2327

2428
ANSWER <- tolower(readline("Do you want to remove the objects this demo created? (y/n) "))
2529
if (ANSWER != "n") {
26-
rm(y_tilde, ppd, ANSWER)
30+
rm(y_tilde, wells, ANSWER)
2731
# removes stanreg and loo objects, plus what was created by STARTUP
2832
demo("CLEANUP", package = "rstanarm", verbose = FALSE, echo = FALSE, ask = FALSE)
2933
}

demo/ARM_Ch08.R

+7-6
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22
demo("SETUP", package = "rstanarm", verbose = FALSE, echo = FALSE, ask = FALSE)
33

44
source(paste0(ROOT, "ARM/Ch.8/lightspeed.data.R"), local = DATA_ENV, verbose = FALSE)
5-
5+
light_dat <- with(DATA_ENV, data.frame(y))
66
# The stuff in sections 8.0 -- 8.2 is not very relevant
77

8-
(post1 <- stan_glm(y ~ 1, data = DATA_ENV, seed = SEED, refresh = REFRESH))
8+
(post1 <- stan_glm(y ~ 1, data = light_dat, seed = SEED, refresh = REFRESH))
99
y_rep <- posterior_predict(post1)
1010

1111
pp_check(post1, plotfun = "stat", stat = "min") +
@@ -25,20 +25,21 @@ pp_check(post1, plotfun = "hist") + ggtitle(ttl)
2525
# Make similar plot manually but combine all y_rep
2626
op <- par('mfrow')
2727
par(mfrow = 1:2, mar = c(5,4,1,1) + .1)
28-
hist(DATA_ENV$y, prob = TRUE, main = "", las = 1,
28+
hist(light_dat$y, prob = TRUE, main = "", las = 1,
2929
xlab = "Measurement Error for the Speed of Light")
3030
hist(y_rep, prob = TRUE, main = "", las = 1,
3131
xlab = "Predicted Measurement Error")
3232
par(mfrow = op)
3333

34-
source(paste0(ROOT, "ARM/Ch.8/roaches.data.R"), local = DATA_ENV, verbose = FALSE)
35-
post2 <- stan_glm(y ~ roach1 + treatment + senior, data = DATA_ENV,
34+
# Roaches example
35+
data(roaches, package = "rstanarm")
36+
post2 <- stan_glm(y ~ roach1 + treatment + senior, data = roaches,
3637
family = poisson(link = "log"), seed = SEED, refresh = REFRESH)
3738
y_rep <- posterior_predict(post2)
3839

3940
# Compare observed proportion of zeros to predicted proportion of zeros
4041
mean(y_rep == 0)
41-
mean(DATA_ENV$y == 0)
42+
mean(roaches$y == 0)
4243
summary(apply(y_rep == 0, 1, mean))
4344
prop0 <- function(x) mean(x == 0)
4445
pp_check(post2, plotfun = "stat", stat = "prop0") # model doesn't predict enough zeros

demo/ARM_Ch09.R

+10-8
Original file line numberDiff line numberDiff line change
@@ -3,30 +3,32 @@ demo("SETUP", package = "rstanarm", verbose = FALSE, echo = FALSE, ask = FALSE)
33
# read data into DATA_ENV environment
44
source(paste0(ROOT, "ARM/Ch.9/electric_grade4.data.R"), local = DATA_ENV,
55
verbose = FALSE)
6+
dat <- with(DATA_ENV, data.frame(post_test, grade, pre_test, treatment))
67

7-
post1 <- stan_lm(post_test ~ treatment * pre_test, data = DATA_ENV,
8+
post1 <- stan_lm(post_test ~ treatment * pre_test, data = dat,
89
prior = R2(0.75), seed = SEED, refresh = REFRESH)
910
post1 # underfitting but ok because it is an experiment
1011
plot(post1)
1112

12-
y_0 <- posterior_predict(post1, data.frame(treatment = 0, pre_test = DATA_ENV$pre_test))
13-
y_1 <- posterior_predict(post1, data.frame(treatment = 1, pre_test = DATA_ENV$pre_test))
13+
y_0 <- posterior_predict(post1, data.frame(treatment = 0, pre_test = dat$pre_test))
14+
y_1 <- posterior_predict(post1, data.frame(treatment = 1, pre_test = dat$pre_test))
1415
diff <- y_1 - y_0
1516
mean(diff)
1617
sd(diff) # much larger than in ARM
1718
hist(diff, prob = TRUE, main = "", xlab = "Estimated Average Treatment Effect", las = 1)
1819

1920

20-
stopifnot(require(gridExtra))
21+
stopifnot(require(bayesplot))
2122
plots <- sapply(1:4, simplify = FALSE, FUN = function(k) {
22-
source(paste0(ROOT, "ARM/Ch.9/electric_grade", k, "_supp.data.R"),
23-
local = DATA_ENV, verbose = FALSE)
24-
out <- plot(stan_lm(post_test ~ supp + pre_test, data = DATA_ENV,
23+
dat$supp <-
24+
source(paste0(ROOT, "ARM/Ch.9/electric_grade", k, "_supp.data.R"),
25+
verbose = FALSE)$value
26+
out <- plot(stan_lm(post_test ~ supp + pre_test, data = dat,
2527
seed = SEED, refresh = REFRESH,
2628
prior = R2(0.75, what = "mean")))
2729
out + ggtitle(paste("Grade =", k))
2830
})
29-
marrangeGrob(plots, nrow = 2, ncol = 2)
31+
bayesplot_grid(plots = plots, grid_args = list(nrow = 2, ncol = 2))
3032

3133
ANSWER <- tolower(readline("Do you want to remove the objects this demo created? (y/n) "))
3234
if (ANSWER != "n") {

0 commit comments

Comments
 (0)