Skip to content

Commit c9bad39

Browse files
committed
add new measures to list of acceptable measures
1 parent b853160 commit c9bad39

File tree

2 files changed

+1440
-1438
lines changed

2 files changed

+1440
-1438
lines changed

R/predictiveness_measure.R

Lines changed: 180 additions & 179 deletions
Original file line numberDiff line numberDiff line change
@@ -1,179 +1,180 @@
1-
#' Construct a Predictiveness Measure
2-
#'
3-
#' @param type the measure of interest (e.g., "accuracy", "auc", "r_squared")
4-
#' @param y the outcome of interest
5-
#' @param a the exposure of interest (only used if \code{type = "average_value"})
6-
#' @param fitted_values fitted values from a regression function using the
7-
#' observed data (may be within a specified fold, for cross-fitted estimates).
8-
#' @param cross_fitting_folds folds for cross-fitting, if used to obtain the
9-
#' fitted values. If not used, a vector of ones.
10-
#' @param full_y the observed outcome (not used, defaults to \code{NULL}).
11-
#' @param nuisance_estimators a list of nuisance function estimators on the
12-
#' observed data (may be within a specified fold, for cross-fitted estimates).
13-
#' For the average value measure: an estimator of the optimal treatment rule (\code{f_n}); an estimator of the
14-
#' propensity score under the estimated optimal treatment rule (\code{g_n}); and an estimator
15-
#' of the outcome regression when treatment is assigned according to the estimated optimal rule (\code{q_n}).
16-
#' @param C the indicator of coarsening (1 denotes observed, 0 denotes
17-
#' unobserved).
18-
#' @param Z either \code{NULL} (if no coarsening) or a matrix-like object
19-
#' containing the fully observed data.
20-
#' @param folds_Z either the cross-validation folds for the observed data
21-
#' (no coarsening) or a vector of folds for the fully observed data Z.
22-
#' @param ipc_weights weights for inverse probability of coarsening (IPC)
23-
#' (e.g., inverse weights from a two-phase sample) weighted estimation.
24-
#' Assumed to be already inverted.
25-
#' (i.e., ipc_weights = 1 / [estimated probability weights]).
26-
#' @param ipc_fit_type if "external", then use \code{ipc_eif_preds}; if "SL",
27-
#' fit a SuperLearner to determine the IPC correction to the efficient
28-
#' influence function.
29-
#' @param ipc_eif_preds if \code{ipc_fit_type = "external"}, the fitted values
30-
#' from a regression of the full-data EIF on the fully observed
31-
#' covariates/outcome; otherwise, not used.
32-
#' @param ipc_est_type IPC correction, either \code{"ipw"} (for classical
33-
#' inverse probability weighting) or \code{"aipw"} (for augmented inverse
34-
#' probability weighting; the default).
35-
#' @param scale if doing an IPC correction, then the scale that the correction
36-
#' should be computed on (e.g., "identity"; or "logit" to logit-transform,
37-
#' apply the correction, and back-transform).
38-
#' @param na.rm logical; should \code{NA}s be removed in computation?
39-
#' (defaults to \code{FALSE})
40-
#' @param ... other arguments to SuperLearner, if \code{ipc_fit_type = "SL"}.
41-
#'
42-
#' @return An object of class \code{"predictiveness_measure"}, with the following
43-
#' attributes:
44-
#' @export
45-
predictiveness_measure <- function(type = character(),
46-
y = numeric(),
47-
a = numeric(),
48-
fitted_values = numeric(),
49-
cross_fitting_folds = rep(1, length(fitted_values)),
50-
full_y = NULL,
51-
nuisance_estimators = list(),
52-
C = rep(1, length(y)),
53-
Z = NULL,
54-
folds_Z = cross_fitting_folds,
55-
ipc_weights = rep(1, length(y)),
56-
ipc_fit_type = "SL",
57-
ipc_eif_preds = numeric(),
58-
ipc_est_type = "aipw",
59-
scale = "identity",
60-
na.rm = TRUE,
61-
...) {
62-
validate_predictiveness_measure(new_predictiveness_measure(
63-
type = type, y = y, a = a, fitted_values = fitted_values, cross_fitting_folds = cross_fitting_folds,
64-
full_y = full_y, nuisance_estimators = nuisance_estimators, C = C, Z = Z, folds_Z = folds_Z,
65-
ipc_weights = ipc_weights, ipc_fit_type = ipc_fit_type, ipc_eif_preds = ipc_eif_preds,
66-
ipc_est_type = ipc_est_type, scale = scale, na.rm = na.rm, ...
67-
))
68-
}
69-
70-
71-
new_predictiveness_measure <- function(type = character(),
72-
y = numeric(),
73-
a = numeric(),
74-
fitted_values = numeric(),
75-
cross_fitting_folds = numeric(),
76-
full_y = NULL,
77-
nuisance_estimators = list(),
78-
C = numeric(),
79-
Z = NULL,
80-
folds_Z = NULL,
81-
ipc_weights = numeric(),
82-
ipc_fit_type = character(),
83-
ipc_eif_preds = numeric(),
84-
ipc_est_type = character(),
85-
scale = character(),
86-
na.rm = logical(),
87-
...) {
88-
stopifnot(type %in% c("accuracy", "anova", "auc", "average_value",
89-
"cross_entropy", "deviance", "mse", "r_squared"))
90-
stopifnot(is.numeric(y))
91-
stopifnot(is.numeric(a))
92-
stopifnot(is.numeric(fitted_values))
93-
stopifnot(is.numeric(cross_fitting_folds))
94-
stopifnot(is.numeric(C))
95-
stopifnot(is.numeric(ipc_weights))
96-
stopifnot(is.character(ipc_fit_type))
97-
stopifnot(is.numeric(ipc_eif_preds))
98-
stopifnot(is.character(ipc_est_type))
99-
stopifnot(scale %in% c("identity", "log", "logit"))
100-
stopifnot(is.logical(na.rm))
101-
102-
arg_lst <- list(...)
103-
if (length(ipc_weights) == 0) {
104-
ipc_weights <- rep(1, length(y))
105-
}
106-
if (length(C) == 0) {
107-
C <- rep(1, length(y))
108-
folds_Z <- cross_fitting_folds
109-
}
110-
structure(
111-
c(list(y = y, a = a, fitted_values = fitted_values, cross_fitting_folds = cross_fitting_folds,
112-
K = length(unique(cross_fitting_folds)), full_y = full_y,
113-
nuisance_estimators = nuisance_estimators, point_est = NA, eif = rep(NA, length(y)),
114-
C = C, Z = Z, folds_Z = folds_Z, ipc_weights = ipc_weights, ipc_eif_preds = ipc_eif_preds),
115-
arg_lst),
116-
type = type, ipc_fit_type = ipc_fit_type, ipc_est_type = ipc_est_type,
117-
scale = scale, na.rm = na.rm,
118-
class = "predictiveness_measure"
119-
)
120-
}
121-
122-
validate_predictiveness_measure <- function(x) {
123-
input_data <- unclass(x)
124-
type <- attr(x, "type")
125-
ipc_fit_type <- attr(x, "ipc_fit_type")
126-
ipc_est_type <- attr(x, "ipc_est_type")
127-
scale <- attr(x, "scale")
128-
na.rm <- attr(x, "na.rm")
129-
130-
if (!any(grepl("average_value", type))) {
131-
if (length(input_data$y) != length(input_data$fitted_values)) {
132-
stop("The outcome data must have the same dimension as the fitted values",
133-
call. = FALSE)
134-
}
135-
} else {
136-
if (length(input_data$nuisance_estimators) == 0) {
137-
stop(paste0(
138-
"To estimate the average value, the following must be estimated:",
139-
" the optimal treatment rule (pass this in as named element f_n of the list);",
140-
" the propensity score under the optimal treatment rule (pass this in as named element g_n of the list);",
141-
" and the outcome regression when treatment is assigned according to the optimal rule (pass this in as named element q_n of the list)."
142-
), call. = FALSE)
143-
} else {
144-
if (length(input_data$nuisance_estimators$f_n) != length(input_data$y)) {
145-
stop("The optimal treatment assignment must have the same dimension as the outcome.", call. = FALSE)
146-
}
147-
if (length(input_data$nuisance_estimators$g_n) != length(input_data$y)) {
148-
stop("The estimated propensity score must have the same dimension as the outcome.", call. = FALSE)
149-
}
150-
if (length(input_data$nuisance_estimators$q_n) != length(input_data$y)) {
151-
stop("The estimated outcome regression must have the same dimension as the outcome.", call. = FALSE)
152-
}
153-
}
154-
}
155-
if (length(input_data$cross_fitting_folds) != length(input_data$fitted_values)) {
156-
stop("If cross-fitting is desired, each observation must be put into a fold.")
157-
}
158-
if (length(input_data$a) != 0) {
159-
if (length(input_data$y) != length(input_data$a)) {
160-
stop("The outcome data must have the same dimension as the exposure data",
161-
call. = FALSE)
162-
}
163-
}
164-
if (length(input_data$ipc_weights) != length(input_data$C)) {
165-
stop("The full dataset must have the same dimension as the inverse probability weights",
166-
call. = FALSE)
167-
}
168-
if (!is.null(input_data$Z)) {
169-
if (nrow(input_data$Z) != length(input_data$C)) {
170-
stop("The data that are always measured (i.e., are not coarsened) must be the same dimension as the coarsening variable",
171-
call. = FALSE)
172-
}
173-
}
174-
x
175-
}
176-
177-
is.predictiveness_measure <- function(x) {
178-
inherits(x, "predictiveness_measure")
179-
}
1+
#' Construct a Predictiveness Measure
2+
#'
3+
#' @param type the measure of interest (e.g., "accuracy", "auc", "r_squared")
4+
#' @param y the outcome of interest
5+
#' @param a the exposure of interest (only used if \code{type = "average_value"})
6+
#' @param fitted_values fitted values from a regression function using the
7+
#' observed data (may be within a specified fold, for cross-fitted estimates).
8+
#' @param cross_fitting_folds folds for cross-fitting, if used to obtain the
9+
#' fitted values. If not used, a vector of ones.
10+
#' @param full_y the observed outcome (not used, defaults to \code{NULL}).
11+
#' @param nuisance_estimators a list of nuisance function estimators on the
12+
#' observed data (may be within a specified fold, for cross-fitted estimates).
13+
#' For the average value measure: an estimator of the optimal treatment rule (\code{f_n}); an estimator of the
14+
#' propensity score under the estimated optimal treatment rule (\code{g_n}); and an estimator
15+
#' of the outcome regression when treatment is assigned according to the estimated optimal rule (\code{q_n}).
16+
#' @param C the indicator of coarsening (1 denotes observed, 0 denotes
17+
#' unobserved).
18+
#' @param Z either \code{NULL} (if no coarsening) or a matrix-like object
19+
#' containing the fully observed data.
20+
#' @param folds_Z either the cross-validation folds for the observed data
21+
#' (no coarsening) or a vector of folds for the fully observed data Z.
22+
#' @param ipc_weights weights for inverse probability of coarsening (IPC)
23+
#' (e.g., inverse weights from a two-phase sample) weighted estimation.
24+
#' Assumed to be already inverted.
25+
#' (i.e., ipc_weights = 1 / [estimated probability weights]).
26+
#' @param ipc_fit_type if "external", then use \code{ipc_eif_preds}; if "SL",
27+
#' fit a SuperLearner to determine the IPC correction to the efficient
28+
#' influence function.
29+
#' @param ipc_eif_preds if \code{ipc_fit_type = "external"}, the fitted values
30+
#' from a regression of the full-data EIF on the fully observed
31+
#' covariates/outcome; otherwise, not used.
32+
#' @param ipc_est_type IPC correction, either \code{"ipw"} (for classical
33+
#' inverse probability weighting) or \code{"aipw"} (for augmented inverse
34+
#' probability weighting; the default).
35+
#' @param scale if doing an IPC correction, then the scale that the correction
36+
#' should be computed on (e.g., "identity"; or "logit" to logit-transform,
37+
#' apply the correction, and back-transform).
38+
#' @param na.rm logical; should \code{NA}s be removed in computation?
39+
#' (defaults to \code{FALSE})
40+
#' @param ... other arguments to SuperLearner, if \code{ipc_fit_type = "SL"}.
41+
#'
42+
#' @return An object of class \code{"predictiveness_measure"}, with the following
43+
#' attributes:
44+
#' @export
45+
predictiveness_measure <- function(type = character(),
46+
y = numeric(),
47+
a = numeric(),
48+
fitted_values = numeric(),
49+
cross_fitting_folds = rep(1, length(fitted_values)),
50+
full_y = NULL,
51+
nuisance_estimators = list(),
52+
C = rep(1, length(y)),
53+
Z = NULL,
54+
folds_Z = cross_fitting_folds,
55+
ipc_weights = rep(1, length(y)),
56+
ipc_fit_type = "SL",
57+
ipc_eif_preds = numeric(),
58+
ipc_est_type = "aipw",
59+
scale = "identity",
60+
na.rm = TRUE,
61+
...) {
62+
validate_predictiveness_measure(new_predictiveness_measure(
63+
type = type, y = y, a = a, fitted_values = fitted_values, cross_fitting_folds = cross_fitting_folds,
64+
full_y = full_y, nuisance_estimators = nuisance_estimators, C = C, Z = Z, folds_Z = folds_Z,
65+
ipc_weights = ipc_weights, ipc_fit_type = ipc_fit_type, ipc_eif_preds = ipc_eif_preds,
66+
ipc_est_type = ipc_est_type, scale = scale, na.rm = na.rm, ...
67+
))
68+
}
69+
70+
71+
new_predictiveness_measure <- function(type = character(),
72+
y = numeric(),
73+
a = numeric(),
74+
fitted_values = numeric(),
75+
cross_fitting_folds = numeric(),
76+
full_y = NULL,
77+
nuisance_estimators = list(),
78+
C = numeric(),
79+
Z = NULL,
80+
folds_Z = NULL,
81+
ipc_weights = numeric(),
82+
ipc_fit_type = character(),
83+
ipc_eif_preds = numeric(),
84+
ipc_est_type = character(),
85+
scale = character(),
86+
na.rm = logical(),
87+
...) {
88+
stopifnot(type %in% c("accuracy", "anova", "auc", "average_value",
89+
"cross_entropy", "deviance", "mse", "r_squared",
90+
"sensitivity", "specificity", "ppv", "npv"))
91+
stopifnot(is.numeric(y))
92+
stopifnot(is.numeric(a))
93+
stopifnot(is.numeric(fitted_values))
94+
stopifnot(is.numeric(cross_fitting_folds))
95+
stopifnot(is.numeric(C))
96+
stopifnot(is.numeric(ipc_weights))
97+
stopifnot(is.character(ipc_fit_type))
98+
stopifnot(is.numeric(ipc_eif_preds))
99+
stopifnot(is.character(ipc_est_type))
100+
stopifnot(scale %in% c("identity", "log", "logit"))
101+
stopifnot(is.logical(na.rm))
102+
103+
arg_lst <- list(...)
104+
if (length(ipc_weights) == 0) {
105+
ipc_weights <- rep(1, length(y))
106+
}
107+
if (length(C) == 0) {
108+
C <- rep(1, length(y))
109+
folds_Z <- cross_fitting_folds
110+
}
111+
structure(
112+
c(list(y = y, a = a, fitted_values = fitted_values, cross_fitting_folds = cross_fitting_folds,
113+
K = length(unique(cross_fitting_folds)), full_y = full_y,
114+
nuisance_estimators = nuisance_estimators, point_est = NA, eif = rep(NA, length(y)),
115+
C = C, Z = Z, folds_Z = folds_Z, ipc_weights = ipc_weights, ipc_eif_preds = ipc_eif_preds),
116+
arg_lst),
117+
type = type, ipc_fit_type = ipc_fit_type, ipc_est_type = ipc_est_type,
118+
scale = scale, na.rm = na.rm,
119+
class = "predictiveness_measure"
120+
)
121+
}
122+
123+
validate_predictiveness_measure <- function(x) {
124+
input_data <- unclass(x)
125+
type <- attr(x, "type")
126+
ipc_fit_type <- attr(x, "ipc_fit_type")
127+
ipc_est_type <- attr(x, "ipc_est_type")
128+
scale <- attr(x, "scale")
129+
na.rm <- attr(x, "na.rm")
130+
131+
if (!any(grepl("average_value", type))) {
132+
if (length(input_data$y) != length(input_data$fitted_values)) {
133+
stop("The outcome data must have the same dimension as the fitted values",
134+
call. = FALSE)
135+
}
136+
} else {
137+
if (length(input_data$nuisance_estimators) == 0) {
138+
stop(paste0(
139+
"To estimate the average value, the following must be estimated:",
140+
" the optimal treatment rule (pass this in as named element f_n of the list);",
141+
" the propensity score under the optimal treatment rule (pass this in as named element g_n of the list);",
142+
" and the outcome regression when treatment is assigned according to the optimal rule (pass this in as named element q_n of the list)."
143+
), call. = FALSE)
144+
} else {
145+
if (length(input_data$nuisance_estimators$f_n) != length(input_data$y)) {
146+
stop("The optimal treatment assignment must have the same dimension as the outcome.", call. = FALSE)
147+
}
148+
if (length(input_data$nuisance_estimators$g_n) != length(input_data$y)) {
149+
stop("The estimated propensity score must have the same dimension as the outcome.", call. = FALSE)
150+
}
151+
if (length(input_data$nuisance_estimators$q_n) != length(input_data$y)) {
152+
stop("The estimated outcome regression must have the same dimension as the outcome.", call. = FALSE)
153+
}
154+
}
155+
}
156+
if (length(input_data$cross_fitting_folds) != length(input_data$fitted_values)) {
157+
stop("If cross-fitting is desired, each observation must be put into a fold.")
158+
}
159+
if (length(input_data$a) != 0) {
160+
if (length(input_data$y) != length(input_data$a)) {
161+
stop("The outcome data must have the same dimension as the exposure data",
162+
call. = FALSE)
163+
}
164+
}
165+
if (length(input_data$ipc_weights) != length(input_data$C)) {
166+
stop("The full dataset must have the same dimension as the inverse probability weights",
167+
call. = FALSE)
168+
}
169+
if (!is.null(input_data$Z)) {
170+
if (nrow(input_data$Z) != length(input_data$C)) {
171+
stop("The data that are always measured (i.e., are not coarsened) must be the same dimension as the coarsening variable",
172+
call. = FALSE)
173+
}
174+
}
175+
x
176+
}
177+
178+
is.predictiveness_measure <- function(x) {
179+
inherits(x, "predictiveness_measure")
180+
}

0 commit comments

Comments
 (0)