-
Notifications
You must be signed in to change notification settings - Fork 37
/
Copy pathMakeTestHigherWithinMoreObservations.R
64 lines (55 loc) · 2.68 KB
/
MakeTestHigherWithinMoreObservations.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
MakeTestHigherWithinMoreObservations <- function() {
# Return a test function that verifies that models with higher (mean.within.10.percent) tend
# to have more observations
Test <- function(cv.result) {
# verify that models with higher (mean.within.10.percent) have more observations
# ARG:
# cv.result : the returned value from CrossValidate, which has these fields
# $best.model.index : num scalar
# $all.assessment : data.frame
# RETURN a list satisfying the API for function Cv in compare-models.R
# $hypothesis : char scalar
# $passed : logical scalar, TRUE caller will stop if not TRUE
# $support : arbitrary object that justified value of $passed
#cat('starting Test2\n'); browser()
verbose <- FALSE
# determine statistics for each model across folds
fold.assessment <- cv.result$fold.assessment
MeanWithin10Percent <- function(model.index) {
values <- fold.assessment[model.index == fold.assessment$model.index,
'assessment.within.10.percent']
result <- mean(values)
result
}
MeanNumTrainingSamples <- function(model.index) {
values <- fold.assessment[model.index == fold.assessment$model.index,
'assessment.num.training.samples']
result <- mean(values)
result
}
nModels <- max(fold.assessment$model.index)
mean.within.10.percent <- sapply(1:nModels, MeanWithin10Percent)
mean.num.training.samples <- sapply(1:nModels, MeanNumTrainingSamples)
reduced.data <- data.frame(model.index = 1:nModels,
mean.within.10.percent = mean.within.10.percent,
mean.num.training.samples = mean.num.training.samples)
if (verbose) {
print(reduced.data)
}
# regress mean.num.training.samples ~ mean.within.10.percent
fitted.lm <- lm(formula = mean.num.training.samples ~ 0 + mean.within.10.percent,
data = reduced.data)
if (verbose) {
print(fitted.lm)
}
coefficient <- fitted.lm$coefficients # there is only one coefficient
passed <- coefficient > 0
result = list(hypothesis = 'models with higher mean within 10 percent have more observations',
passed = passed,
support = list(cv.result = cv.result,
reduced.data = reduced.data,
fitted.lm = fitted.lm))
result
}
Test
}