@@ -89,17 +89,17 @@ FitLasso <- function(X, Y, intercept = TRUE) {
89
89
# Input:
90
90
# X: a design matrix of size km by M (the number of candidate strings).
91
91
# Y: a vector of size km with estimated counts from EstimateBloomCounts(),
92
- # representing constraints
92
+ # representing constraints
93
93
# intercept: whether to fit with intercept or not.
94
94
#
95
95
# Output:
96
96
# a vector of size ncol(X) of coefficients.
97
97
98
98
# TODO(mironov): Test cv.glmnet instead of glmnet
99
99
100
- # Cap the number of non-zero coefficients to 500 or 80% of the number of
101
- # constraints, whichever is less. The 500 cap is for performance reasons, 80%
102
- # is to avoid overfitting.
100
+ # Cap the number of non-zero coefficients to 500 or 80% of the number of
101
+ # constraints, whichever is less. The 500 cap is for performance reasons, 80%
102
+ # is to avoid overfitting.
103
103
cap <- min(500 , nrow(X ) * .8 , ncol(X ))
104
104
105
105
mod <- glmnet(X , Y , standardize = FALSE , intercept = intercept ,
@@ -110,10 +110,10 @@ FitLasso <- function(X, Y, intercept = TRUE) {
110
110
coefs <- coefs [- 1 , , drop = FALSE ] # drop the intercept
111
111
l1cap <- sum(colSums(coefs ) < = 1.0 ) # find all columns with L1 norm <= 1
112
112
if (l1cap > 0 )
113
- coefs <- coefs [, l1cap ] # return the last set of coefficients with L1 <= 1
113
+ distr <- coefs [, l1cap ] # return the last set of coefficients with L1 <= 1
114
114
else
115
- coefs <- setNames(rep(0 , ncol(X )), colnames(X ))
116
- coefs
115
+ distr <- setNames(rep(0 , ncol(X )), colnames(X ))
116
+ distr
117
117
}
118
118
119
119
PerformInference <- function (X , Y , N , mod , params , alpha , correction ) {
@@ -230,17 +230,10 @@ FitDistribution <- function(estimates_stds, map, quiet = FALSE) {
230
230
if (! quiet )
231
231
cat(" LASSO selected " , sum(lasso > 0 ), " non-zero coefficients.\n " )
232
232
233
- coefs <- setNames( lasso , colnames(map ) )
233
+ names( lasso ) <- colnames(map )
234
234
235
- # if(length(support_coefs) > 0) { # LASSO may return an empty list
236
- # constrained_coefs <- ConstrainedLinModel(map[, support_coefs, drop = FALSE],
237
- # estimates_stds)
238
- #
239
- # coefs[support_coefs] <- constrained_coefs
240
- # }
241
-
242
- coefs
243
- }
235
+ lasso
236
+ }
244
237
245
238
Resample <- function (e ) {
246
239
# Simulate resampling of the Bloom filter estimates by adding Gaussian noise
@@ -282,7 +275,7 @@ Decode <- function(counts, map, params, alpha = 0.05,
282
275
283
276
# Run the fitting procedure several times (10 seems to be sufficient and not
284
277
# too many) to estimate standard deviation of the output.
285
- for (r in 1 : 5 ) {
278
+ for (r in 1 : 10 ) {
286
279
if (r > 1 )
287
280
e <- Resample(estimates_stds_filtered )
288
281
else
0 commit comments