@@ -270,72 +270,84 @@ test_that("error for unrecognized container", {
270
270
}, " unrecognized container" )
271
271
})
272
272
273
+ container.values <- c(" priority_queue" , " multiset" , " list" )
273
274
test_that(" variance estimates and loss correct" , {
274
275
x <- c(0 ,0.1 , 1 ,1.2 )
275
- fit <- binsegRcpp :: binseg(" meanvar_norm" , x , max.segments = 2L )
276
- expect_equal(fit $ splits $ before.mean , c(mean(x ), mean(x [1 : 2 ])))
277
- expect_equal(fit $ splits $ after.mean , c(NA , mean(x [3 : 4 ])))
278
- expect_equal(fit $ splits $ before.var , c(myvar(x ), myvar(x [1 : 2 ])))
279
- expect_equal(fit $ splits $ after.var , c(NA , myvar(x [3 : 4 ])))
280
- nll <- function (y )- sum(dnorm(y , mean(y ), sqrt(myvar(y )), log = TRUE ))
281
- expect_equal(fit $ splits $ loss , c(nll(x ), nll(x [1 : 2 ])+ nll(x [3 : 4 ])))
282
- seg.dt <- coef(fit , 2L )
283
- expect_equal(seg.dt $ end , c(2 ,4 ))
284
- expect_equal(seg.dt $ mean , c(mean(x [1 : 2 ]),mean(x [3 : 4 ])))
285
- expect_equal(seg.dt $ var , c(myvar(x [1 : 2 ]),myvar(x [3 : 4 ])))
276
+ for (container in container.values ){
277
+ fit <- binsegRcpp :: binseg(" meanvar_norm" , x , max.segments = 2L , container.str = container )
278
+ expect_equal(fit $ splits $ before.mean , c(mean(x ), mean(x [1 : 2 ])))
279
+ expect_equal(fit $ splits $ after.mean , c(NA , mean(x [3 : 4 ])))
280
+ expect_equal(fit $ splits $ before.var , c(myvar(x ), myvar(x [1 : 2 ])))
281
+ expect_equal(fit $ splits $ after.var , c(NA , myvar(x [3 : 4 ])))
282
+ nll <- function (y )- sum(dnorm(y , mean(y ), sqrt(myvar(y )), log = TRUE ))
283
+ expect_equal(fit $ splits $ loss , c(nll(x ), nll(x [1 : 2 ])+ nll(x [3 : 4 ])))
284
+ seg.dt <- coef(fit , 2L )
285
+ expect_equal(seg.dt $ end , c(2 ,4 ))
286
+ expect_equal(seg.dt $ mean , c(mean(x [1 : 2 ]),mean(x [3 : 4 ])))
287
+ expect_equal(seg.dt $ var , c(myvar(x [1 : 2 ]),myvar(x [3 : 4 ])))
288
+ }
286
289
})
287
290
288
291
test_that(" meanvar_norm does not have segs with size 1" , {
289
292
data.per.seg <- 10
290
293
sim <- function (mu ,sigma )rnorm(data.per.seg ,mu ,sigma )
291
294
set.seed(1 )
292
295
data.vec <- c(sim(10 ,1 ), sim(0 , 5 ))
293
- fit <- binsegRcpp :: binseg(" meanvar_norm" , data.vec )
294
- expect_lte(nrow(fit $ splits ), data.per.seg )
296
+ for (container in container.values ){
297
+ fit <- binsegRcpp :: binseg(" meanvar_norm" , data.vec , container.str = container )
298
+ expect_lte(nrow(fit $ splits ), data.per.seg )
299
+ }
295
300
})
296
301
297
302
test_that(" l1loss param is median" , {
298
303
data.vec <- c(1.3 , 1.0 , 1.1 , 2.0 , 2.1 , 3.1 )
299
- l1fit <- binsegRcpp :: binseg(" l1" , data.vec , max.segments = 2L )
300
- seg.dt <- coef(l1fit )
301
- expected.median <- c(
302
- median(data.vec ),
303
- median(data.vec [1 : 3 ]),
304
- median(data.vec [4 : 6 ]))
305
- expect_equal(seg.dt $ median , expected.median )
306
- expected.loss <- sum(abs(median(data.vec )- data.vec ))
307
- expect_equal(l1fit $ splits $ loss [1 ], expected.loss )
304
+ for (container in container.values ){
305
+ l1fit <- binsegRcpp :: binseg(" l1" , data.vec , max.segments = 2L , container.str = container )
306
+ seg.dt <- coef(l1fit )
307
+ expected.median <- c(
308
+ median(data.vec ),
309
+ median(data.vec [1 : 3 ]),
310
+ median(data.vec [4 : 6 ]))
311
+ expect_equal(seg.dt $ median , expected.median )
312
+ expected.loss <- sum(abs(median(data.vec )- data.vec ))
313
+ expect_equal(l1fit $ splits $ loss [1 ], expected.loss )
314
+ }
308
315
})
309
316
310
317
test_that(" laplace params median,scale" , {
311
318
data.vec <- c(1.3 , 1.0 , 1.1 , 2.0 , 2.1 , 3.1 )
312
- l1fit <- binsegRcpp :: binseg(" laplace" , data.vec , max.segments = 2L )
313
- seg.dt <- coef(l1fit )
314
- expected.median <- c(
315
- median(data.vec ),
316
- median(data.vec [1 : 3 ]),
317
- median(data.vec [4 : 6 ]))
318
- expect_equal(seg.dt $ median , expected.median )
319
- sum.abs.dev <- sum(abs(median(data.vec )- data.vec ))
320
- N.data <- length(data.vec )
321
- est.scale <- sum.abs.dev / N.data
322
- expect_equal(l1fit $ splits $ before.scale [1 ], est.scale )
323
- expected.loss <- N.data * log(2 * est.scale )+ sum.abs.dev / est.scale
324
- expect_equal(l1fit $ splits $ loss [1 ], expected.loss )
319
+ for (container in container.values ){
320
+ l1fit <- binsegRcpp :: binseg(" laplace" , data.vec , max.segments = 2L , container.str = container )
321
+ seg.dt <- coef(l1fit )
322
+ expected.median <- c(
323
+ median(data.vec ),
324
+ median(data.vec [1 : 3 ]),
325
+ median(data.vec [4 : 6 ]))
326
+ expect_equal(seg.dt $ median , expected.median )
327
+ sum.abs.dev <- sum(abs(median(data.vec )- data.vec ))
328
+ N.data <- length(data.vec )
329
+ est.scale <- sum.abs.dev / N.data
330
+ expect_equal(l1fit $ splits $ before.scale [1 ], est.scale )
331
+ expected.loss <- N.data * log(2 * est.scale )+ sum.abs.dev / est.scale
332
+ expect_equal(l1fit $ splits $ loss [1 ], expected.loss )
333
+ }
325
334
})
326
335
327
336
test_that(" laplace validation loss ok" , {
328
337
data.vec <- c(1.3 , 1.0 , 1.1 , 2.0 , 2.1 , 3.1 )
329
338
is.validation.vec <- c(FALSE ,FALSE ,TRUE ,TRUE ,FALSE ,FALSE )
330
- l1fit <- binsegRcpp :: binseg(
331
- " laplace" , data.vec , max.segments = 2L ,
332
- is.validation.vec = is.validation.vec )
333
- subtrain.vec <- data.vec [! is.validation.vec ]
334
- est.median <- median(subtrain.vec )
335
- est.scale <- sum(abs(est.median - subtrain.vec ))/ length(subtrain.vec )
336
- sum.abs.dev <- sum(abs(est.median - data.vec [is.validation.vec ]))
337
- vloss1 <- sum(is.validation.vec )* log(2 * est.scale )+ sum.abs.dev / est.scale
338
- expect_equal(l1fit $ splits $ validation.loss [1 ], vloss1 )
339
+ for (container in container.values ){
340
+ l1fit <- binsegRcpp :: binseg(
341
+ " laplace" , data.vec , max.segments = 2L ,
342
+ container.str = container ,
343
+ is.validation.vec = is.validation.vec )
344
+ subtrain.vec <- data.vec [! is.validation.vec ]
345
+ est.median <- median(subtrain.vec )
346
+ est.scale <- sum(abs(est.median - subtrain.vec ))/ length(subtrain.vec )
347
+ sum.abs.dev <- sum(abs(est.median - data.vec [is.validation.vec ]))
348
+ vloss1 <- sum(is.validation.vec )* log(2 * est.scale )+ sum.abs.dev / est.scale
349
+ expect_equal(l1fit $ splits $ validation.loss [1 ], vloss1 )
350
+ }
339
351
})
340
352
341
353
test_that(" laplace correct split for int data" , {
@@ -346,40 +358,48 @@ test_that("laplace correct split for int data", {
346
358
b = mean(d )
347
359
sum(d / b + log(2 * b ))
348
360
}
349
- fit <- binseg(" laplace" , 1 : 8 )
350
- splits <- fit [[" splits" ]]
351
- computed.loss <- splits [[" loss" ]]
352
- expected.loss <- c(
353
- laplace(8 ),
354
- laplace(3 )+ laplace(5 ),
355
- laplace(3 )+ laplace(3 )+ laplace(2 ))
356
- expect_equal(computed.loss , expected.loss )
361
+ for (container in container.values ){
362
+ fit <- binseg(" laplace" , 1 : 8 , container.str = container )
363
+ splits <- fit [[" splits" ]]
364
+ computed.loss <- splits [[" loss" ]]
365
+ expected.loss <- c(
366
+ laplace(8 ),
367
+ laplace(3 )+ laplace(5 ),
368
+ laplace(3 )+ laplace(3 )+ laplace(2 ))
369
+ expect_equal(computed.loss , expected.loss )
370
+ }
357
371
})
358
372
359
373
test_that(" meanvar_norm validation loss ok" , {
360
374
data.vec <- c(1.3 , 1.0 , 1.1 , 2.0 , 2.1 , 3.1 )
361
375
is.validation.vec <- c(FALSE ,FALSE ,TRUE ,TRUE ,FALSE ,FALSE )
362
- l1fit <- binsegRcpp :: binseg(
363
- " meanvar_norm" , data.vec , max.segments = 2L ,
364
- is.validation.vec = is.validation.vec )
365
- subtrain.vec <- data.vec [! is.validation.vec ]
366
- vloss1 <- - sum(dnorm(
367
- data.vec [is.validation.vec ],
368
- mean(subtrain.vec ),
369
- sqrt(myvar(subtrain.vec )),
370
- log = TRUE ))
371
- expect_equal(l1fit $ splits $ validation.loss [1 ], vloss1 )
376
+ for (container in container.values ){
377
+ l1fit <- binsegRcpp :: binseg(
378
+ " meanvar_norm" , data.vec , max.segments = 2L ,
379
+ container.str = container ,
380
+ is.validation.vec = is.validation.vec )
381
+ subtrain.vec <- data.vec [! is.validation.vec ]
382
+ vloss1 <- - sum(dnorm(
383
+ data.vec [is.validation.vec ],
384
+ mean(subtrain.vec ),
385
+ sqrt(myvar(subtrain.vec )),
386
+ log = TRUE ))
387
+ expect_equal(l1fit $ splits $ validation.loss [1 ], vloss1 )
388
+ }
372
389
})
373
390
374
391
test_that(" l1 validation loss ok" , {
375
392
data.vec <- c(1.3 , 1.0 , 1.1 , 2.0 , 2.1 , 3.1 )
376
393
is.validation.vec <- c(FALSE ,FALSE ,TRUE ,TRUE ,FALSE ,FALSE )
377
- l1fit <- binsegRcpp :: binseg(
378
- " l1" , data.vec , max.segments = 2L ,
379
- is.validation.vec = is.validation.vec )
380
- vloss1 <- sum(abs(
381
- data.vec [is.validation.vec ]- median(data.vec [! is.validation.vec ])))
382
- expect_equal(l1fit $ splits $ validation.loss [1 ], vloss1 )
394
+ for (container in container.values ){
395
+ l1fit <- binsegRcpp :: binseg(
396
+ " l1" , data.vec , max.segments = 2L ,
397
+ container.str = container ,
398
+ is.validation.vec = is.validation.vec )
399
+ vloss1 <- sum(abs(
400
+ data.vec [is.validation.vec ]- median(data.vec [! is.validation.vec ])))
401
+ expect_equal(l1fit $ splits $ validation.loss [1 ], vloss1 )
402
+ }
383
403
})
384
404
385
405
test_that(" poisson loss ok for simple ex with zero" , {
@@ -389,11 +409,13 @@ test_that("poisson loss ok for simple ex with zero", {
389
409
expected.loss <- c(
390
410
N.data * mu - log(mu )* sum(data.vec ),
391
411
1 )
392
- fit <- binsegRcpp :: binseg(" poisson" , data.vec )
393
- expect_equal(fit $ splits $ end , 2 : 1 )
394
- expect_equal(fit $ splits $ loss , expected.loss )
395
- segs <- coef(fit )
396
- expect_equal(segs $ mean , c(mu , data.vec ))
412
+ for (container in container.values ){
413
+ fit <- binsegRcpp :: binseg(" poisson" , data.vec , container.str = container )
414
+ expect_equal(fit $ splits $ end , 2 : 1 )
415
+ expect_equal(fit $ splits $ loss , expected.loss )
416
+ segs <- coef(fit )
417
+ expect_equal(segs $ mean , c(mu , data.vec ))
418
+ }
397
419
})
398
420
399
421
test_that(" error for poisson loss with bad data" , {
@@ -408,22 +430,33 @@ test_that("error for poisson loss with bad data", {
408
430
test_that(" get_complexity respects min.segment.length" , {
409
431
n.data <- 8
410
432
zero.one <- rep(0 : 1 , l = n.data )
411
- fit <- binsegRcpp :: binseg(" mean_norm" , zero.one )
412
- clist <- binsegRcpp :: get_complexity(fit )
413
- worst <- clist $ iterations [case == " worst" ]
414
- expect_equal(worst $ splits , seq(n.data - 1 , 0 ))
415
- zero.ten <- rep(c(0 ,1 ,10 ,11 ), l = n.data )
416
- mvfit <- binsegRcpp :: binseg(" meanvar_norm" , zero.ten )
417
- mvlist <- binsegRcpp :: get_complexity(mvfit )
418
- mvworst <- mvlist $ iterations [case == " worst" ]
419
- expect_equal(mvworst $ splits , c(5 ,3 ,1 ,0 ))
433
+ for (container in container.values ){
434
+ fit <- binsegRcpp :: binseg(" mean_norm" , zero.one , container.str = container )
435
+ clist <- binsegRcpp :: get_complexity(fit )
436
+ worst <- clist $ iterations [case == " worst" ]
437
+ expect_equal(worst $ splits , seq(n.data - 1 , 0 ))
438
+ zero.ten <- rep(c(0 ,1 ,10 ,11 ), l = n.data )
439
+ mvfit <- binsegRcpp :: binseg(" meanvar_norm" , zero.ten )
440
+ mvlist <- binsegRcpp :: get_complexity(mvfit )
441
+ mvworst <- mvlist $ iterations [case == " worst" ]
442
+ expect_equal(mvworst $ splits , c(5 ,3 ,1 ,0 ))
443
+ }
420
444
})
421
445
422
446
test_that(" empirical splits not negative" , {
423
- fit <- binsegRcpp :: binseg(" meanvar_norm" , 1 : 8 )
424
- clist <- binsegRcpp :: get_complexity(fit )
425
- esplits <- clist $ iterations [case == " empirical" , splits ]
426
- expect_equal(esplits , c(5 ,2 ,0 ,0 ))
447
+ for (container in container.values ){
448
+ fit <- binsegRcpp :: binseg(" meanvar_norm" , 1 : 8 , container.str = container )
449
+ clist <- binsegRcpp :: get_complexity(fit )
450
+ esplits <- clist $ iterations [case == " empirical" , splits ]
451
+ expect_equal(esplits , c(5 ,2 ,0 ,0 ))
452
+ }
453
+ })
454
+
455
+ test_that(" first three splits result in four segments, two data each" , {
456
+ for (container in container.values ){
457
+ fit <- binsegRcpp :: binseg(" mean_norm" , 1 : 8 , max.segments = 4 , container.str = container )
458
+ expect_equal(sort(fit $ splits $ end ), c(2 ,4 ,6 ,8 ))
459
+ }
427
460
})
428
461
429
462
test_that(" l1 loss chooses even split if equal loss" , {
@@ -437,8 +470,10 @@ test_that("l1 loss chooses even split if equal loss", {
437
470
sum(c(seg(1 ,end ),seg(end + 1 ,N.max )))
438
471
}
439
472
sapply(seq(1 ,N.max - 1 ), two.segs )
440
- fit <- binsegRcpp :: binseg(" l1" , data.vec , max.segments = 2L )
441
- expect_equal(fit $ splits $ end , c(8 ,4 ))
473
+ for (container in container.values ){
474
+ fit <- binsegRcpp :: binseg(" l1" , data.vec , max.segments = 2L , container.str = container )
475
+ expect_equal(fit $ splits $ end , c(8 ,4 ))
476
+ }
442
477
})
443
478
444
479
test_that(" l1 loss chooses even splits after storage" , {
@@ -462,19 +497,21 @@ test_that("l1 loss chooses even splits after storage", {
462
497
test_that(" poisson split is not in middle" , {
463
498
N.max <- 8
464
499
data.vec <- 1 : N.max
465
- fit <- binsegRcpp :: binseg(" poisson" , data.vec , max.segments = 2L )
466
- seg <- function (first ,last ){
467
- sdata <- data.vec [first : last ]
468
- ploss(sdata , mean(sdata ))
469
- }
470
- two.segs <- function (end ){
471
- sum(c(seg(1 ,end ),seg(end + 1 ,N.max )))
500
+ for (container in container.values ){
501
+ fit <- binsegRcpp :: binseg(" poisson" , data.vec , max.segments = 2L , container.str = container )
502
+ seg <- function (first ,last ){
503
+ sdata <- data.vec [first : last ]
504
+ ploss(sdata , mean(sdata ))
505
+ }
506
+ two.segs <- function (end ){
507
+ sum(c(seg(1 ,end ),seg(end + 1 ,N.max )))
508
+ }
509
+ loss.vec <- sapply(1 : 7 , two.segs )
510
+ expected.loss <- c(seg(1 ,N.max ),min(loss.vec ))
511
+ expect_equal(fit $ splits $ loss , expected.loss )
512
+ expected.end <- c(N.max ,which.min(loss.vec ))
513
+ expect_equal(fit $ splits $ end , expected.end )
472
514
}
473
- loss.vec <- sapply(1 : 7 , two.segs )
474
- expected.loss <- c(seg(1 ,N.max ),min(loss.vec ))
475
- expect_equal(fit $ splits $ loss , expected.loss )
476
- expected.end <- c(N.max ,which.min(loss.vec ))
477
- expect_equal(fit $ splits $ end , expected.end )
478
515
})
479
516
480
517
test_that(" max_segs=N/2 possible for N=2^10" , {
@@ -484,12 +521,14 @@ test_that("max_segs=N/2 possible for N=2^10", {
484
521
cum.squares <- cumsum(data.vec ^ 2 )
485
522
mu <- data.vec
486
523
v <- data.vec ^ 2 + data.vec * (data.vec - 2 * data.vec )
487
- fit <- binsegRcpp :: binseg(" meanvar_norm" ,data.vec )
488
- max.segs <- fit $ splits [.N , segments ]
489
- seg.dt <- coef(fit , max.segs )
490
- seg.dt [, n.data : = end - start + 1 ]
491
- table(seg.dt $ n.data )
492
- expect_equal(nrow(fit $ splits ), N.data / 2 )
524
+ for (container in container.values ){
525
+ fit <- binsegRcpp :: binseg(" meanvar_norm" ,data.vec , container.str = container )
526
+ max.segs <- fit $ splits [.N , segments ]
527
+ seg.dt <- coef(fit , max.segs )
528
+ seg.dt [, n.data : = end - start + 1 ]
529
+ table(seg.dt $ n.data )
530
+ expect_equal(nrow(fit $ splits ), N.data / 2 )
531
+ }
493
532
})
494
533
495
534
test_that(" error when number of data smaller than min segment length" , {
0 commit comments