diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R
new file mode 100644
index 0000000..842fb80
--- /dev/null
+++ b/.ci/atime/tests.R
@@ -0,0 +1,9 @@
+test.list <- atime::atime_test_list(
+  binseg_normal_best=atime::atime_test(
+    setup={
+      max.segs <- as.integer(N/2)
+      data_vec <- 1:N
+    },
+    expr=binsegRcpp::binseg_normal(data_vec, max.segs)
+  )
+)
diff --git a/.github/workflows/performance-tests.yml b/.github/workflows/performance-tests.yml
new file mode 100644
index 0000000..8f52140
--- /dev/null
+++ b/.github/workflows/performance-tests.yml
@@ -0,0 +1,32 @@
+name: atime performance tests
+
+on:
+  pull_request:
+    types:
+      - opened
+      - reopened
+      - synchronize
+    paths:
+      - 'R/**'
+      - 'src/**'
+      - '.ci/atime/**'
+
+jobs:
+  comment:
+    runs-on: ubuntu-latest
+    container: ghcr.io/iterative/cml:0-dvc2-base1
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+      repo_token: ${{ secrets.GITHUB_TOKEN }}
+    steps:
+      - uses: actions/checkout@v3
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          r-version: release
+          http-user-agent: release
+          use-public-rspm: true
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: any::rcmdcheck
+          needs: check
+      - uses: Anirban166/Autocomment-atime-results@v1.4.1
diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
index 3c0da1c..07badcc 100644
--- a/.github/workflows/test-coverage.yaml
+++ b/.github/workflows/test-coverage.yaml
@@ -1,4 +1,4 @@
-# Workflow derived from https://github.com/r-lib/actions/tree/master/examples
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 on:
   push:
@@ -15,16 +15,36 @@ jobs:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
 
-      - uses: r-lib/actions/setup-r@v1
+      - uses: r-lib/actions/setup-r@v2
         with:
           use-public-rspm: true
 
-      - uses: r-lib/actions/setup-r-dependencies@v1
+      - uses: r-lib/actions/setup-r-dependencies@v2
         with:
-          extra-packages: covr
+          extra-packages: any::covr
+          needs: coverage
 
       - name: Test coverage
-        run: covr::codecov()
+        run: |
+          covr::codecov(
+            quiet = FALSE,
+            clean = FALSE,
+            install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package")
+          )
         shell: Rscript {0}
+
+      - name: Show testthat output
+        if: always()
+        run: |
+          ## --------------------------------------------------------------------
+          find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true
+        shell: bash
+
+      - name: Upload test results
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: coverage-test-failures
+          path: ${{ runner.temp }}/package
diff --git a/DESCRIPTION b/DESCRIPTION
index d20cd20..aa62c05 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: binsegRcpp
 Type: Package
 Title: Efficient Implementation of Binary Segmentation
-Version: 2025.4.29
+Version: 2025.5.6
 Authors@R: person(given = c("Toby", "Dylan"),
  family = "Hocking",
  role = c("aut", "cre"),
diff --git a/NEWS b/NEWS
index 2496ef7..9994e20 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,7 @@
+Changes in version 2025.5.6
+
+- add new container: STL priority_queue (heap); almost no difference compared to multiset.
+
 Changes in version 2025.4.29
 
 - update Rcpp code in interface.cpp to avoid access warnings: use .begin() instead of &vec[0] to get pointer to first element of vector.
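The new .ci/atime/tests.R above declares the timing test that the performance-tests workflow measures on each pull request (and reports via the Autocomment-atime-results action). A minimal sketch of reproducing a similar measurement locally with the atime package follows; the size grid N and the plot call are illustrative assumptions, not part of this diff:

## Hypothetical local run (illustration only), mirroring .ci/atime/tests.R:
## time binsegRcpp::binseg_normal over a range of data sizes N.
atime.result <- atime::atime(
  N=2^seq(2, 16),  # assumed grid of data sizes, chosen for illustration
  setup={
    max.segs <- as.integer(N/2)
    data_vec <- 1:N
  },
  binseg_normal_best=binsegRcpp::binseg_normal(data_vec, max.segs))
plot(atime.result)  # atime results have a plot method showing time/memory versus N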
diff --git a/R/binseg.R b/R/binseg.R
index a342593..36f550b 100644
--- a/R/binseg.R
+++ b/R/binseg.R
@@ -25,10 +25,10 @@ binseg <- structure(function # Binary segmentation
   min.segment.length=NULL,
 ### Positive integer, minimum number of data points per
 ### segment. Default NULL means to use min given distribution.str.
-  container.str="multiset"
+  container.str="priority_queue"
 ### C++ container to use for storing breakpoints/cost. Most users
-### should leave this at the default "multiset" for efficiency but you
-### could use "list" if you want to study the time complexity of a
+### should leave this at the default "priority_queue" for efficiency, but you
+### could use "list" if you want to study the time complexity of an asymptotically
 ### slower implementation of binary segmentation.
 ){
   ##alias<< binsegRcpp
diff --git a/man/binseg.Rd b/man/binseg.Rd
index baf51a0..d6af013 100644
--- a/man/binseg.Rd
+++ b/man/binseg.Rd
@@ -15,7 +15,7 @@ log-linear time, using coef method.}
     weight.vec = rep(1, length(data.vec)),
     min.segment.length = NULL,
-    container.str = "multiset")}
+    container.str = "priority_queue")}
 \arguments{
 \item{distribution.str}{String indicating distribution/loss function, use
 \code{\link{get_distribution_info}} to see possible values.}
@@ -32,8 +32,8 @@ default=1:length(\code{data.vec}).}
 \item{min.segment.length}{Positive integer, minimum number of data points per
 segment. Default NULL means to use min given \code{distribution.str}.}
 \item{container.str}{C++ container to use for storing breakpoints/cost. Most users
-should leave this at the default "multiset" for efficiency but you
-could use "list" if you want to study the time complexity of a
+should leave this at the default "priority_queue" for efficiency, but you
+could use "list" if you want to study the time complexity of an asymptotically
 slower implementation of binary segmentation.}
 }
 \details{Each iteration involves first computing and storing the
diff --git a/src/binseg.cpp b/src/binseg.cpp
index 29c16ac..cd7bdf6 100644
--- a/src/binseg.cpp
+++ b/src/binseg.cpp
@@ -384,18 +384,9 @@
 template <typename T>
 class MyContainer : public Container {
 public:
   T segment_container;
-  typename T::iterator best;
   int get_size(void){
     return segment_container.size();
   }
-  void remove_best(void){
-    segment_container.erase(best);
-  }
-  virtual typename T::iterator get_best_it(void) = 0;
-  const Segment* set_best(void){
-    best = get_best_it();
-    return &(*best);
-  }
 };
 typedef std::multiset<Segment> segment_set_type;
@@ -410,14 +401,17 @@ container_umap_type* get_container_umap(void){
   return &container_umap;
 }
 
-#define CMAKER(CONTAINER, INSERT, BEST) \
-  class CONCAT(CONTAINER,Wrapper) : public MyContainer< std::CONTAINER > { \
+#define CMAKER(CONTAINER, STRUCT, INSERT, SET_IT, GET_SEG, ERASE) \
+  class CONCAT(CONTAINER,Wrapper) : public MyContainer< STRUCT > { \
   public: \
   void insert(Segment& new_seg){ \
     segment_container.INSERT(new_seg); \
   } \
-  std::CONTAINER::iterator get_best_it(void){ \
-    return BEST; \
+  Segment get_best(void){ \
+    SET_IT; \
+    Segment seg = GET_SEG; \
+    ERASE; \
+    return seg; \
   } \
   }; \
   Container* CONCAT(CONTAINER,construct) (){ \
@@ -429,9 +423,21 @@ container_umap_type* get_container_umap(void){
   static ContainerFactory CONCAT(CONTAINER,_instance) \
     ( #CONTAINER, CONCAT(CONTAINER,construct), CONCAT(CONTAINER,destruct) );
 
-CMAKER(multiset, insert, segment_container.begin())
+#define CIT(CONTAINER, INSERT, BEST) \
+  CMAKER(CONTAINER, std::CONTAINER, INSERT, std::CONTAINER::iterator it = BEST, *it, segment_container.erase(it))
+
+CIT(multiset, insert, segment_container.begin())
+
+CIT(list, push_back, std::min_element(segment_container.begin(),segment_container.end()))
 
-CMAKER(list, push_back, std::min_element(segment_container.begin(),segment_container.end()))
+class PQ_Compare {
+public:
+  bool operator()(Segment a, Segment b){
+    return !(a < b);
+  }
+};
+#define PQ_STRUCT std::priority_queue<Segment,std::vector<Segment>,PQ_Compare>
+CMAKER(priority_queue, PQ_STRUCT, push, , segment_container.top(), segment_container.pop())
 
 class Candidates {
 public:
@@ -729,35 +735,32 @@ int binseg
   int seg_i = 0;
   while(V.container_ptr->not_empty() && ++seg_i < max_segments){
     // Store loss and model parameters associated with this split.
-    const Segment *seg_ptr = V.container_ptr->set_best();
+    const Segment seg = V.container_ptr->get_best();
     out_arrays.save
       (seg_i,
-       subtrain_loss[seg_i-1] + seg_ptr->best_decrease,
-       validation_loss[seg_i-1] + seg_ptr->validation_decrease,
-       seg_ptr->best_split.this_end,
-       seg_ptr->depth,
-       seg_ptr->best_split.before,
-       seg_ptr->best_split.after,
-       seg_ptr->invalidates_index,
-       seg_ptr->invalidates_after,
-       seg_ptr->best_split.this_end - seg_ptr->first_i + 1,
-       seg_ptr->last_i - seg_ptr->best_split.this_end);
+       subtrain_loss[seg_i-1] + seg.best_decrease,
+       validation_loss[seg_i-1] + seg.validation_decrease,
+       seg.best_split.this_end,
+       seg.depth,
+       seg.best_split.before,
+       seg.best_split.after,
+       seg.invalidates_index,
+       seg.invalidates_after,
+       seg.best_split.this_end - seg.first_i + 1,
+       seg.last_i - seg.best_split.this_end);
     // Finally add new split candidates if necessary.
     V.maybe_add
-      (seg_ptr->first_i, seg_ptr->best_split.this_end,
+      (seg.first_i, seg.best_split.this_end,
        0,//invalidates_after=0 => before_mean invalidated.
-       seg_i, seg_ptr->best_split.before.loss,
-       seg_ptr->before_validation_loss,
-       seg_ptr->depth);
+       seg_i, seg.best_split.before.loss,
+       seg.before_validation_loss,
+       seg.depth);
     V.maybe_add
-      (seg_ptr->best_split.this_end+1, seg_ptr->last_i,
+      (seg.best_split.this_end+1, seg.last_i,
        1,//invalidates_after=1 => after_mean invalidated.
-       seg_i, seg_ptr->best_split.after.loss,
-       seg_ptr->after_validation_loss,
-       seg_ptr->depth);
-    // Erase at end because we need seg_ptr->values during maybe_add
-    // inserts above.
-    V.container_ptr->remove_best();
+       seg_i, seg.best_split.after.loss,
+       seg.after_validation_loss,
+       seg.depth);
   }
   return 0;//SUCCESS.
 }
diff --git a/src/binseg.h b/src/binseg.h
index aa7bd5c..bc0ad95 100644
--- a/src/binseg.h
+++ b/src/binseg.h
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include <queue>
 #include <set>//multiset
 #include
@@ -129,8 +130,7 @@ class Container {
 public:
   virtual void insert(Segment&) = 0;
   virtual int get_size(void) = 0;
-  virtual const Segment* set_best(void) = 0;
-  virtual void remove_best(void) = 0;
+  virtual Segment get_best(void) = 0;
   virtual ~Container() {};
   bool not_empty(void){
     return get_size() > 0;
diff --git a/tests/testthat/test-CRAN.R b/tests/testthat/test-CRAN.R
index 72d2938..ec4696b 100644
--- a/tests/testthat/test-CRAN.R
+++ b/tests/testthat/test-CRAN.R
@@ -270,19 +270,22 @@ test_that("error for unrecognized container", {
   }, "unrecognized container")
 })
 
+container.values <- c("priority_queue", "multiset", "list")
 test_that("variance estimates and loss correct", {
   x <- c(0,0.1, 1,1.2)
-  fit <- binsegRcpp::binseg("meanvar_norm", x, max.segments=2L)
-  expect_equal(fit$splits$before.mean, c(mean(x), mean(x[1:2])))
-  expect_equal(fit$splits$after.mean, c(NA, mean(x[3:4])))
-  expect_equal(fit$splits$before.var, c(myvar(x), myvar(x[1:2])))
-  expect_equal(fit$splits$after.var, c(NA, myvar(x[3:4])))
-  nll <- function(y)-sum(dnorm(y, mean(y), sqrt(myvar(y)), log=TRUE))
-  expect_equal(fit$splits$loss, c(nll(x), nll(x[1:2])+nll(x[3:4])))
-  seg.dt <- coef(fit, 2L)
-  expect_equal(seg.dt$end, c(2,4))
-  expect_equal(seg.dt$mean, c(mean(x[1:2]),mean(x[3:4])))
-  expect_equal(seg.dt$var, c(myvar(x[1:2]),myvar(x[3:4])))
+  for(container in container.values){
+    fit <- binsegRcpp::binseg("meanvar_norm", x, max.segments=2L, container.str = container)
+    expect_equal(fit$splits$before.mean, c(mean(x), mean(x[1:2])))
+    expect_equal(fit$splits$after.mean, c(NA, mean(x[3:4])))
+    expect_equal(fit$splits$before.var, c(myvar(x), myvar(x[1:2])))
+    expect_equal(fit$splits$after.var, c(NA, myvar(x[3:4])))
+    nll <- function(y)-sum(dnorm(y, mean(y), sqrt(myvar(y)), log=TRUE))
+    expect_equal(fit$splits$loss, c(nll(x), nll(x[1:2])+nll(x[3:4])))
+    seg.dt <- coef(fit, 2L)
+    expect_equal(seg.dt$end, c(2,4))
+    expect_equal(seg.dt$mean, c(mean(x[1:2]),mean(x[3:4])))
+    expect_equal(seg.dt$var, c(myvar(x[1:2]),myvar(x[3:4])))
+  }
 })
 
 test_that("meanvar_norm does not have segs with size 1", {
@@ -290,52 +293,61 @@
   sim <- function(mu,sigma)rnorm(data.per.seg,mu,sigma)
   set.seed(1)
   data.vec <- c(sim(10,1), sim(0, 5))
-  fit <- binsegRcpp::binseg("meanvar_norm", data.vec)
-  expect_lte(nrow(fit$splits), data.per.seg)
+  for(container in container.values){
+    fit <- binsegRcpp::binseg("meanvar_norm", data.vec, container.str=container)
+    expect_lte(nrow(fit$splits), data.per.seg)
+  }
 })
 
 test_that("l1loss param is median", {
   data.vec <- c(1.3, 1.0, 1.1, 2.0, 2.1, 3.1)
-  l1fit <- binsegRcpp::binseg("l1", data.vec, max.segments=2L)
-  seg.dt <- coef(l1fit)
-  expected.median <- c(
-    median(data.vec),
-    median(data.vec[1:3]),
-    median(data.vec[4:6]))
-  expect_equal(seg.dt$median, expected.median)
-  expected.loss <- sum(abs(median(data.vec)-data.vec))
-  expect_equal(l1fit$splits$loss[1], expected.loss)
+  for(container in container.values){
+    l1fit <- binsegRcpp::binseg("l1", data.vec, max.segments=2L, container.str=container)
+    seg.dt <- coef(l1fit)
+    expected.median <- c(
+      median(data.vec),
+      median(data.vec[1:3]),
+      median(data.vec[4:6]))
+    expect_equal(seg.dt$median, expected.median)
+    expected.loss <- sum(abs(median(data.vec)-data.vec))
+    expect_equal(l1fit$splits$loss[1], expected.loss)
+  }
 })
 
 test_that("laplace params median,scale", {
   data.vec <- c(1.3, 1.0, 1.1, 2.0, 2.1, 3.1)
-  l1fit <- binsegRcpp::binseg("laplace", data.vec, max.segments=2L)
-  seg.dt <- coef(l1fit)
-  expected.median <- c(
-    median(data.vec),
-    median(data.vec[1:3]),
-    median(data.vec[4:6]))
-  expect_equal(seg.dt$median, expected.median)
-  sum.abs.dev <- sum(abs(median(data.vec)-data.vec))
-  N.data <- length(data.vec)
-  est.scale <- sum.abs.dev/N.data
-  expect_equal(l1fit$splits$before.scale[1], est.scale)
-  expected.loss <- N.data*log(2*est.scale)+sum.abs.dev/est.scale
-  expect_equal(l1fit$splits$loss[1], expected.loss)
+  for(container in container.values){
+    l1fit <- binsegRcpp::binseg("laplace", data.vec, max.segments=2L, container.str=container)
+    seg.dt <- coef(l1fit)
+    expected.median <- c(
+      median(data.vec),
+      median(data.vec[1:3]),
+      median(data.vec[4:6]))
+    expect_equal(seg.dt$median, expected.median)
+    sum.abs.dev <- sum(abs(median(data.vec)-data.vec))
+    N.data <- length(data.vec)
+    est.scale <- sum.abs.dev/N.data
+    expect_equal(l1fit$splits$before.scale[1], est.scale)
+    expected.loss <- N.data*log(2*est.scale)+sum.abs.dev/est.scale
+    expect_equal(l1fit$splits$loss[1], expected.loss)
+  }
 })
 
 test_that("laplace validation loss ok", {
   data.vec <- c(1.3, 1.0, 1.1, 2.0, 2.1, 3.1)
   is.validation.vec <- c(FALSE,FALSE,TRUE,TRUE,FALSE,FALSE)
-  l1fit <- binsegRcpp::binseg(
-    "laplace", data.vec, max.segments=2L,
-    is.validation.vec = is.validation.vec)
-  subtrain.vec <- data.vec[!is.validation.vec]
-  est.median <- median(subtrain.vec)
-  est.scale <- sum(abs(est.median-subtrain.vec))/length(subtrain.vec)
-  sum.abs.dev <- sum(abs(est.median-data.vec[is.validation.vec]))
-  vloss1 <- sum(is.validation.vec)*log(2*est.scale)+sum.abs.dev/est.scale
-  expect_equal(l1fit$splits$validation.loss[1], vloss1)
+  for(container in container.values){
+    l1fit <- binsegRcpp::binseg(
+      "laplace", data.vec, max.segments=2L,
+      container.str=container,
+      is.validation.vec = is.validation.vec)
+    subtrain.vec <- data.vec[!is.validation.vec]
+    est.median <- median(subtrain.vec)
+    est.scale <- sum(abs(est.median-subtrain.vec))/length(subtrain.vec)
+    sum.abs.dev <- sum(abs(est.median-data.vec[is.validation.vec]))
+    vloss1 <- sum(is.validation.vec)*log(2*est.scale)+sum.abs.dev/est.scale
+    expect_equal(l1fit$splits$validation.loss[1], vloss1)
+  }
 })
 
 test_that("laplace correct split for int data", {
@@ -346,40 +358,48 @@
     b=mean(d)
     sum(d/b+log(2*b))
   }
-  fit <- binseg("laplace", 1:8)
-  splits <- fit[["splits"]]
-  computed.loss <- splits[["loss"]]
-  expected.loss <- c(
-    laplace(8),
-    laplace(3)+laplace(5),
-    laplace(3)+laplace(3)+laplace(2))
-  expect_equal(computed.loss, expected.loss)
+  for(container in container.values){
+    fit <- binseg("laplace", 1:8, container.str=container)
+    splits <- fit[["splits"]]
+    computed.loss <- splits[["loss"]]
+    expected.loss <- c(
+      laplace(8),
+      laplace(3)+laplace(5),
+      laplace(3)+laplace(3)+laplace(2))
+    expect_equal(computed.loss, expected.loss)
+  }
 })
 
 test_that("meanvar_norm validation loss ok", {
   data.vec <- c(1.3, 1.0, 1.1, 2.0, 2.1, 3.1)
   is.validation.vec <- c(FALSE,FALSE,TRUE,TRUE,FALSE,FALSE)
-  l1fit <- binsegRcpp::binseg(
-    "meanvar_norm", data.vec, max.segments=2L,
-    is.validation.vec = is.validation.vec)
-  subtrain.vec <- data.vec[!is.validation.vec]
-  vloss1 <- -sum(dnorm(
-    data.vec[is.validation.vec],
-    mean(subtrain.vec),
-    sqrt(myvar(subtrain.vec)),
-    log=TRUE))
-  expect_equal(l1fit$splits$validation.loss[1], vloss1)
+  for(container in container.values){
+    l1fit <- binsegRcpp::binseg(
+      "meanvar_norm", data.vec, max.segments=2L,
+      container.str=container,
+      is.validation.vec = is.validation.vec)
+    subtrain.vec <- data.vec[!is.validation.vec]
+    vloss1 <- -sum(dnorm(
+      data.vec[is.validation.vec],
+      mean(subtrain.vec),
+      sqrt(myvar(subtrain.vec)),
+      log=TRUE))
+    expect_equal(l1fit$splits$validation.loss[1], vloss1)
+  }
 })
 
 test_that("l1 validation loss ok", {
   data.vec <- c(1.3, 1.0, 1.1, 2.0, 2.1, 3.1)
   is.validation.vec <- c(FALSE,FALSE,TRUE,TRUE,FALSE,FALSE)
-  l1fit <- binsegRcpp::binseg(
-    "l1", data.vec, max.segments=2L,
-    is.validation.vec = is.validation.vec)
-  vloss1 <- sum(abs(
-    data.vec[is.validation.vec]-median(data.vec[!is.validation.vec])))
-  expect_equal(l1fit$splits$validation.loss[1], vloss1)
+  for(container in container.values){
+    l1fit <- binsegRcpp::binseg(
+      "l1", data.vec, max.segments=2L,
+      container.str=container,
+      is.validation.vec = is.validation.vec)
+    vloss1 <- sum(abs(
+      data.vec[is.validation.vec]-median(data.vec[!is.validation.vec])))
+    expect_equal(l1fit$splits$validation.loss[1], vloss1)
+  }
 })
 
 test_that("poisson loss ok for simple ex with zero", {
@@ -389,11 +409,13 @@
   expected.loss <- c(
     N.data*mu - log(mu)*sum(data.vec),
     1)
-  fit <- binsegRcpp::binseg("poisson", data.vec)
-  expect_equal(fit$splits$end, 2:1)
-  expect_equal(fit$splits$loss, expected.loss)
-  segs <- coef(fit)
-  expect_equal(segs$mean, c(mu, data.vec))
+  for(container in container.values){
+    fit <- binsegRcpp::binseg("poisson", data.vec, container.str=container)
+    expect_equal(fit$splits$end, 2:1)
+    expect_equal(fit$splits$loss, expected.loss)
+    segs <- coef(fit)
+    expect_equal(segs$mean, c(mu, data.vec))
+  }
 })
 
 test_that("error for poisson loss with bad data", {
@@ -408,22 +430,33 @@
 test_that("get_complexity respects min.segment.length", {
   n.data <- 8
   zero.one <- rep(0:1, l=n.data)
-  fit <- binsegRcpp::binseg("mean_norm", zero.one)
-  clist <- binsegRcpp::get_complexity(fit)
-  worst <- clist$iterations[case=="worst"]
-  expect_equal(worst$splits, seq(n.data-1, 0))
-  zero.ten <- rep(c(0,1,10,11), l=n.data)
-  mvfit <- binsegRcpp::binseg("meanvar_norm", zero.ten)
-  mvlist <- binsegRcpp::get_complexity(mvfit)
-  mvworst <- mvlist$iterations[case=="worst"]
-  expect_equal(mvworst$splits, c(5,3,1,0))
+  for(container in container.values){
+    fit <- binsegRcpp::binseg("mean_norm", zero.one, container.str=container)
+    clist <- binsegRcpp::get_complexity(fit)
+    worst <- clist$iterations[case=="worst"]
+    expect_equal(worst$splits, seq(n.data-1, 0))
+    zero.ten <- rep(c(0,1,10,11), l=n.data)
+    mvfit <- binsegRcpp::binseg("meanvar_norm", zero.ten)
+    mvlist <- binsegRcpp::get_complexity(mvfit)
+    mvworst <- mvlist$iterations[case=="worst"]
+    expect_equal(mvworst$splits, c(5,3,1,0))
+  }
 })
 
 test_that("empirical splits not negative", {
-  fit <- binsegRcpp::binseg("meanvar_norm", 1:8)
-  clist <- binsegRcpp::get_complexity(fit)
-  esplits <- clist$iterations[case=="empirical", splits]
-  expect_equal(esplits, c(5,2,0,0))
+  for(container in container.values){
+    fit <- binsegRcpp::binseg("meanvar_norm", 1:8, container.str=container)
+    clist <- binsegRcpp::get_complexity(fit)
+    esplits <- clist$iterations[case=="empirical", splits]
+    expect_equal(esplits, c(5,2,0,0))
+  }
+})
+
+test_that("first three splits result in four segments, two data each", {
+  for(container in container.values){
+    fit <- binsegRcpp::binseg("mean_norm", 1:8, max.segments=4, container.str=container)
+    expect_equal(sort(fit$splits$end), c(2,4,6,8))
+  }
 })
 
 test_that("l1 loss chooses even split if equal loss", {
@@ -437,8 +470,10 @@
     sum(c(seg(1,end),seg(end+1,N.max)))
   }
   sapply(seq(1,N.max-1), two.segs)
-  fit <- binsegRcpp::binseg("l1", data.vec, max.segments=2L)
-  expect_equal(fit$splits$end, c(8,4))
+  for(container in container.values){
+    fit <- binsegRcpp::binseg("l1", data.vec, max.segments=2L, container.str=container)
+    expect_equal(fit$splits$end, c(8,4))
+  }
 })
 
 test_that("l1 loss chooses even splits after storage", {
@@ -462,19 +497,21 @@
 test_that("poisson split is not in middle", {
   N.max <- 8
   data.vec <- 1:N.max
-  fit <- binsegRcpp::binseg("poisson", data.vec, max.segments=2L)
-  seg <- function(first,last){
-    sdata <- data.vec[first:last]
-    ploss(sdata, mean(sdata))
-  }
-  two.segs <- function(end){
-    sum(c(seg(1,end),seg(end+1,N.max)))
+  for(container in container.values){
+    fit <- binsegRcpp::binseg("poisson", data.vec, max.segments=2L, container.str=container)
+    seg <- function(first,last){
+      sdata <- data.vec[first:last]
+      ploss(sdata, mean(sdata))
+    }
+    two.segs <- function(end){
+      sum(c(seg(1,end),seg(end+1,N.max)))
+    }
+    loss.vec <- sapply(1:7, two.segs)
+    expected.loss <- c(seg(1,N.max),min(loss.vec))
+    expect_equal(fit$splits$loss, expected.loss)
+    expected.end <- c(N.max,which.min(loss.vec))
+    expect_equal(fit$splits$end, expected.end)
   }
-  loss.vec <- sapply(1:7, two.segs)
-  expected.loss <- c(seg(1,N.max),min(loss.vec))
-  expect_equal(fit$splits$loss, expected.loss)
-  expected.end <- c(N.max,which.min(loss.vec))
-  expect_equal(fit$splits$end, expected.end)
 })
 
 test_that("max_segs=N/2 possible for N=2^10", {
@@ -484,12 +521,14 @@
   cum.squares <- cumsum(data.vec^2)
   mu <- data.vec
   v <- data.vec^2 + data.vec*(data.vec-2*data.vec)
-  fit <- binsegRcpp::binseg("meanvar_norm",data.vec)
-  max.segs <- fit$splits[.N, segments]
-  seg.dt <- coef(fit, max.segs)
-  seg.dt[, n.data := end-start+1]
-  table(seg.dt$n.data)
-  expect_equal(nrow(fit$splits), N.data/2)
+  for(container in container.values){
+    fit <- binsegRcpp::binseg("meanvar_norm",data.vec, container.str=container)
+    max.segs <- fit$splits[.N, segments]
+    seg.dt <- coef(fit, max.segs)
+    seg.dt[, n.data := end-start+1]
+    table(seg.dt$n.data)
+    expect_equal(nrow(fit$splits), N.data/2)
+  }
 })
 
 test_that("error when number of data smaller than min segment length", {
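The container.str changes above switch the default C++ container from multiset to priority_queue while keeping "multiset" and "list" available, and the test loop asserts that all three give the same results. A minimal sketch of checking that equivalence interactively is shown below; the data values and max.segments are made-up illustration, not part of this diff:

## Hypothetical consistency check (illustration only): all three containers
## should produce the same sequence of split ends on the same data.
data.vec <- c(0, 0.1, 1, 1.2, 5, 5.1)
fit.list <- lapply(
  c("priority_queue", "multiset", "list"),
  function(container) binsegRcpp::binseg(
    "mean_norm", data.vec, max.segments=3L, container.str=container))
ends <- lapply(fit.list, function(fit) fit$splits$end)
stopifnot(length(unique(ends)) == 1)  # identical splits regardless of container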