Skip to content

Commit 9e1a1a6

Browse files
committed
Small fixes and improvements
1 parent 6834d1c commit 9e1a1a6

File tree

3 files changed

+80
-41
lines changed

3 files changed

+80
-41
lines changed

R/XcmsExperimentHdf5-functions.R

+44-25
Original file line numberDiff line numberDiff line change
@@ -175,15 +175,15 @@ NULL
175175
is_filled = rep(FALSE, nr))
176176
if (add) {
177177
## Need to load previous results and append to that.
178-
pks <- .h5_read_data(h5_file, index = sid, name = "chrom_peaks",
178+
pks <- .h5_read_data(h5_file, id = sid, name = "chrom_peaks",
179179
ms_level = msLevel, read_colnames = TRUE,
180180
read_rownames = TRUE)[[1L]]
181181
rnames <- rownames(pks)
182182
max_index <- max(
183183
as.integer(sub(paste0("CP", msLevel, sid), "", rnames)))
184184
res[[i]] <- rbindFill(pks, res[[i]])
185185
pkd <- rbindFill(.h5_read_data(
186-
h5_file, index = sid, name = "chrom_peak_data",
186+
h5_file, id = sid, name = "chrom_peak_data",
187187
ms_level = msLevel, read_rownames = FALSE)[[1L]], pkd)
188188
}
189189
pkdl[[i]] <- pkd
@@ -217,7 +217,7 @@ NULL
217217
else rt <- rtime(spectra(x))[keep]
218218
## Get the list of chromPeak data for x.
219219
pksl <- .h5_read_data(
220-
x@hdf5_file, index = x@sample_id, name = "chrom_peaks",
220+
x@hdf5_file, id = x@sample_id, name = "chrom_peaks",
221221
ms_level = rep(msLevel, length(x@sample_id)),
222222
read_colnames = TRUE, read_rownames = TRUE)
223223
## Get the max index of a chrom peak per sample
@@ -228,7 +228,7 @@ NULL
228228
rownames(pksl[[i]])))))
229229
## Get the list of chromPeakData for x.
230230
pkdl <- .h5_read_data(
231-
x@hdf5_file, index = x@sample_id, name = "chrom_peak_data",
231+
x@hdf5_file, id = x@sample_id, name = "chrom_peak_data",
232232
ms_level = rep(msLevel, length(x@sample_id)), read_rownames = TRUE)
233233
## Do refinement (in parallel)
234234
res <- bpmapply(
@@ -302,22 +302,24 @@ NULL
302302
} else idx_columns <- NULL
303303
ids <- rep(x@sample_id, length(msLevel))
304304
msl <- rep(msLevel, each = length(x@sample_id))
305-
res <- .h5_read_data(x@hdf5_file, index = ids, name = "chrom_peaks",
305+
res <- .h5_read_data(x@hdf5_file, id = ids, name = "chrom_peaks",
306306
ms_level = msl, read_colnames = TRUE,
307-
read_rownames = TRUE, j = idx_columns)
308-
if (length(mz) | length(rt))
309-
res <- lapply(res, function(z, rt, mz, ppm, type) {
310-
z[.is_chrom_peaks_within_mz_rt(
311-
z, rt = rt, mz = mz, ppm = ppm, type = type), , drop = FALSE]
312-
}, rt = rt, mz = mz, ppm = ppm, type = type)
307+
read_rownames = TRUE, j = idx_columns,
308+
rt = rt, mz = mz, ppm = ppm, type = type)
309+
## ## Might be better (memory wise) to pass this to the import function
310+
## ## instead
311+
## if (length(mz) | length(rt))
312+
## res <- lapply(res, function(z, rt, mz, ppm, type) {
313+
## z[.is_chrom_peaks_within_mz_rt(
314+
## z, rt = rt, mz = mz, ppm = ppm, type = type), , drop = FALSE]
315+
## }, rt = rt, mz = mz, ppm = ppm, type = type)
313316
if (by_sample) {
314317
names(res) <- ids
318+
res
315319
} else {
316320
l <- vapply(res, nrow, 1L)
317-
res <- cbind(do.call(rbind, res),
318-
sample = rep(match(ids, x@sample_id), l))
321+
cbind(do.call(rbind, res), sample = rep(match(ids, x@sample_id), l))
319322
}
320-
res
321323
}
322324

323325
.h5_chrom_peak_data <- function(x, columns = character(), by_sample = TRUE) {
@@ -354,7 +356,7 @@ NULL
354356
cnt <- 0L
355357
for (msl in ms_level) {
356358
## read chrom peaks
357-
cp <- .h5_read_data(hdf5_file, index = id, name = "chrom_peaks",
359+
cp <- .h5_read_data(hdf5_file, id = id, name = "chrom_peaks",
358360
ms_level = msl, read_colnames = TRUE,
359361
read_rownames = FALSE)[[1L]]
360362
## adjust chrom peak rt - use .applyRtAdjToChromPeaks for that.
@@ -487,7 +489,7 @@ NULL
487489
.h5_feature_values_ms_level <- function(ms_level, x, method, value, intensity,
488490
filled = TRUE) {
489491
cn <- .h5_chrom_peaks_colnames(x, ms_level)
490-
col <- switch(method,
492+
col <- switch(method,
491493
sum = value,
492494
medret = c(value, "rt"),
493495
maxint = c(value, intensity))
@@ -598,6 +600,21 @@ NULL
598600
d
599601
}
600602

603+
.h5_read_chrom_peaks_matrix <- function(name, h5, index = list(NULL, NULL),
604+
read_colnames = FALSE,
605+
read_rownames = FALSE,
606+
rownames = paste0(name, "_rownames"),
607+
rt = numeric(), mz = numeric(),
608+
ppm = 0, type = "any") {
609+
read_colnames <- read_colnames || length(rt) > 0 || length(mz) > 0
610+
d <- .h5_read_matrix2(name, h5, index, read_colnames, read_rownames,
611+
rownames)
612+
if (length(rt) | length(mz))
613+
d[.is_chrom_peaks_within_mz_rt(d, rt = rt, mz = mz,
614+
ppm = ppm, type = type), , drop = FALSE]
615+
else d
616+
}
617+
601618
#' Read a single `data.frame` from the HDF5 file. With
602619
#' `read_rownames = TRUE` also the row names are read and set, which requires
603620
#' an additional reading step. Note that for a `data.frame` each column
@@ -679,12 +696,12 @@ NULL
679696
#'
680697
#' @param h5_file `character(1)` with the HDF5 file name
681698
#'
682-
#' @param index `integer` with the indices/IDs of the data sets to read.
699+
#' @param id `character` with the ID(s) of the data sets to read.
683700
#'
684701
#' @param name `character(1)` specifying which data should be read.
685702
#'
686703
#' @param ms_level `integer` with the MS level of each sample/data set that
687-
#' should be read. Has to have the same length than `index`.
704+
#' should be read. Has to have the same length than `id`.
688705
#'
689706
#' @param read_colnames `logical(1)` whether column names should be read and
690707
#' set for each `matrix`.
@@ -701,40 +718,42 @@ NULL
701718
#' select a **single** column to read. For `name = "chrom_peaks"`: `integer`
702719
#' with the indices of the column(s) that should be imported.
703720
#'
721+
#' @param ... additional parameters passed to `FUN`
722+
#'
704723
#' @return `list()` with the read datasets. Will be a `list` of `numeric`
705724
#' matrices for `name = "chrom_peaks"` or a `list` with `data.frame`s for
706725
#' `name = "chrom_peak_data"`.
707726
#'
708727
#' @noRd
709728
.h5_read_data <- function(h5_file = character(),
710-
index = integer(),
729+
id = character(),
711730
name = c("chrom_peaks", "chrom_peak_data",
712731
"feature_definitions",
713732
"feature_to_chrom_peaks"),
714733
ms_level = integer(),
715734
read_colnames = FALSE,
716735
read_rownames = FALSE,
717-
i = NULL, j = NULL) {
718-
if (!length(index)) return(list())
719-
stopifnot(length(ms_level) == length(index))
736+
i = NULL, j = NULL, ...) {
737+
if (!length(id)) return(list())
738+
stopifnot(length(ms_level) == length(id))
720739
name <- match.arg(name)
721740
FUN <- switch(name,
722741
chrom_peak_data = .h5_read_chrom_peak_data,
723742
feature_definitions = .h5_read_data_frame,
743+
chrom_peaks = .h5_read_chrom_peaks_matrix,
724744
.h5_read_matrix2)
725745
h5 <- rhdf5::H5Fopen(h5_file)
726746
on.exit(invisible(rhdf5::H5Fclose(h5)))
727-
d <- paste0("/", index, "/ms_", ms_level, "/", name)
747+
d <- paste0("/", id, "/ms_", ms_level, "/", name)
728748
index <- list(i, j)
729749
if (is.character(j) && length(j) == 1L) {
730750
d <- paste0(d, "/", j)
731751
index <- list(i, NULL)
732752
}
733753
lapply(d, FUN = FUN, read_colnames = read_colnames,
734-
read_rownames = read_rownames, index = index, h5 = h5)
754+
read_rownames = read_rownames, index = index, h5 = h5, ...)
735755
}
736756

737-
738757
## -------- VALIDITY --------
739758

740759
#' Compares the "mod_count" attribute from an h5 file with the expected

tests/testthat/test_XcmsExperimentHdf5-functions.R

+35-15
Original file line numberDiff line numberDiff line change
@@ -72,15 +72,15 @@ test_that(".h5_chrom_peaks_chunk works", {
7272
c("chrom_peak_data", "chrom_peaks",
7373
"chrom_peaks_colnames", "chrom_peaks_rownames"))
7474
H5Fclose(h5)
75-
a <- .h5_read_data(h5_file, index = "S2", name = "chrom_peaks",
75+
a <- .h5_read_data(h5_file, id = "S2", name = "chrom_peaks",
7676
ms_level = 1L, read_colnames = TRUE,
7777
read_rownames = TRUE)[[1L]]
7878
## add = TRUE
7979
res <- .h5_find_chrom_peaks_chunk(
8080
sps, msLevel = 1L, param = p, h5_file = h5_file, add = TRUE,
8181
sample_id = xmse_h5@sample_id)
8282
expect_equal(res, 4L)
83-
b <- .h5_read_data(h5_file, index = "S2", name = "chrom_peaks",
83+
b <- .h5_read_data(h5_file, id = "S2", name = "chrom_peaks",
8484
ms_level = 1L, read_colnames = TRUE,
8585
read_rownames = TRUE)[[1L]]
8686
expect_equal(nrow(b), 2 * nrow(a))
@@ -205,14 +205,14 @@ test_that(".h5_xmse_merge_neighboring_peaks works", {
205205
h5f <- tempfile()
206206
ref <- loadXcmsData("faahko_sub2")
207207
x <- .xcms_experiment_to_hdf5(ref, h5f)
208-
ref <- .h5_read_data(x@hdf5_file, index = x@sample_id,
208+
ref <- .h5_read_data(x@hdf5_file, id = x@sample_id,
209209
ms_level = rep(1L, length(x)),
210210
read_colnames = TRUE, read_rownames = TRUE)
211211
.h5_xmse_merge_neighboring_peaks(x)
212212
mod_count <- as.vector(rhdf5::h5read(h5f, "/header/modcount"))
213213
expect_true(mod_count > x@hdf5_mod_count)
214214
## Check that content was changed.
215-
res <- .h5_read_data(x@hdf5_file, index = x@sample_id,
215+
res <- .h5_read_data(x@hdf5_file, id = x@sample_id,
216216
ms_level = rep(1L, length(x)),
217217
read_colnames = TRUE, read_rownames = TRUE)
218218
expect_true(nrow(ref[[1L]]) > nrow(res[[1L]]))
@@ -235,7 +235,7 @@ test_that(".h5_xmse_merge_neighboring_peaks works", {
235235
res <- .xcms_experiment_to_hdf5(ref, h5f)
236236

237237
.h5_xmse_merge_neighboring_peaks(res)
238-
res <- .h5_read_data(res@hdf5_file, index = res@sample_id,
238+
res <- .h5_read_data(res@hdf5_file, id = res@sample_id,
239239
ms_level = rep(1L, length(res)),
240240
read_colnames = TRUE, read_rownames = TRUE)
241241
ref <- .xmse_merge_neighboring_peaks(ref)
@@ -250,7 +250,7 @@ test_that(".h5_xmse_merge_neighboring_peaks works", {
250250

251251
test_that(".h5_read_matrix works", {
252252
h5f <- tempfile()
253-
xcms:::.h5_initialize_file(h5f)
253+
.h5_initialize_file(h5f)
254254

255255
a <- cbind(a = c(1.2, 1.4), b = c(3.5, 3.6), c = c(5.3, 5.1))
256256
rownames(a) <- c("CP1", "CP2")
@@ -319,6 +319,26 @@ test_that(".h5_read_matrix works", {
319319
file.remove(h5f)
320320
})
321321

322+
test_that(".h5_read_chrom_peaks_matrix works", {
323+
res <- .h5_read_chrom_peaks_matrix(
324+
"/S2/ms_1/chrom_peaks", xmse_h5@hdf5_file,
325+
read_colnames = FALSE, read_rownames = FALSE)
326+
expect_true(is.matrix(res))
327+
expect_true(is.numeric(res))
328+
res <- .h5_read_chrom_peaks_matrix(
329+
"/S2/ms_1/chrom_peaks", xmse_h5@hdf5_file,
330+
read_colnames = TRUE, read_rownames = FALSE,
331+
mz = c(300, 350), rt = c(3000, 3500), type = "within")
332+
expect_true(all(res[, "mz"] > 300 & res[, "mz"] < 350))
333+
expect_true(all(res[, "rt"] > 3000 & res[, "rt"] < 3500))
334+
335+
res <- .h5_read_chrom_peaks_matrix(
336+
"/S2/ms_1/chrom_peaks", xmse_h5@hdf5_file,
337+
read_colnames = TRUE, read_rownames = FALSE,
338+
mz = c(300, 350), type = "within")
339+
expect_true(all(res[, "mz"] > 300 & res[, "mz"] < 350))
340+
})
341+
322342
test_that(".h5_read_data_frame works", {
323343
h5f <- tempfile()
324344
.h5_initialize_file(h5f)
@@ -401,32 +421,32 @@ test_that(".h5_read_data works", {
401421
## chrom peaks
402422
res <- .h5_read_data(h5f)
403423
expect_equal(res, list())
404-
res <- .h5_read_data(h5f, index = 2, name = "chrom_peaks", ms_level = 2L)
424+
res <- .h5_read_data(h5f, id = 2, name = "chrom_peaks", ms_level = 2L)
405425
expect_equal(length(res), 1L)
406426
expect_equal(res[[1L]], unname(b2))
407-
res <- .h5_read_data(h5f, index = 1, name = "chrom_peaks", ms_level = 2L,
427+
res <- .h5_read_data(h5f, id = 1, name = "chrom_peaks", ms_level = 2L,
408428
read_colnames = TRUE)
409429
expect_equal(unname(res[[1L]]), unname(a2))
410430
expect_equal(colnames(res[[1L]]), colnames(a2))
411431
expect_true(is.null(rownames(res[[1L]])))
412-
res <- .h5_read_data(h5f, index = 1, name = "chrom_peaks", ms_level = 2L,
432+
res <- .h5_read_data(h5f, id = 1, name = "chrom_peaks", ms_level = 2L,
413433
read_rownames = TRUE)
414434
expect_equal(unname(res[[1L]]), unname(a2))
415435
expect_equal(rownames(res[[1L]]), rownames(a2))
416436
expect_true(is.null(colnames(res[[1L]])))
417437
## single column
418-
res <- .h5_read_data(h5f, index = c(2, 1), name = "chrom_peaks",
438+
res <- .h5_read_data(h5f, id = c(2, 1), name = "chrom_peaks",
419439
ms_level = c(2L, 2L), j = 2)
420440
expect_equal(length(res), 2L)
421441
expect_true(ncol(res[[1L]]) == 1L)
422442
expect_equal(res[[1L]][, 1], unname(b2[, 2]))
423-
res <- .h5_read_data(h5f, index = c(2, 1), name = "chrom_peaks",
443+
res <- .h5_read_data(h5f, id = c(2, 1), name = "chrom_peaks",
424444
ms_level = c(2L, 2L), j = 2, read_colnames = TRUE,
425445
read_rownames = TRUE)
426446
expect_equal(length(res), 2L)
427447
expect_true(ncol(res[[1L]]) == 1L)
428448
expect_equal(res[[1L]][, 1, drop = FALSE], b2[, 2, drop = FALSE])
429-
res <- .h5_read_data(h5f, index = c(1, 2, 1), name = "chrom_peaks",
449+
res <- .h5_read_data(h5f, id = c(1, 2, 1), name = "chrom_peaks",
430450
ms_level = c(2, 2, 2), j = 1L,
431451
read_colnames = TRUE,
432452
read_rownames = TRUE)
@@ -435,7 +455,7 @@ test_that(".h5_read_data works", {
435455
expect_equal(res[[2]], b2[, 1, drop = FALSE])
436456

437457
## selected rows.
438-
res <- .h5_read_data(h5f, index = c(1, 2, 1), name = "chrom_peaks",
458+
res <- .h5_read_data(h5f, id = c(1, 2, 1), name = "chrom_peaks",
439459
ms_level = c(2, 2, 2), j = 1L, i = c(2, 1, 2),
440460
read_colnames = TRUE,
441461
read_rownames = TRUE)
@@ -445,13 +465,13 @@ test_that(".h5_read_data works", {
445465
expect_equal(res[[1]], a2[c(2, 1, 2), 1, drop = FALSE])
446466

447467
## chrom peak data
448-
res <- .h5_read_data(h5f, index = c(2, 1), name = "chrom_peak_data",
468+
res <- .h5_read_data(h5f, id = c(2, 1), name = "chrom_peak_data",
449469
ms_level = c(2L, 2L), read_colnames = TRUE,
450470
read_rownames = TRUE)
451471
expect_equal(length(res), 2)
452472
rownames(b) <- c("CP3", "CP4", "CP5")
453473
expect_equal(unname(res[[1L]]), unname(b))
454-
res <- .h5_read_data(h5f, index = 1, name = "chrom_peak_data",
474+
res <- .h5_read_data(h5f, id = 1, name = "chrom_peak_data",
455475
ms_level = 2L, j = "is_filled")
456476
expect_equal(length(res), 1L)
457477
expect_equal(res[[1L]][, 1], a$is_filled)

tests/testthat/test_XcmsExperimentHdf5.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ test_that("refineChromPeaks,XcmsExperimentHdf5,MergeNeighboringPeaksParam", {
124124
## Compare results from both. Need chromPeaks() function first.
125125
ref <- refineChromPeaks(ref, MergeNeighboringPeaksParam())
126126
ref_pks <- chromPeaks(ref)
127-
res_pks <- .h5_read_data(res@hdf5_file, index = res@sample_id,
127+
res_pks <- .h5_read_data(res@hdf5_file, id = res@sample_id,
128128
ms_level = rep(1L, length(res)),
129129
read_colnames = TRUE, read_rownames = TRUE)
130130
res_pks <- do.call(

0 commit comments

Comments
 (0)