Skip to content

Commit 71129d6

Browse files
committed
feat: add featureValues,XcmsExperimentHdf5
1 parent f8fb0e0 commit 71129d6

8 files changed

+385
-11
lines changed

R/XcmsExperimentHdf5-functions.R

+103-2
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,108 @@ NULL
327327
}
328328

329329

330+
################################################################################
331+
##
332+
## FEATURES THINGS
333+
##
334+
################################################################################
335+
336+
#' Extracts feature values for one sample summing intensities for features
337+
#' with multiple peaks assigned.
338+
#'
339+
#' @param hdf5_file `character(1)` with the HDF5 file name.
340+
#'
341+
#' @param sample_id `character(1)` with the sample ID.
342+
#'
343+
#' @param ms_level `integer(1)` with the MS level.
344+
#'
345+
#' @param n_features `integer(1)` with the total number of features for that
346+
#' MS level.
347+
#'
348+
#' @param method `character(1)` defining the method to be used to tackle
349+
#' features with multiple peaks.
350+
#'
351+
#' @param col_idx `integer` with the index of the peak columns that should be
352+
#' loaded and processed by the functions. The first index **must** be the
353+
#' index of the `value` column (i.e. the column that should be reported).
354+
#' For `method = "maxint"`, the second column should be the column defined
355+
#' with parameter `intensity`, i.e. the column with the intensity values
356+
#' to select the *larger* peak. For `method = "rtmed"` it should be the
357+
#' index of the column `"rt"`.
358+
#'
359+
#' @param filled `logical(1)` whether gap-filled values should be reported or
360+
#' removed.
361+
#'
362+
#' @param rtmed `numeric` with the `"rtmed"` column of the feature definitions.
363+
#' Only used (but required) for `method = "medret"`.
364+
#'
365+
#' @noRd
366+
.h5_feature_values_sample <- function(sample_id, hdf5_file, ms_level,
367+
n_features, method,
368+
col_idx = integer(),
369+
filled = TRUE, rtmed, ...) {
370+
res <- rep(NA_real_, n_features)
371+
sid <- paste0("/", sample_id, "/ms_", ms_level)
372+
vals <- .h5_read_data(hdf5_file, sample_id, name = "chrom_peaks",
373+
ms_level = ms_level, column = col_idx)[[1L]]
374+
fidx <- .h5_read_data(hdf5_file, sample_id, name = "feature_to_chrom_peaks",
375+
ms_level = ms_level)[[1L]]
376+
## remove gap-filled values
377+
if (!filled) {
378+
is_filled <- rhdf5::h5read(hdf5_file,
379+
paste0(sid, "/chrom_peak_data/is_filled"),
380+
drop = TRUE)
381+
vals[is_filled, 1L] <- NA_real_
382+
}
383+
## set/assign single and multiple values.
384+
res[fidx[, 1L]] <- vals[fidx[, 2L], 1L]
385+
if (method == "medret") {
386+
## calculate difference between feature and peak rt
387+
vals[fidx[, 2L], 2L] <- vals[fidx[, 2L], 2L] - rtmed[fidx[, 1L]]
388+
}
389+
## handle duplicates
390+
f <- factor(fidx[, 1L], levels = seq_len(n_features))
391+
pk_idx <- split(fidx[, 2L], f)
392+
idx_multi <- which(lengths(pk_idx) > 1L)
393+
FUN <- switch(
394+
method,
395+
sum = function(z) sum(vals[z, 1L]),
396+
maxint = function(z) vals[z, 1L][which.max(vals[z, 2L])],
397+
medret = function(z) vals[z, 1L][which.min(abs(vals[z, 2L]))])
398+
res[idx_multi] <- vapply(pk_idx[idx_multi], FUN, 1.1)
399+
res
400+
}
330401

402+
#' Get feature values for a specific MS level.
403+
#'
404+
#' @noRd
405+
.h5_feature_values_ms_level <- function(ms_level, x, method, value, intensity,
406+
filled = TRUE) {
407+
cn <- h5read(x@hdf5_file, paste0("/", x@sample_id[1L], "/ms_", ms_level,
408+
"/chrom_peaks_colnames"), drop = TRUE)
409+
col <- switch(method,
410+
sum = value,
411+
medret = c(value, "rt"),
412+
maxint = c(value, intensity))
413+
if (!all(col %in% cn))
414+
stop("Not all requested columns available. Please make sure 'value' ",
415+
"and 'intensity' (if defined) are available columns in the ",
416+
"chrom peak matrix.", call. = FALSE)
417+
col_idx <- match(col, cn)
418+
rtmed <- rhdf5::h5read(x@hdf5_file,
419+
paste0("/features/ms_", ms_level,
420+
"/feature_definitions/rtmed"), drop = TRUE)
421+
rn <- rhdf5::h5read(x@hdf5_file,
422+
paste0("/features/ms_", ms_level,
423+
"/feature_definitions_rownames"), drop = TRUE)
424+
res <- do.call(
425+
cbind, lapply(x@sample_id, .h5_feature_values_sample,
426+
hdf5_file = x@hdf5_file, ms_level = ms_level,
427+
n_features = length(rtmed), method = method,
428+
col_idx = col_idx, filled = filled, rtmed = rtmed))
429+
rownames(res) <- rn
430+
res
431+
}
331432

332433
################################################################################
333434
##
@@ -709,7 +810,7 @@ NULL
709810
h5 <- rhdf5::H5Fopen(h5_file)
710811
on.exit(invisible(rhdf5::H5Fclose(h5)))
711812
FUN <- .h5_write_matrix
712-
if (name %in% c("chrom_peak_data", "feature_definition"))
813+
if (name %in% c("chrom_peak_data", "feature_definitions"))
713814
FUN <- .h5_write_data_frame
714815
comp_level <- .h5_compression_level()
715816
for (i in seq_along(data_list)) {
@@ -725,4 +826,4 @@ NULL
725826
replace = replace)
726827
}
727828
.h5_increment_mod_count(h5)
728-
}
829+
}

R/XcmsExperimentHdf5.R

+82-3
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
#' @title xcms result object for very large data sets
44
#'
5+
#' @aliases XcmsExperimentHdf5-class
6+
#'
57
#' @name XcmsExperimentHdf5
68
#'
79
#' @description
@@ -40,6 +42,18 @@
4042
#' chromatographic peak in the chrom peak matrix **of that sample** and
4143
#' MS level.
4244
#'
45+
#' @section Correspondence analysis results:
46+
#'
47+
#' - `featureDefinitions()`: similarly to `featureDefinitions()` for
48+
#' [XcmsExperiment] objects, this method returns a `data.frame` with the
49+
#' characteristics for the defined LC-MS features. The function for
50+
#' `XcmsExperimentHdf5` does however **not** return the `"peakidx"` column
51+
#' with the indices of the chromatographic peaks per feature. Also, the
52+
#' columns are usually returned in alphabetic order.
53+
#'
54+
#' - `featureValues()`: parameter `value = "index"` (i.e. returning the index
55+
#' of the chromatographic peaks per feature) is not supported.
56+
#'
4357
#' @author Johannes Rainerr, Philippine Louail
4458
NULL
4559

@@ -483,7 +497,8 @@ setMethod(
483497
res <- .xmse_group_cpeaks(cps, param = param)
484498
if (!nrow(res))
485499
return(object)
486-
object@features_ms_level <- unique(c(object@features_ms_level, msLevel))
500+
object@features_ms_level <- as.integer(
501+
unique(c(object@features_ms_level, msLevel)))
487502
cpk_idx <- res$peakidx
488503
res$peakidx <- NULL
489504
rownames(res) <- .featureIDs(
@@ -524,5 +539,69 @@ setMethod(
524539
object
525540
})
526541

527-
## featureDefinitions
528-
## featureValues; of only one MS level?
542+
#' @rdname hidden_aliases
543+
setReplaceMethod("featureDefinitions", "XcmsExperimentHdf5",
544+
function(object, value) {
545+
stop("Not implemented for ", class(object)[1L])
546+
})
547+
548+
#' @rdname hidden_aliases
549+
setMethod(
550+
"featureDefinitions", "XcmsExperimentHdf5",
551+
function(object, mz = numeric(), rt = numeric(), ppm = 0,
552+
type = c("any", "within", "apex_within"), msLevel = integer()) {
553+
if (!hasFeatures(object))
554+
return(object@featureDefinitions)
555+
type <- match.arg(type)
556+
if (length(msLevel))
557+
msLevel <- intersect(msLevel, object@features_ms_level)
558+
else msLevel <- object@features_ms_level
559+
if (!length(msLevel))
560+
return(object@featureDefinitions)
561+
fd <- .h5_read_data(object@hdf5_file, rep("features", length(msLevel)),
562+
name = "feature_definitions", ms_level = msLevel,
563+
read_rownames = TRUE)
564+
msl <- rep(msLevel, vapply(fd, nrow, 1L))
565+
fd <- do.call(rbind, fd)
566+
fd$ms_level <- msl
567+
.subset_feature_definitions(fd, mz = mz, rt = rt,
568+
ppm = ppm, type = type)
569+
})
570+
571+
#' @rdname hidden_aliases
572+
setMethod(
573+
"featureValues", "XcmsExperimentHdf5",
574+
function(object, method = c("medret", "maxint", "sum"), value = "into",
575+
intensity = "into", filled = TRUE, missing = NA_real_,
576+
msLevel = integer()) {
577+
if (!length(msLevel)) msLevel <- object@features_ms_level
578+
if (!hasFeatures(object, msLevel = msLevel))
579+
stop("No feature definitions for MS level(s) ", msLevel," present.")
580+
method <- match.arg(method)
581+
if (value == "index")
582+
stop("'featureValues' for 'XcmsExperimentHdf5' does not support ",
583+
"'value = \"index\"'.", call. = FALSE)
584+
if (is.character(missing) && !(missing %in% c("rowmin_half")))
585+
stop("if 'missing' is not 'NA' or a numeric it should",
586+
" be one of: \"rowmin_half\".")
587+
msLevel <- intersect(msLevel, object@features_ms_level)
588+
vals <- do.call(
589+
rbindFill,
590+
lapply(msLevel, .h5_feature_values_ms_level, x = object,
591+
method = method, value = value, intensity = intensity,
592+
filled = filled)
593+
)
594+
colnames(vals) <- basename(fileNames(object))
595+
missing <- missing[1L]
596+
if (!is.na(missing)) {
597+
if (is.numeric(missing))
598+
vals[is.na(vals)] <- missing
599+
if (missing == "rowmin_half")
600+
for (i in seq_len(nrow(vals))) {
601+
nas <- is.na(vals[i, ])
602+
if (any(nas))
603+
vals[i, nas] <- min(vals[i, ], na.rm = TRUE) / 2
604+
}
605+
}
606+
vals
607+
})

man/XcmsExperiment.Rd

+4-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/XcmsExperimentHdf5.Rd

+26-3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/findChromPeaks.Rd

+7
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)