Skip to content

Commit 182bb8e

Browse files
author
Karin Schork
committed
started working on onoff-heatmap (WIP)
1 parent 39c4afc commit 182bb8e

File tree

2 files changed

+152
-0
lines changed

2 files changed

+152
-0
lines changed

R/WIP_OnOff_Heatmap.R

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
2+
3+
#' Calculate on/off proteins
#'
#' Counts the valid (non-missing) values of every protein within each group
#' and flags a protein as on/off if it is "off" in at least one group and
#' "on" in at least one group.
#'
#' @param D data frame containing only protein intensities (one row per
#'   protein, one column per sample)
#' @param id data frame containing ID columns (rows correspond to the rows
#'   of \code{D})
#' @param group factor containing the group of each sample, i.e. one entry
#'   per column of \code{D}
#' @param max_vv_off off: <= max_vv_off valid values in a group
#' @param min_vv_on on: >= min_vv_on valid values in a group
#' @param protein_id_col column of id containing the protein IDs used for
#'   mapping (entries must be unique)
#'
#' @return data frame with the ID columns, the number of valid values per
#'   group (absolute: \code{valid_values_<group>}, relative:
#'   \code{valid_values_rel_<group>}) and the on/off status (\code{isonoff})
#' @export
#'
#' @examples # TODO
calculate_onoff <- function(D, id, group, max_vv_off, min_vv_on, protein_id_col = 1) {

  group <- droplevels(group)
  if (length(group) != ncol(D)) {
    stop("`group` must contain exactly one entry per column of `D`.",
         call. = FALSE)
  }

  ## `[[` yields a plain vector for data frames and tibbles alike
  Protein.IDs <- if (is.data.frame(id)) id[[protein_id_col]] else id[, protein_id_col]

  ## duplicated IDs would be merged by the grouping below and counted together
  if (anyDuplicated(Protein.IDs) > 0) {
    stop("The protein IDs in column `protein_id_col` of `id` must be unique.",
         call. = FALSE)
  }

  ## converting to long format (columns: Protein.IDs, name, value)
  D_long <- tidyr::pivot_longer(data = cbind(Protein.IDs = Protein.IDs, D),
                                cols = colnames(D))
  D_long$group <- group[match(D_long$name, colnames(D))]

  ## count valid values per group and protein (no pipe, so nothing has to be attached)
  D_onoff <- dplyr::summarise(
    dplyr::group_by(D_long, group, Protein.IDs),
    valid_values = sum(!is.na(value)),
    valid_values_rel = mean(!is.na(value)),
    .groups = "drop"
  )

  ## convert to wide format again: valid_values_<group>, valid_values_rel_<group>
  D_onoff_wide <- tidyr::pivot_wider(D_onoff,
                                     id_cols = Protein.IDs,
                                     values_from = c(valid_values, valid_values_rel),
                                     names_from = group)
  ind <- match(D_onoff_wide$Protein.IDs, Protein.IDs)

  ## absolute count columns, selected by name instead of by position
  cols <- paste0("valid_values_", levels(group))
  counts <- as.matrix(D_onoff_wide[, cols])

  ### calculate, if protein is on/off:
  ### off in at least one group and on in at least one group
  is_off <- rowSums(counts <= max_vv_off) > 0
  is_on <- rowSums(counts >= min_vv_on) > 0
  res_onoff <- unname(is_off & is_on)

  ## drop = FALSE keeps a single ID column as a named column
  D_result <- cbind(id[ind, , drop = FALSE],
                    D_onoff_wide[, -1],
                    isonoff = res_onoff)
  return(D_result)
}
55+
56+
57+
58+
59+
60+
61+
################################################################################
62+
################################################################################
63+
################################################################################
64+
#
65+
66+
#' Heatmap of the valid values of on/off proteins
#'
#' Keeps the rows flagged as on/off and draws one tile per protein and group,
#' coloured by the number (or fraction) of valid values.
#'
#' @param RES_onoff data frame with a logical column \code{isonoff} and the
#'   columns \code{valid_values_<group>} / \code{valid_values_rel_<group>},
#'   e.g. the result of \code{calculate_onoff()}
#' @param protein_name_column name of the column of \code{RES_onoff} used to
#'   label the proteins on the y axis
#' @param relative if TRUE, the relative valid values
#'   (\code{valid_values_rel_<group>}) are plotted, otherwise the absolute ones
#'
#' @return ggplot object
Onoff_plus_heatmap <- function(RES_onoff,
                               protein_name_column = "Gene.names",
                               relative = FALSE){

  if (!(protein_name_column %in% colnames(RES_onoff))) {
    stop("Column `", protein_name_column, "` not found in `RES_onoff`.",
         call. = FALSE)
  }

  ## choose only the rows with on/off proteins
  RES_onoff2 <- RES_onoff[which(RES_onoff$isonoff), , drop = FALSE]

  ## an empty selection happens if isonoff is FALSE for every protein
  if (nrow(RES_onoff2) == 0) {
    warning("No on/off proteins found, the heatmap will be empty.",
            call. = FALSE)
  }

  ## pick the absolute or the relative valid value columns
  all_cols <- colnames(RES_onoff2)
  is_rel <- startsWith(all_cols, "valid_values_rel_")
  if (relative) {
    prefix <- "valid_values_rel_"
    validvalue_cols <- all_cols[is_rel]
  } else {
    prefix <- "valid_values_"
    validvalue_cols <- all_cols[startsWith(all_cols, "valid_values_") & !is_rel]
  }
  if (length(validvalue_cols) == 0) {
    stop("No columns starting with `", prefix, "` found in `RES_onoff`.",
         call. = FALSE)
  }

  ### TODO: smarter way to handle duplicated protein names? (These are mostly
  ### the empty ones! -> overwrite e.g. with the protein accession)
  RES_onoff2[[protein_name_column]] <- make.names(RES_onoff2[[protein_name_column]],
                                                  unique = TRUE)

  RES_onoff2_long <- as.data.frame(
    tidyr::pivot_longer(RES_onoff2,
                        cols = tidyr::all_of(validvalue_cols),
                        names_to = "group")
  )

  ## strip the column prefix so that only the group name remains; the groups
  ## keep the order of the columns instead of being sorted alphabetically
  group_names <- sub(prefix, "", validvalue_cols, fixed = TRUE)
  RES_onoff2_long$group <- factor(sub(prefix, "", RES_onoff2_long$group, fixed = TRUE),
                                  levels = group_names)

  ### TODO: clustering for the order of the proteins?
  ## `[` with a character vector always returns a data frame (also for one column)
  ord <- do.call(order, args = c(unname(as.list(RES_onoff2[validvalue_cols])),
                                 list(decreasing = TRUE)))
  #cl <- hclust(dist(D_onoff_wide2[, cols], method = "manhattan"), method="complete")

  RES_onoff2_long[[protein_name_column]] <- factor(RES_onoff2_long[[protein_name_column]],
                                                   levels = RES_onoff2[[protein_name_column]][ord])

  ## .data[[...]] maps the column chosen via protein_name_column to the y axis
  pl <- ggplot2::ggplot(data = RES_onoff2_long,
                        ggplot2::aes(x = group,
                                     y = .data[[protein_name_column]],
                                     fill = value)) +
    ggplot2::geom_tile() +
    ggplot2::ylab("Gene name") +
    ggplot2::xlab("group") +
    ggplot2::theme_bw()

  ## max() of an empty vector would be -Inf, so fall back to 1 in that case
  upper_limit <- if (nrow(RES_onoff2_long) > 0) max(RES_onoff2_long$value) else 1

  pl <- pl + ggplot2::scale_fill_gradient(limits = c(0, upper_limit),
                                          low = "white", high = "forestgreen")
  pl <- pl + ggplot2::theme(axis.text = ggplot2::element_text(size = ggplot2::rel(1.8)),
                            axis.title = ggplot2::element_text(size = ggplot2::rel(1.8)),
                            legend.title = ggplot2::element_text(size = ggplot2::rel(1.8)),
                            legend.text = ggplot2::element_text(size = ggplot2::rel(1.8)))

  return(pl)

}
118+
119+
120+
121+
122+

man/calculate_onoff.Rd

Lines changed: 30 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)