Description
Hello,
I am having an issue when I try to add colors for continuous clinical features to my Oncoplot.
When I order the samples by anything other than the continuous clinical feature, some of the colors shown for the continuous values are no longer correct.
More specifically, in my oncoplot I want to add gene expression values as clinical features. I want to add them as continuous values.
When I use sortByAnnotation, the colors applied to the continuous clinical feature are correct.
However, when I sort the samples in another way, for example with sampleOrder, the colors do not all correspond with the continuous clinical feature anymore.
R does not give any error.
I have added some example data that can be used to reproduce the issue.
Thanks for a great package!
Session info:
R version 4.4.1 (2024-06-14 ucrt)
Platform: x86_64-w64-mingw32/x64
Running under: Windows 10 x64 (build 19045)
Matrix products: default
locale:
[1] LC_COLLATE=Dutch_Netherlands.utf8 LC_CTYPE=Dutch_Netherlands.utf8 LC_MONETARY=Dutch_Netherlands.utf8
[4] LC_NUMERIC=C LC_TIME=Dutch_Netherlands.utf8
time zone: Europe/Amsterdam
tzcode source: internal
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] wesanderson_0.3.7 RColorBrewer_1.1-3 ggsci_3.2.0 admisc_0.37 maftools_2.22.0 lubridate_1.9.3
[7] forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4 purrr_1.0.2 readr_2.1.5 tidyr_1.3.1
[13] tibble_3.2.1 ggplot2_3.5.1 tidyverse_2.0.0 readxl_1.4.3 data.table_1.16.0
loaded via a namespace (and not attached):
[1] utf8_1.2.4 generics_0.1.3 stringi_1.8.4 lattice_0.22-6 hms_1.1.3 magrittr_2.0.3
[7] grid_4.4.1 timechange_0.3.0 cellranger_1.1.0 plyr_1.8.9 Matrix_1.7-0 survival_3.6-4
[13] fansi_1.0.6 scales_1.3.0 cli_3.6.3 crayon_1.5.3 rlang_1.1.4 munsell_0.5.1
[19] splines_4.4.1 withr_3.0.1 DNAcopy_1.80.0 tools_4.4.1 reshape2_1.4.4 tzdb_0.4.0
[25] colorspace_2.1-1 vctrs_0.6.5 R6_2.5.1 lifecycle_1.0.4 pkgconfig_2.0.3 pillar_1.9.0
[31] gtable_0.3.5 glue_1.7.0 Rcpp_1.0.14 tidyselect_1.2.1 rstudioapi_0.16.0 compiler_4.4.1
Command:
library(data.table)
library(readxl)
library(tidyverse)
library(maftools)
library(dplyr)
library(admisc)
library(ggsci)
library(RColorBrewer)
# Read the first sheet of the example workbook; the columns used below are
# `Study ID`, Gene, Effect, type and canonicalHgvsProteinImpact.
Data <- read_excel(path = "Data_oncoplot_example.xlsx", sheet = 1)
# FUNCTION
# Return the factor levels of one column as a character vector.
#
# data: a data frame or list that can be indexed with `[[`.
# col:  name (or position) of the column to inspect.
#
# The result is what `levels(factor(...))` yields for that column, i.e. the
# sorted distinct non-NA values.
getLevels <- function(data, col) {
  column_as_factor <- factor(data[[col]])
  levels(column_as_factor)
}
# Tally variants per sample and gene, once split by coding effect and once
# split by variant type.
somMutStats_codeEff <- Data %>%
  group_by(`Study ID`, Gene, Effect) %>%
  tally()
somMutStats_type <- Data %>%
  group_by(`Study ID`, Gene, type) %>%
  tally()

# Distinct variant types and coding effects that occur in the data.
typeSom <- getLevels(Data, "type")
codeEffSom <- getLevels(Data, "Effect")

# Wide summary keyed by sample and gene: the type tallies and the effect
# tallies are cast to one column per level and merged; combinations that do
# not occur are set to 0.
summary <- merge(
  reshape2::dcast(somMutStats_type, `Study ID` + Gene ~ type),
  reshape2::dcast(somMutStats_codeEff, `Study ID` + Gene ~ Effect),
  by = c("Study ID", "Gene"),
  all = TRUE
)
summary[is.na(summary)] <- 0

# Row total over the coding-effect columns only.
summary$totalSomaticMutations <- apply(summary[, codeEffSom], 1, sum)

# Switch to the column names maftools expects.
summary <- summary %>%
  dplyr::rename(Tumor_Sample_Barcode = `Study ID`, Hugo_Symbol = Gene)
# Build a maftools MAF object by hand from the variant table (`Data`) and the
# per-sample/per-gene `summary` table.
#
# NOTE(review): in the posted script every dotted slot access had been mangled
# to "[email protected]" by e-mail obfuscation, which is not valid R. The
# slot names below (variants.per.sample, variant.type.summary,
# variant.classification.summary, gene.summary, maf.silent) are restored from
# the slots of the maftools MAF class and from what each statement computes;
# confirm against the author's original script.
maf <- new("MAF")

# Variant table with maftools column names.
maf@data <- data.table(Data)
maf@data <- maf@data %>%
  dplyr::rename(
    Tumor_Sample_Barcode = `Study ID`,
    Hugo_Symbol = Gene,
    Variant_Type = type,
    Variant_Classification = Effect,
    Protein_Change = canonicalHgvsProteinImpact
  )

# Total number of variants per sample.
maf@variants.per.sample <- data.table(
  summary %>%
    group_by(Tumor_Sample_Barcode) %>%
    summarise(Variants = sum(totalSomaticMutations))
)

# Per-sample counts for each variant type plus the overall total.
maf@variant.type.summary <- data.table(
  summary %>%
    group_by(Tumor_Sample_Barcode) %>%
    summarise_at(c(typeSom, "totalSomaticMutations"), sum)
) %>%
  dplyr::rename(total = totalSomaticMutations)

# Per-sample counts for each variant classification.
maf@variant.classification.summary <- data.table(
  maf@data %>%
    dplyr::select(Tumor_Sample_Barcode, Variant_Classification) %>%
    group_by(Tumor_Sample_Barcode, Variant_Classification) %>%
    tally() %>%
    reshape2::dcast(Tumor_Sample_Barcode ~ Variant_Classification)
)
maf@variant.classification.summary[is.na(maf@variant.classification.summary)] <- 0
# Sum every classification column (everything but the barcode in column 1).
# The posted script summed columns 2:5, which is only right when exactly four
# classifications are present.
maf@variant.classification.summary$total <-
  rowSums(maf@variant.classification.summary[, -1])

# Per gene: number of distinct samples carrying at least one variant.
t1 <- summary %>%
  group_by(Hugo_Symbol) %>%
  filter(totalSomaticMutations > 0) %>%
  summarise(MutatedSamples = length(unique(Tumor_Sample_Barcode))) %>%
  mutate(AlteredSamples = MutatedSamples)

# Per gene: counts by variant type and by classification, plus a total over
# the classification columns.
sumCheck <- maf@data
sumCheck_t <- merge(
  reshape2::dcast(
    sumCheck[, c("Hugo_Symbol", "Variant_Type")],
    Hugo_Symbol ~ Variant_Type
  ),
  reshape2::dcast(
    sumCheck[, c("Hugo_Symbol", "Variant_Classification")],
    Hugo_Symbol ~ Variant_Classification
  ),
  by = "Hugo_Symbol",
  all = TRUE
) %>%
  mutate(total = rowSums(select_at(., codeEffSom)))
t <- sumCheck_t %>%
  group_by(Hugo_Symbol) %>%
  summarise_at(c(codeEffSom, "total"), sum)
maf@gene.summary <- data.table(merge(t, t1, by = "Hugo_Symbol", all = TRUE))

# Overall summary table: build, centre, sample count, gene count and the
# column sums of the per-gene classification counts.
# `summarise_all(sum)` replaces the deprecated `summarise_all(funs(sum))`.
temp <- data.frame(
  Summary = rbind(
    NCBI_Build = 37,
    Center = NA,
    t(maf@variants.per.sample[, 1] %>% summarise(Samples = n())),
    t(maf@gene.summary[, 1] %>% summarise(nGenes = n())),
    t(
      maf@gene.summary[, -c("Hugo_Symbol", "AlteredSamples", "MutatedSamples")] %>%
        summarise_all(sum)
    )
  )
)
maf@summary <- data.table(cbind(ID = row.names(temp), summary = temp$Summary))

# The silent-variant slot is filled with the same variant table.
maf@maf.silent <- maf@data
# Read the clinical annotations from the second sheet, rename the sample
# column to the maftools key, and keep columns 1, 3, 4 and 6 to 11.
Clinical_data <- read_excel("Data_oncoplot_example.xlsx", sheet = 2)
Clinical_data <- Clinical_data %>%
  dplyr::rename(Tumor_Sample_Barcode = `Study ID`)
Clinical_data <- as.data.table(Clinical_data[, c(1, 3, 4, 6:11)])

# NOTE(review): this slot access was mangled to "[email protected]" by
# e-mail obfuscation in the posted script; `clinical.data` is the MAF slot
# that holds per-sample annotations — confirm against the original script.
maf@clinical.data <- Clinical_data
# Ten colours from the ggsci JCO palette, named by variant classification;
# passed to oncoplot() as `colors`.
colsVC1 <- setNames(
  pal_jco()(10),
  c(
    "Splice-site",
    "CNV - amplification",
    "CNV - deletion",
    "Nonsense",
    "Multi_Hit",
    "Gene fusion",
    "Missense",
    "Frameshift",
    "In-frame indel",
    "Promoter mutations"
  )
)
# Genes handed to oncoplot() through its `genes` argument, in this order.
Genes <- c(
  "TP53", "PIK3CA", "HRAS", "ERBB2",
  "PTEN", "CDK12", "NOTCH1", "BRAF",
  "NF1", "AKT1", "BRCA2", "MAP3K1",
  "NOTCH2", "JAK2", "NRAS", "MYB::NFIB"
)
# Colours for the clinical-feature annotation tracks.
# The posted script first assigned a Dark2 palette from brewer.pal() to
# GrowthPatterncol and overwrote it on the next line; that dead assignment is
# dropped and the names are attached when the vectors are built.
GrowthPatterncol <- c(
  Cribriform = "#394165",
  Tubular = "#DCA258",
  Solid = "#EEDA9D"
)
HGTcol <- c(Yes = "Red", No = "Blue")

# Categorical features get a named colour vector (level -> colour); the two
# expression features get a single palette name each ("Blues" and "YlGnBu"
# are RColorBrewer palette names).
annocol <- list(
  Growth_pattern = GrowthPatterncol,
  High_grade_transformation = HGTcol,
  TP63_expression = "Blues",
  MYC_expression = "YlGnBu"
)
# Per-sample nsTMB values for the top bar plot, one row per distinct
# barcode/value pair.
# NOTE(review): the slot access was mangled to "[email protected]" by e-mail
# obfuscation in the posted script; nsTMB is a clinical column, so the slot is
# restored as `clinical.data` — confirm against the original script.
nsTMB <- data.frame(maf@clinical.data[, c("Tumor_Sample_Barcode", "nsTMB")])
nsTMB <- dplyr::distinct(nsTMB)

# Manual sample order: the barcodes in the row order of the clinical table.
sorted_samples <- Clinical_data$Tumor_Sample_Barcode
# Draw the oncoplot with two continuous and two categorical clinical tracks.
# NOTE(review): both `sortByAnnotation = TRUE` and `sampleOrder` are supplied;
# the report says the continuous-feature colours stop matching when the
# samples are ordered with `sampleOrder`.
oncoplot(
  maf = maf,
  # Genes and samples
  genes = Genes,
  keepGeneOrder = TRUE,
  sampleOrder = sorted_samples,
  sortByAnnotation = TRUE,
  # Clinical annotation tracks
  clinicalFeatures = c(
    "TP63_expression",
    "MYC_expression",
    "Growth_pattern",
    "High_grade_transformation"
  ),
  annotationColor = annocol,
  # Side and top panels
  topBarData = nsTMB,
  drawColBar = TRUE,
  drawRowBar = FALSE,
  # Colours, labels and sizes
  colors = colsVC1,
  bgCol = "grey90",
  showTitle = FALSE,
  showPct = FALSE,
  showTumorSampleBarcodes = TRUE,
  gene_mar = 10,
  fontSize = 0.7,
  legendFontSize = 1.1,
  annotationFontSize = 1.1
)