Skip to content

Colors applied to continuous clinicalFeatures in Oncoplot are not right #1088

Open
@Willem50

Description

@Willem50

Hello,

I am having an issue when I try to add colors for continuous clinical features to my Oncoplot.

When I order the samples in any way other than by the continuous clinical feature itself, some of the colors shown for the continuous values are no longer correct.

More specifically, in my oncoplot I want to add gene expression values as clinical features. I want to add them as continuous values.

When I use sortByAnnotation, the colors applied to the continuous clinical feature are correct.
However, when I sort the samples in another way, for example with sampleOrder, the colors do not all correspond with the continuous clinical feature anymore.

R does not give any error.

I have added some example data that can be used to reproduce the issue.

Data_oncoplot_example.xlsx

Thanks for a great package!

Session info:
R version 4.4.1 (2024-06-14 ucrt)
Platform: x86_64-w64-mingw32/x64
Running under: Windows 10 x64 (build 19045)

Matrix products: default

locale:
[1] LC_COLLATE=Dutch_Netherlands.utf8 LC_CTYPE=Dutch_Netherlands.utf8 LC_MONETARY=Dutch_Netherlands.utf8
[4] LC_NUMERIC=C LC_TIME=Dutch_Netherlands.utf8

time zone: Europe/Amsterdam
tzcode source: internal

attached base packages:
[1] stats graphics grDevices utils datasets methods base

other attached packages:
[1] wesanderson_0.3.7 RColorBrewer_1.1-3 ggsci_3.2.0 admisc_0.37 maftools_2.22.0 lubridate_1.9.3
[7] forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4 purrr_1.0.2 readr_2.1.5 tidyr_1.3.1
[13] tibble_3.2.1 ggplot2_3.5.1 tidyverse_2.0.0 readxl_1.4.3 data.table_1.16.0

loaded via a namespace (and not attached):
[1] utf8_1.2.4 generics_0.1.3 stringi_1.8.4 lattice_0.22-6 hms_1.1.3 magrittr_2.0.3
[7] grid_4.4.1 timechange_0.3.0 cellranger_1.1.0 plyr_1.8.9 Matrix_1.7-0 survival_3.6-4
[13] fansi_1.0.6 scales_1.3.0 cli_3.6.3 crayon_1.5.3 rlang_1.1.4 munsell_0.5.1
[19] splines_4.4.1 withr_3.0.1 DNAcopy_1.80.0 tools_4.4.1 reshape2_1.4.4 tzdb_0.4.0
[25] colorspace_2.1-1 vctrs_0.6.5 R6_2.5.1 lifecycle_1.0.4 pkgconfig_2.0.3 pillar_1.9.0
[31] gtable_0.3.5 glue_1.7.0 Rcpp_1.0.14 tidyselect_1.2.1 rstudioapi_0.16.0 compiler_4.4.1

Command:

library(data.table)
library(readxl)
library(tidyverse)
library(maftools)
library(dplyr)
library(admisc)
library(ggsci)
library(RColorBrewer)

# Read the input table for the oncoplot from the first sheet of the workbook
Data <- readxl::read_excel(path = "Data_oncoplot_example.xlsx", sheet = 1)

# FUNCTION
# Return the factor levels of column `col` of `data`: the column is converted
# to a factor and its levels (a character vector) are returned.
getLevels <- function(data, col) {
  column_as_factor <- factor(data[[col]])
  levels(column_as_factor)
}

# Get stats and types
# Count rows per sample and gene, once split by coding effect and once split
# by variant type.
somMutStats_codeEff <- Data %>%
  group_by(`Study ID`, Gene, Effect) %>%
  tally()
somMutStats_type <- Data %>%
  group_by(`Study ID`, Gene, type) %>%
  tally()

# Distinct variant types and coding effects present in the input
typeSom <- getLevels(Data, "type")
codeEffSom <- getLevels(Data, "Effect")

# Get Summary
# One row per sample/gene with one count column per variant type and one per
# coding effect; combinations that do not occur are filled with 0.
summary <- merge(
  reshape2::dcast(somMutStats_type, `Study ID` + Gene ~ type),
  reshape2::dcast(somMutStats_codeEff, `Study ID` + Gene ~ Effect),
  by = c("Study ID", "Gene"),
  all = TRUE
)
summary[is.na(summary)] <- 0

# drop = FALSE keeps a data.frame even when only one effect level exists, so
# the row total is still computed instead of failing on a plain vector.
summary$totalSomaticMutations <- rowSums(summary[, codeEffSom, drop = FALSE])

# Switch to the column names used for the MAF slots filled in below
summary <- summary %>%
  dplyr::rename(Tumor_Sample_Barcode = `Study ID`, Hugo_Symbol = Gene)

# MAF Formatted data
# Create an empty MAF object and populate its slots by hand from the tables
# computed above.
maf <- new("MAF")

maf@data <- data.table(Data)
maf@data <- maf@data %>%
  dplyr::rename(
    Tumor_Sample_Barcode = `Study ID`,
    Hugo_Symbol = Gene,
    Variant_Type = type,
    Variant_Classification = Effect,
    Protein_Change = canonicalHgvsProteinImpact
  )

# Total variant count per sample
maf@variants.per.sample <- data.table(
  summary %>%
    group_by(Tumor_Sample_Barcode) %>%
    summarise(Variants = sum(totalSomaticMutations))
)

# Per-sample counts for each variant type plus the overall total
maf@variant.type.summary <- data.table(
  summary %>%
    group_by(Tumor_Sample_Barcode) %>%
    summarise_at(c(typeSom, "totalSomaticMutations"), sum)
) %>%
  dplyr::rename(total = totalSomaticMutations)

# Per-sample counts for each variant classification (wide format)
maf@variant.classification.summary <- data.table(
  maf@data %>%
    dplyr::select(Tumor_Sample_Barcode, Variant_Classification) %>%
    group_by(Tumor_Sample_Barcode, Variant_Classification) %>%
    tally() %>%
    reshape2::dcast(Tumor_Sample_Barcode ~ Variant_Classification)
)

maf@variant.classification.summary[is.na(maf@variant.classification.summary)] <- 0
# NOTE(review): columns 2:5 are hard-coded, so the total only covers four
# classification columns; confirm this matches the number of classifications
# in the input data.
maf@variant.classification.summary$total <-
  rowSums(maf@variant.classification.summary[, 2:5])

# Number of distinct samples with at least one counted mutation, per gene
t1 <- summary %>%
  group_by(Hugo_Symbol) %>%
  filter(totalSomaticMutations > 0) %>%
  summarise(MutatedSamples = length(unique(Tumor_Sample_Barcode))) %>%
  mutate(AlteredSamples = MutatedSamples)

sumCheck <- maf@data

# Per-gene counts by variant type and by classification, merged side by side;
# `total` sums the classification columns.
sumCheck_t <- merge(
  reshape2::dcast(
    sumCheck[, c("Hugo_Symbol", "Variant_Type")],
    Hugo_Symbol ~ Variant_Type
  ),
  reshape2::dcast(
    sumCheck[, c("Hugo_Symbol", "Variant_Classification")],
    Hugo_Symbol ~ Variant_Classification
  ),
  by = "Hugo_Symbol",
  all = TRUE
) %>%
  mutate(total = rowSums(select_at(., codeEffSom)))

t <- sumCheck_t %>%
  group_by(Hugo_Symbol) %>%
  summarise_at(c(codeEffSom, "total"), sum)

maf@gene.summary <- data.table(merge(t, t1, by = "Hugo_Symbol", all = TRUE))

# Overall summary: build, centre, number of samples, number of genes and the
# column sums of the per-gene count columns. `summarise_all(sum)` replaces the
# deprecated `funs(sum)` wrapper.
temp <- data.frame(
  Summary = rbind(
    NCBI_Build = 37,
    Center = NA,
    t(maf@variants.per.sample[, 1] %>% summarise(Samples = n())),
    t(maf@gene.summary[, 1] %>% summarise(nGenes = n())),
    t(
      maf@gene.summary[, -c("Hugo_Symbol", "AlteredSamples", "MutatedSamples")] %>%
        summarise_all(sum)
    )
  )
)

maf@summary <- data.table(cbind(ID = row.names(temp), summary = temp$Summary))
# The silent-variant slot is filled with the same table as the main data slot
maf@maf.silent <- maf@data

# Clinical annotations come from the second sheet of the same workbook
Clinical_data <- read_excel("Data_oncoplot_example.xlsx", sheet = 2)
Clinical_data <- Clinical_data %>%
  dplyr::rename(Tumor_Sample_Barcode = `Study ID`)
# Keep only the selected annotation columns (by position) and convert to a
# data.table before storing it on the MAF object.
Clinical_data <- as.data.table(Clinical_data[, c(1, 3, 4, 6:9, 10, 11)])
maf@clinical.data <- Clinical_data

# Colours for the variant classifications: ten colours from the ggsci JCO
# palette, named by classification label.
colsVC1 <- pal_jco()(10)
names(colsVC1) <- c(
  "Splice-site",
  "CNV - amplification",
  "CNV - deletion",
  "Nonsense",
  "Multi_Hit",
  "Gene fusion",
  "Missense",
  "Frameshift",
  "In-frame indel",
  "Promoter mutations"
)

# Genes to show, in display order
Genes <- c(
  "TP53",
  "PIK3CA",
  "HRAS",
  "ERBB2",
  "PTEN",
  "CDK12",
  "NOTCH1",
  "BRAF",
  "NF1",
  "AKT1",
  "BRCA2",
  "MAP3K1",
  "NOTCH2",
  "JAK2",
  "NRAS",
  "MYB::NFIB"
)

# Hand-picked colours for the categorical annotations. (A Dark2 palette was
# previously assigned here and immediately overwritten; that dead assignment
# has been dropped.)
GrowthPatterncol <- c("#394165", "#DCA258", "#EEDA9D")
HGTcol <- c("Red", "Blue")

names(GrowthPatterncol) <- c("Cribriform", "Tubular", "Solid")
names(HGTcol) <- c("Yes", "No")

# Categorical features get named colour vectors; the two continuous expression
# features are given a palette name each.
annocol <- list(
  Growth_pattern = GrowthPatterncol,
  High_grade_transformation = HGTcol,
  TP63_expression = "Blues",
  MYC_expression = "YlGnBu"
)

# Per-sample nsTMB values (deduplicated), used as topBarData in the plot call
nsTMB <- data.frame(maf@clinical.data[, c("Tumor_Sample_Barcode", "nsTMB")])
nsTMB <- dplyr::distinct(nsTMB)

# Sample order as it appears in the clinical table
sorted_samples <- Clinical_data$Tumor_Sample_Barcode

# Draw the oncoplot that reproduces the reported colour problem.
# NOTE(review): both sortByAnnotation and sampleOrder are supplied here;
# confirm which of the two the report intends to exercise.
oncoplot(
  # Data and gene selection
  maf = maf,
  genes = Genes,
  keepGeneOrder = TRUE,
  topBarData = nsTMB,
  # Clinical annotations and sample ordering
  clinicalFeatures = c(
    "TP63_expression",
    "MYC_expression",
    "Growth_pattern",
    "High_grade_transformation"
  ),
  annotationColor = annocol,
  sortByAnnotation = TRUE,
  sampleOrder = sorted_samples,
  # Colours
  colors = colsVC1,
  bgCol = "grey90",
  # Layout and text sizes
  gene_mar = 10,
  fontSize = 0.7,
  legendFontSize = 1.1,
  annotationFontSize = 1.1,
  # Elements to show or hide
  showTitle = FALSE,
  showPct = FALSE,
  showTumorSampleBarcodes = TRUE,
  drawColBar = TRUE,
  drawRowBar = FALSE
)

Metadata

Metadata

Assignees

No one assigned

    Labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions