|
| 1 | +--- |
| 2 | +title: "Report of Parameters used with V-SVA" |
| 3 | +output: html_document |
| 4 | +date: "`r Sys.time()`" |
| 5 | +fig_width: 12 |
| 6 | +fig_height: 12 |
| 7 | +params: |
| 8 | + dt: NA |
| 9 | +--- |
| 10 | + |
| 11 | +##Expression Data and Sample Metadata Provided |
| 12 | +```{r} |
| 13 | +# dimensions of expression matrix |
| 14 | +if (!is.null(params$dt$exp_norm)) { |
| 15 | + dim(params$dt$exp_norm) |
| 16 | +} |
| 17 | +# dimensions of metadata, if not provided only Genes_Detected and Log_Total_Counts determined |
| 18 | +if (!is.null(params$dt$meta_df)) { |
| 19 | + dim(params$dt$meta_df) |
| 20 | + head(params$dt$meta_df) |
| 21 | +} |
| 22 | +
|
| 23 | +``` |
| 24 | + |
| 25 | +##Quality Control of Expression Data: Number of Features Detected in each Sample |
| 26 | +```{r, eval=FALSE} |
| 27 | +# binarize data to determine which features are detected in each sample |
| 28 | +bin_data <- params$dt$exp_norm |
| 29 | +bin_data[bin_data < 1] <- 0 |
| 30 | +bin_data[bin_data >= 1] <- 1 |
| 31 | +num.exp <- apply(bin_data,2,sum) |
| 32 | +params$dt$detect_num <- num.exp |
| 33 | +``` |
| 34 | + |
| 35 | +```{r} |
| 36 | +if (!is.null(params$dt$detect_num)) { |
| 37 | + summ <- summary(params$dt$detect_num) |
| 38 | + # histogram of number of features detected in each sample |
| 39 | + hist(params$dt$detect_num, col = "dodgerblue", main="", |
| 40 | + ylab = "Samples (n)", xlab = "Number of features detected in each sample") |
| 41 | + legend("topright", legend = paste(names(summ), round(summ, digits = 2), sep = " "), title = "Summary of features detected") |
| 42 | +} |
| 43 | +``` |
| 44 | + |
| 45 | +##Feature Pre-processing Options (normalization, sample and feature filtering) |
| 46 | +#### Remove samples based on number of detected features |
| 47 | +```{r} |
| 48 | +# were cells filtered based on genes detected? |
| 49 | +if (params$dt$cell_filter_choice) { |
| 50 | + print(paste("Removing cells that have less than ", params$dt$cellfilt_number, " features detected,", sep = "")) |
| 51 | +} else { |
| 52 | + print("Cells were not filtered based on features detected") |
| 53 | +} |
| 54 | +``` |
| 55 | + |
| 56 | +```{r, eval=FALSE} |
| 57 | +# code to filter cells based on features detected |
| 58 | +num.sel <- params$dt$detect_num[params$dt$detect_num >= params$dt$cellfilt_number] |
| 59 | +# subset data |
| 60 | +params$dt$exp_norm <- params$dt$exp_norm[, names(num.sel)] |
| 61 | +params$dt$meta_df <- params$dt$meta_df[names(num.sel), ] |
| 62 | +
|
| 63 | +``` |
| 64 | + |
| 65 | +#### Down-sample the number of samples included in the analysis (to increase computational efficiency) |
| 66 | +```{r} |
| 67 | +# were cells down-sampled to a certain number to speed up computation? |
| 68 | +if (params$dt$cell_downsample_choice) { |
| 69 | + print(paste("Cells were down-sampled to ", params$dt$cell_downsample_number, " cells", sep = "")) |
| 70 | +} else { |
| 71 | + print("Cells were not down-sampled") |
| 72 | +} |
| 73 | +``` |
| 74 | + |
| 75 | +```{r, eval=FALSE} |
| 76 | +# code to down-sample cells |
| 77 | +set.seed(1) |
| 78 | +dw_samp <- base::sample(x = 1:ncol(params$dt$exp_norm), size = params$dt$cell_downsample_number) |
| 79 | +# subset data |
| 80 | +params$dt$exp_norm <- params$dt$exp_norm[, dw_samp] |
| 81 | +params$dt$meta_df <- params$dt$meta_df[dw_samp, ] |
| 82 | +``` |
| 83 | + |
| 84 | +#### Remove features not detected in samples |
| 85 | +```{r} |
| 86 | +# were features filtered? |
| 87 | +if (params$dt$gene_filter_choice) { |
| 88 | + print(paste("Features were filtered using ", |
| 89 | + params$dt$gene_filter_method, |
| 90 | + ".", nrow(params$dt$exp_norm), " Features with ", params$dt$gene_count_num, |
| 91 | + " or more counts in at least ", params$dt$gene_cell_num, |
| 92 | + " cells were retained" , sep = "")) |
| 93 | +} else { |
| 94 | + print("Features were not filtered based on detection rate in samples") |
| 95 | +} |
| 96 | + |
| 97 | +``` |
| 98 | + |
| 99 | +#### Choose normalization method for expression data |
| 100 | +```{r, eval=FALSE} |
| 101 | +# code for gene filtering methods: all code is displayed regardless of method chosen |
| 102 | +filter <- apply(params$dt$exp_norm, 1, function(x) length(x[x>isolate(input$Count_num)])>=isolate(input$Cell_num)) |
| 103 | +params$dt$exp_norm <- params$dt$exp_norm[filter,] |
| 104 | +# normalize the data |
| 105 | +# using CPM method |
| 106 | +if (isolate(input$norm_method) == "CPM") { |
| 107 | + params$dt$exp_norm <- edgeR::cpm(params$dt$exp_norm) |
| 108 | + # using quantile normalization method |
| 109 | +} else if (isolate(input$norm_method) == "Quantile") { |
| 110 | + params$dt$exp_norm <- normalize.quantiles(params$dt$exp_norm) |
| 111 | + # scran method |
| 112 | +} else if (isolate(input$norm_method) == "scran") { |
| 113 | + sce <- SingleCellExperiment(list(counts=params$dt$exp_norm)) |
| 114 | + sce <- computeSumFactors(sce) |
| 115 | + sce <- normalize(sce) |
| 116 | + params$dt$exp_norm <- exprs(sce) |
| 117 | + # no normalization |
| 118 | +} else if (isolate(input$norm_method) == "None") { |
| 119 | + params$dt$exp_norm <- params$dt$exp_norm |
| 120 | +} |
| 121 | +``` |
| 122 | + |
| 123 | +##Surrogate Variable Analysis (SVA) |
| 124 | +```{r} |
| 125 | +# which method was used |
| 126 | +if (!is.null(params$dt$sva_method_use)) { |
| 127 | + print(paste("The surrogate variable analysis method chosen was: ", params$dt$sva_method_use, sep = "")) |
| 128 | +} |
| 129 | +
|
| 130 | +# which known factors were adjusted for |
| 131 | +if (!is.null(params$dt$known_factors_use)) { |
| 132 | + print(paste("The following known factor(s) were adjusted for: ", params$dt$known_factors_use, sep = "")) |
| 133 | +} |
| 134 | +
|
| 135 | +# the number of SV's |
| 136 | +if (!is.null(params$dt$iasva.res)) { |
| 137 | + print(paste("The number of SV's/Factors identified: ", ncol(params$dt$iasva.res$sv), sep = "")) |
| 138 | +} |
| 139 | +``` |
| 140 | + |
| 141 | +```{r, eval=FALSE} |
| 142 | +# code for SVA methods |
| 143 | +# create model matrix with known factors to adjust for |
| 144 | +id_mod <- which(colnames(params$dt$meta_df) %in% params$dt$known_factors_use) |
| 145 | +if (length(id_mod) > 1) { |
| 146 | + formdf1 <- as.formula(paste("~", colnames(params$dt$meta_df)[id_mod][1], "+", paste(colnames(params$dt$meta_df)[id_mod[2:length(id_mod)]],collapse="+"), sep = "")) |
| 147 | + mod <- model.matrix(formdf1, data = params$dt$meta_df) |
| 148 | +} else { |
| 149 | + varf1 <- as.factor(params$dt$meta_df[, id_mod]) |
| 150 | + mod <- model.matrix(~varf1, data = params$dt$meta_df) |
| 151 | +} |
| 152 | +# create summarized experiment for expression matrix to later use for marker gene identification |
| 153 | +summ_exp <- SummarizedExperiment(assays = as.matrix(params$dt$exp_norm)) |
| 154 | +params$dt$summ_exp <- summ_exp |
| 155 | + |
| 156 | +# if user chose IA-SVA, then perform following |
| 157 | +if (isolate(params$dt$sva_method_use == "IA-SVA")) { |
| 158 | +# depending on which ia-sva parameters were chosen, evaluate |
| 159 | + if (isolate(input$iasva_param == "Percentage Threshold")) { |
| 160 | + params$dt$iasva.res <- fast_iasva(summ_exp, mod[,-1, drop = F], verbose=FALSE, |
| 161 | + pct.cutoff = isolate(input$pct_cutt), num.sv = NULL) |
| 162 | + } else if (isolate(input$iasva_param == "Number of SVs")) { |
| 163 | + params$dt$iasva.res <- fast_iasva(summ_exp, mod[,-1, drop = F], verbose=FALSE, |
| 164 | + pct.cutoff = isolate(input$pct_cutt), num.sv = isolate(input$num_of_svs)) |
| 165 | + } |
| 166 | + # else if choose SVA method |
| 167 | + } else if (isolate(params$dt$sva_method_use == "SVA")) { |
| 168 | + # perform sva analysis with specified svs |
| 169 | + sva.res <- svaseq(params$dt$exp_norm, mod = mod, mod0 = mod[,1], n.sv = isolate(input$sva_num)) |
| 170 | + colnames(sva.res$sv) <- paste("SV", 1:ncol(sva.res$sv), sep = "") |
| 171 | + params$dt$iasva.res <- sva.res |
| 172 | + |
| 173 | + # else if choose zinb-wave method |
| 174 | + } else if (isolate(params$dt$sva_method_use == "ZINB-WaVE")) { |
| 175 | + # perform analysis with specified latent factors |
| 176 | + zinb.matrix <- params$dt$exp_norm |
| 177 | + # coerce to integer |
| 178 | + mode(zinb.matrix) <- "integer" |
| 179 | + zinb.res <- zinbFit(Y = zinb.matrix, X = mod[,-1, drop = F], K = isolate(input$zinb_num)) |
| 180 | + # extract factors |
| 181 | + zinb.fac <- getW(zinb.res) |
| 182 | + colnames(zinb.fac) <- paste("SV", 1:ncol(zinb.fac), sep = "") |
| 183 | + params$dt$iasva.res <- list(sv = zinb.fac) |
| 184 | + } |
| 185 | + |
| 186 | +``` |
| 187 | + |
| 188 | +## Correlation Plot of SV's and Sample Metadata |
| 189 | +```{r} |
| 190 | +# change factors to numeric for correlation |
| 191 | +if (!is.null(params$dt$meta_df) & !is.null(params$dt$iasva.res)) { |
| 192 | + meta_sel <- params$dt$meta_df |
| 193 | +for (jcol in 1:ncol(meta_sel)) { |
| 194 | + meta_sel[,jcol] <- as.numeric(as.factor(meta_sel[,jcol])) |
| 195 | +} |
| 196 | +iasva_vars <- cbind(params$dt$iasva.res$sv, meta_sel) |
| 197 | +# need to append column names to matrix |
| 198 | +colnames(iasva_vars) <- c(paste("SV", 1:ncol(params$dt$iasva.res$sv), sep = ""), |
| 199 | + colnames(params$dt$meta_df)) |
| 200 | +col <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA")) |
| 201 | +corrplot(abs(cor(iasva_vars)), type = "upper", method = "color", |
| 202 | + col = col(200), number.cex = 1, |
| 203 | + addCoef.col = "black", |
| 204 | + tl.col = "black", tl.srt = 90, diag = FALSE) |
| 205 | +} |
| 206 | +
|
| 207 | +``` |
| 208 | + |
| 209 | +## Paired SV Plots |
| 210 | +```{r} |
| 211 | +if (!is.null(params$dt$iasva.res)) { |
| 212 | + iasva.sv <- as.data.frame(params$dt$iasva.res$sv) |
| 213 | +rownames(iasva.sv) <- colnames(params$dt$exp_norm) |
| 214 | +pairs(iasva.sv, main="", pch=20, cex=0.5, lower.panel = NULL) |
| 215 | +} |
| 216 | +
|
| 217 | +``` |
| 218 | + |
| 219 | +## Interactive Paired SV Plots |
| 220 | +```{r} |
| 221 | +if (!is.null(params$dt$iasva.res)) { |
| 222 | + iasva.sv <- as.data.frame(params$dt$iasva.res$sv) |
| 223 | + rownames(iasva.sv) <- colnames(params$dt$exp_norm) |
| 224 | + plot_ly(iasva.sv, x = ~SV1, y = ~SV2, type = "scatter", |
| 225 | + mode = "markers", text = paste("Cell ID: ", rownames(iasva.sv), sep = ""), |
| 226 | + marker = list( |
| 227 | + opacity = 0.5 |
| 228 | + ) |
| 229 | + ) |
| 230 | +} |
| 231 | +``` |
| 232 | + |
| 233 | +## Identifying Marker Features associated with SV's |
| 234 | +```{r, eval=FALSE} |
| 235 | +# identify which svs were chosen for marker analysis |
| 236 | +id_sv_mark <- which(colnames(params$dt$iasva.res$sv) %in% isolate(input$SV_marks)) |
| 237 | +marker_genes <- iasva::find_markers(Y = params$dt$summ_exp, |
| 238 | + iasva.sv = as.matrix(params$dt$iasva.res$sv[, id_sv_mark, drop=FALSE]), |
| 239 | + rsq.cutoff = isolate(input$rsqcutoff), method = isolate(input$mark_sig), sig.cutoff = isolate(input$mark_cutoff)) |
| 240 | +params$dt$markers <- marker_genes |
| 241 | +``` |
| 242 | + |
| 243 | +## Heatmap of Marker Features Determined from SVA Analysis |
| 244 | +```{r} |
| 245 | +if (!is.null(params$dt$markers)) { |
| 246 | + all_marks <- params$dt$markers$All_Unique_Markers |
| 247 | +log_mat <- log(as.matrix(params$dt$exp_norm[all_marks,])+1) |
| 248 | +# remove any NA's from matrix |
| 249 | +log_mat <- log_mat[complete.cases(log_mat),] |
| 250 | +pheatmap(log_mat, show_colnames = FALSE, |
| 251 | + show_rownames = TRUE, |
| 252 | + clustering_method = "ward.D2") |
| 253 | +} |
| 254 | +``` |
| 255 | + |
| 256 | +## Dimension Reduction |
| 257 | +```{r, eval=FALSE} |
| 258 | +set.seed(1) |
| 259 | +# transpose matrix |
| 260 | +trans_orig <- t(log(params$dt$exp_norm+1)) |
| 261 | +# remove any zeros |
| 262 | +params$dt$pre_dim_orig <- trans_orig[, apply(trans_orig, 2, var, na.rm = TRUE) != 0] |
| 263 | +# Principal component analysis (PCA) for all genes |
| 264 | +dim_orig <- prcomp(x = params$dt$pre_dim_orig, center = TRUE, scale. = TRUE) |
| 265 | +dim_orig_mat <- dim_orig$x |
| 266 | +rownames(dim_orig_mat) <- colnames(params$dt$exp_norm) |
| 267 | +params$dt$dim_orig <- as.data.frame(dim_orig_mat) |
| 268 | +
|
| 269 | +### Code for T-SNE #### |
| 270 | +# T-SNE analysis (PCA) for all genes |
| 271 | +# not comupted |
| 272 | +# dim_orig <- Rtsne(X = params$dt$pre_dim_orig, dims = 3) |
| 273 | +# dim_orig_mat <- dim_orig$Y |
| 274 | + |
| 275 | +#### Code for MDS ### |
| 276 | +# dim_orig <- cmdscale(d = dist(params$dt$pre_dim_orig), k = 3) |
| 277 | +# dim_orig_mat <- dim_orig |
| 278 | +# rownames(dim_orig_mat) <- colnames(params$dt$exp_norm) |
| 279 | +# colnames(dim_orig_mat) <- c("MDS1", "MDS2", "MDS3") |
| 280 | +
|
| 281 | +# Principal component analysis for SV-selected genes |
| 282 | +# transpose matrix |
| 283 | +trans_mark <- t(params$dt$exp_norm[params$dt$markers_formatted[,1],]) |
| 284 | +# remove any zeros |
| 285 | +params$dt$pre_dim_mark <- trans_mark[, apply(trans_mark, 2, var, na.rm = TRUE) != 0] |
| 286 | +dim_mark <- prcomp(x = params$dt$pre_dim_mark, center = TRUE, scale. = TRUE) |
| 287 | +dim_mark_mat <- dim_mark$x |
| 288 | +rownames(dim_mark_mat) <- colnames(params$dt$exp_norm) |
| 289 | +params$dt$dim_mark <- as.data.frame(dim_mark_mat) |
| 290 | +
|
| 291 | +``` |
| 292 | + |
| 293 | +### Dimension Reduction Visualization (using all features) |
| 294 | +```{r} |
| 295 | +if (!is.null(params$dt$dim_method)) { |
| 296 | + # print your dimension reduction method |
| 297 | + print(paste("Dimension reduction method chosen: ", params$dt$dim_method, sep = "")) |
| 298 | +
|
| 299 | + # if chosen PCA |
| 300 | + if (params$dt$dim_method == "PCA") { |
| 301 | + # 3D interactive dimension reduction plot using all features |
| 302 | + plot_ly(params$dt$dim_orig, x = ~PC1, y = ~PC2, z = ~PC3, type = "scatter3d", |
| 303 | + mode = "markers", text = paste("Cell ID: ", rownames(params$dt$dim_orig), sep = ""), |
| 304 | + marker = list( |
| 305 | + opacity = 0.5 |
| 306 | + )) %>% layout(title = paste("All Genes (n = ", nrow(params$dt$exp_norm), ")", sep = "")) |
| 307 | + } else if (params$dt$dim_method == "t-SNE") { |
| 308 | + plot_ly(params$dt$dim_orig, x = ~tSNE1, y = ~tSNE2, z = ~tSNE3, type = "scatter3d", |
| 309 | + mode = "markers", text = paste("Cell ID: ", rownames(params$dt$dim_orig), sep = ""), |
| 310 | + marker = list( |
| 311 | + opacity = 0.5 |
| 312 | + )) %>% layout(title = paste("All Genes (n = ", nrow(params$dt$exp_norm), ")", sep = "")) |
| 313 | + } else if (params$dt$dim_method == "Classical Metric MDS") { |
| 314 | + plot_ly(params$dt$dim_orig, x = ~MDS1, y = ~MDS2, z = ~MDS3, type = "scatter3d", |
| 315 | + mode = "markers", text = paste("Cell ID: ", rownames(params$dt$dim_orig), sep = ""), |
| 316 | + marker = list( |
| 317 | + opacity = 0.5 |
| 318 | + )) %>% layout(title = paste("All Genes (n = ", nrow(params$dt$exp_norm), ")", sep = "")) |
| 319 | + } |
| 320 | +} |
| 321 | +``` |
| 322 | + |
| 323 | +### Dimension Reduction Visualization (using SV-associated features) |
| 324 | +```{r} |
| 325 | +if (!is.null(params$dt$dim_method)) { |
| 326 | + # 3D interactive dimension reduction plot using SV selected features |
| 327 | + if (params$dt$dim_method == "PCA") { |
| 328 | + plot_ly(params$dt$dim_mark, x = ~PC1, y = ~PC2, z = ~PC3, type = "scatter3d", |
| 329 | + mode = "markers", text = paste("Cell ID: ", rownames(params$dt$dim_mark), sep = ""), |
| 330 | + marker = list( |
| 331 | + opacity = 0.5 |
| 332 | + )) %>% layout(title = paste(params$dt$sva_method_use, " Genes (n = ", nrow(params$dt$exp_norm[params$dt$markers_formatted[,1],]), |
| 333 | + ";",params$dt$chosen_svs, ")", sep = "")) |
| 334 | + } else if (params$dt$dim_method == "t-SNE") { |
| 335 | + plot_ly(params$dt$dim_mark, x = ~tSNE1, y = ~tSNE2, z = ~tSNE3, type = "scatter3d", |
| 336 | + mode = "markers", text = paste("Cell ID: ", rownames(params$dt$dim_mark), sep = ""), |
| 337 | + marker = list( |
| 338 | + opacity = 0.5 |
| 339 | + )) %>% layout(title = paste(params$dt$sva_method_use, " Genes (n = ", nrow(params$dt$exp_norm[params$dt$markers_formatted[,1],]), "; ", |
| 340 | + params$dt$chosen_svs, ")", sep = "")) |
| 341 | + } else if (params$dt$dim_method == "Classical Metric MDS") { |
| 342 | + plot_ly(params$dt$dim_mark, x = ~MDS1, y = ~MDS2, z = ~MDS3, type = "scatter3d", |
| 343 | + mode = "markers", text = paste("Cell ID: ", rownames(params$dt$dim_mark), sep = ""), |
| 344 | + marker = list( |
| 345 | + opacity = 0.5 |
| 346 | + )) %>% layout(title = paste(params$dt$sva_method_use, " Genes (n = ", nrow(params$dt$exp_norm[params$dt$markers_formatted[,1],]), "; ", |
| 347 | + params$dt$chosen_svs, ")", sep = "")) |
| 348 | + } |
| 349 | +} |
| 350 | +
|
| 351 | +``` |
| 352 | + |
| 353 | +## Gene Enrichment Analysis |
| 354 | +```{r, eval=FALSE} |
| 355 | +# convert gene symbols to Entrez ID (for example for human data) |
| 356 | +gene.df <- bitr(gene, fromType = "SYMBOL", |
| 357 | + toType = c("ENSEMBL", "ENTREZID"), |
| 358 | + OrgDb = org.Hs.eg.db) |
| 359 | +params$dt$species <- org.Hs.eg.db |
| 360 | +params$dt$gene.df <- gene.df |
| 361 | +
|
| 362 | +# analysis for GO biological process terms (as an example) |
| 363 | +ego <- enrichGO(gene = params$dt$gene.df$ENTREZID, |
| 364 | + OrgDb = params$dt$species, |
| 365 | + keyType = "ENTREZID", |
| 366 | + ont = "BP", |
| 367 | + pvalueCutoff = isolate(input$pvalue_cutoff), pAdjustMethod = isolate(input$pvalue_correct), |
| 368 | + qvalueCutoff = isolate(input$path_cutoff), |
| 369 | + minGSSize = 5, |
| 370 | + readable = TRUE) |
| 371 | +params$dt$enrich_res <- ego |
| 372 | +params$dt$category_number <- input$path_viz_num |
| 373 | +params$dt$pathway_name <- input$Path_Type |
| 374 | +``` |
| 375 | + |
| 376 | +```{r} |
| 377 | +# visualize pathway results |
| 378 | +if (!is.null(params$dt$enrich_res)) { |
| 379 | + print(paste("Type of gene enrichment analysis performed: ", params$dt$pathway_name, sep = "")) |
| 380 | + print(paste("Show this many categories: ", params$dt$category_number, sep = "")) |
| 381 | +
|
| 382 | + dp <- clusterProfiler::dotplot(object = params$dt$enrich_res, showCategory = params$dt$category_number) + ggtitle(params$dt$pathway_name) |
| 383 | + plot(dp) |
| 384 | +} |
| 385 | +``` |
| 386 | + |
| 387 | + |
| 388 | +## Session Information |
| 389 | +```{r} |
| 390 | +sessionInfo() |
| 391 | +``` |
| 392 | + |
0 commit comments