Skip to content

Commit dcd0da1

Browse files
authored
Added Rmd file
To produce HTML reports
1 parent 38fca36 commit dcd0da1

File tree

1 file changed

+392
-0
lines changed

1 file changed

+392
-0
lines changed

report.Rmd

+392
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,392 @@
1+
---
2+
title: "Report of Parameters used with V-SVA"
3+
output: html_document
4+
date: "`r Sys.time()`"
5+
fig_width: 12
6+
fig_height: 12
7+
params:
8+
dt: NA
9+
---
10+
11+
##Expression Data and Sample Metadata Provided
12+
```{r}
13+
# dimensions of expression matrix
14+
if (!is.null(params$dt$exp_norm)) {
15+
dim(params$dt$exp_norm)
16+
}
17+
# dimensions of metadata, if not provided only Genes_Detected and Log_Total_Counts determined
18+
if (!is.null(params$dt$meta_df)) {
19+
dim(params$dt$meta_df)
20+
head(params$dt$meta_df)
21+
}
22+
23+
```
24+
25+
##Quality Control of Expression Data: Number of Features Detected in each Sample
26+
```{r, eval=FALSE}
27+
# binarize data to determine which features are detected in each sample
28+
bin_data <- params$dt$exp_norm
29+
bin_data[bin_data < 1] <- 0
30+
bin_data[bin_data >= 1] <- 1
31+
num.exp <- apply(bin_data,2,sum)
32+
params$dt$detect_num <- num.exp
33+
```
34+
35+
```{r}
36+
if (!is.null(params$dt$detect_num)) {
37+
summ <- summary(params$dt$detect_num)
38+
# histogram of number of features detected in each sample
39+
hist(params$dt$detect_num, col = "dodgerblue", main="",
40+
ylab = "Samples (n)", xlab = "Number of features detected in each sample")
41+
legend("topright", legend = paste(names(summ), round(summ, digits = 2), sep = " "), title = "Summary of features detected")
42+
}
43+
```
44+
45+
##Feature Pre-processing Options (normalization, sample and feature filtering)
46+
#### Remove samples based on number of detected features
47+
```{r}
48+
# were cells filtered based on genes detected?
49+
if (params$dt$cell_filter_choice) {
50+
print(paste("Removing cells that have less than ", params$dt$cellfilt_number, " features detected,", sep = ""))
51+
} else {
52+
print("Cells were not filtered based on features detected")
53+
}
54+
```
55+
56+
```{r, eval=FALSE}
57+
# code to filter cells based on features detected
58+
num.sel <- params$dt$detect_num[params$dt$detect_num >= params$dt$cellfilt_number]
59+
# subset data
60+
params$dt$exp_norm <- params$dt$exp_norm[, names(num.sel)]
61+
params$dt$meta_df <- params$dt$meta_df[names(num.sel), ]
62+
63+
```
64+
65+
#### Down-sample the number of samples included in the analysis (to increase computational efficiency)
66+
```{r}
67+
# were cells down-sampled to a certain number to speed up computation?
68+
if (params$dt$cell_downsample_choice) {
69+
print(paste("Cells were down-sampled to ", params$dt$cell_downsample_number, " cells", sep = ""))
70+
} else {
71+
print("Cells were not down-sampled")
72+
}
73+
```
74+
75+
```{r, eval=FALSE}
76+
# code to down-sample cells
77+
set.seed(1)
78+
dw_samp <- base::sample(x = 1:ncol(params$dt$exp_norm), size = params$dt$cell_downsample_number)
79+
# subset data
80+
params$dt$exp_norm <- params$dt$exp_norm[, dw_samp]
81+
params$dt$meta_df <- params$dt$meta_df[dw_samp, ]
82+
```
83+
84+
#### Remove features not detected in samples
85+
```{r}
86+
# were features filtered?
87+
if (params$dt$gene_filter_choice) {
88+
print(paste("Features were filtered using ",
89+
params$dt$gene_filter_method,
90+
".", nrow(params$dt$exp_norm), " Features with ", params$dt$gene_count_num,
91+
" or more counts in at least ", params$dt$gene_cell_num,
92+
" cells were retained" , sep = ""))
93+
} else {
94+
print("Features were not filtered based on detection rate in samples")
95+
}
96+
97+
```
98+
99+
#### Choose normalization method for expression data
100+
```{r, eval=FALSE}
101+
# code for gene filtering methods: all code is displayed regardless of method chosen
102+
filter <- apply(params$dt$exp_norm, 1, function(x) length(x[x>isolate(input$Count_num)])>=isolate(input$Cell_num))
103+
params$dt$exp_norm <- params$dt$exp_norm[filter,]
104+
# normalize the data
105+
# using CPM method
106+
if (isolate(input$norm_method) == "CPM") {
107+
params$dt$exp_norm <- edgeR::cpm(params$dt$exp_norm)
108+
# using quantile normalization method
109+
} else if (isolate(input$norm_method) == "Quantile") {
110+
params$dt$exp_norm <- normalize.quantiles(params$dt$exp_norm)
111+
# scran method
112+
} else if (isolate(input$norm_method) == "scran") {
113+
sce <- SingleCellExperiment(list(counts=params$dt$exp_norm))
114+
sce <- computeSumFactors(sce)
115+
sce <- normalize(sce)
116+
params$dt$exp_norm <- exprs(sce)
117+
# no normalization
118+
} else if (isolate(input$norm_method) == "None") {
119+
params$dt$exp_norm <- params$dt$exp_norm
120+
}
121+
```
122+
123+
##Surrogate Variable Analysis (SVA)
124+
```{r}
125+
# which method was used
126+
if (!is.null(params$dt$sva_method_use)) {
127+
print(paste("The surrogate variable analysis method chosen was: ", params$dt$sva_method_use, sep = ""))
128+
}
129+
130+
# which known factors were adjusted for
131+
if (!is.null(params$dt$known_factors_use)) {
132+
print(paste("The following known factor(s) were adjusted for: ", params$dt$known_factors_use, sep = ""))
133+
}
134+
135+
# the number of SV's
136+
if (!is.null(params$dt$iasva.res)) {
137+
print(paste("The number of SV's/Factors identified: ", ncol(params$dt$iasva.res$sv), sep = ""))
138+
}
139+
```
140+
141+
```{r, eval=FALSE}
142+
# code for SVA methods
143+
# create model matrix with known factors to adjust for
144+
id_mod <- which(colnames(params$dt$meta_df) %in% params$dt$known_factors_use)
145+
if (length(id_mod) > 1) {
146+
formdf1 <- as.formula(paste("~", colnames(params$dt$meta_df)[id_mod][1], "+", paste(colnames(params$dt$meta_df)[id_mod[2:length(id_mod)]],collapse="+"), sep = ""))
147+
mod <- model.matrix(formdf1, data = params$dt$meta_df)
148+
} else {
149+
varf1 <- as.factor(params$dt$meta_df[, id_mod])
150+
mod <- model.matrix(~varf1, data = params$dt$meta_df)
151+
}
152+
# create summarized experiment for expression matrix to later use for marker gene identification
153+
summ_exp <- SummarizedExperiment(assays = as.matrix(params$dt$exp_norm))
154+
params$dt$summ_exp <- summ_exp
155+
156+
# if user chose IA-SVA, then perform following
157+
if (isolate(params$dt$sva_method_use == "IA-SVA")) {
158+
# depending on which ia-sva parameters were chosen, evaluate
159+
if (isolate(input$iasva_param == "Percentage Threshold")) {
160+
params$dt$iasva.res <- fast_iasva(summ_exp, mod[,-1, drop = F], verbose=FALSE,
161+
pct.cutoff = isolate(input$pct_cutt), num.sv = NULL)
162+
} else if (isolate(input$iasva_param == "Number of SVs")) {
163+
params$dt$iasva.res <- fast_iasva(summ_exp, mod[,-1, drop = F], verbose=FALSE,
164+
pct.cutoff = isolate(input$pct_cutt), num.sv = isolate(input$num_of_svs))
165+
}
166+
# else if choose SVA method
167+
} else if (isolate(params$dt$sva_method_use == "SVA")) {
168+
# perform sva analysis with specified svs
169+
sva.res <- svaseq(params$dt$exp_norm, mod = mod, mod0 = mod[,1], n.sv = isolate(input$sva_num))
170+
colnames(sva.res$sv) <- paste("SV", 1:ncol(sva.res$sv), sep = "")
171+
params$dt$iasva.res <- sva.res
172+
173+
# else if choose zinb-wave method
174+
} else if (isolate(params$dt$sva_method_use == "ZINB-WaVE")) {
175+
# perform analysis with specified latent factors
176+
zinb.matrix <- params$dt$exp_norm
177+
# coerce to integer
178+
mode(zinb.matrix) <- "integer"
179+
zinb.res <- zinbFit(Y = zinb.matrix, X = mod[,-1, drop = F], K = isolate(input$zinb_num))
180+
# extract factors
181+
zinb.fac <- getW(zinb.res)
182+
colnames(zinb.fac) <- paste("SV", 1:ncol(zinb.fac), sep = "")
183+
params$dt$iasva.res <- list(sv = zinb.fac)
184+
}
185+
186+
```
187+
188+
## Correlation Plot of SV's and Sample Metadata
189+
```{r}
190+
# change factors to numeric for correlation
191+
if (!is.null(params$dt$meta_df) & !is.null(params$dt$iasva.res)) {
192+
meta_sel <- params$dt$meta_df
193+
for (jcol in 1:ncol(meta_sel)) {
194+
meta_sel[,jcol] <- as.numeric(as.factor(meta_sel[,jcol]))
195+
}
196+
iasva_vars <- cbind(params$dt$iasva.res$sv, meta_sel)
197+
# need to append column names to matrix
198+
colnames(iasva_vars) <- c(paste("SV", 1:ncol(params$dt$iasva.res$sv), sep = ""),
199+
colnames(params$dt$meta_df))
200+
col <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))
201+
corrplot(abs(cor(iasva_vars)), type = "upper", method = "color",
202+
col = col(200), number.cex = 1,
203+
addCoef.col = "black",
204+
tl.col = "black", tl.srt = 90, diag = FALSE)
205+
}
206+
207+
```
208+
209+
## Paired SV Plots
210+
```{r}
211+
if (!is.null(params$dt$iasva.res)) {
212+
iasva.sv <- as.data.frame(params$dt$iasva.res$sv)
213+
rownames(iasva.sv) <- colnames(params$dt$exp_norm)
214+
pairs(iasva.sv, main="", pch=20, cex=0.5, lower.panel = NULL)
215+
}
216+
217+
```
218+
219+
## Interactive Paired SV Plots
220+
```{r}
221+
if (!is.null(params$dt$iasva.res)) {
222+
iasva.sv <- as.data.frame(params$dt$iasva.res$sv)
223+
rownames(iasva.sv) <- colnames(params$dt$exp_norm)
224+
plot_ly(iasva.sv, x = ~SV1, y = ~SV2, type = "scatter",
225+
mode = "markers", text = paste("Cell ID: ", rownames(iasva.sv), sep = ""),
226+
marker = list(
227+
opacity = 0.5
228+
)
229+
)
230+
}
231+
```
232+
233+
## Identifying Marker Features associated with SV's
234+
```{r, eval=FALSE}
235+
# identify which svs were chosen for marker analysis
236+
id_sv_mark <- which(colnames(params$dt$iasva.res$sv) %in% isolate(input$SV_marks))
237+
marker_genes <- iasva::find_markers(Y = params$dt$summ_exp,
238+
iasva.sv = as.matrix(params$dt$iasva.res$sv[, id_sv_mark, drop=FALSE]),
239+
rsq.cutoff = isolate(input$rsqcutoff), method = isolate(input$mark_sig), sig.cutoff = isolate(input$mark_cutoff))
240+
params$dt$markers <- marker_genes
241+
```
242+
243+
## Heatmap of Marker Features Determined from SVA Analysis
244+
```{r}
245+
if (!is.null(params$dt$markers)) {
246+
all_marks <- params$dt$markers$All_Unique_Markers
247+
log_mat <- log(as.matrix(params$dt$exp_norm[all_marks,])+1)
248+
# remove any NA's from matrix
249+
log_mat <- log_mat[complete.cases(log_mat),]
250+
pheatmap(log_mat, show_colnames = FALSE,
251+
show_rownames = TRUE,
252+
clustering_method = "ward.D2")
253+
}
254+
```
255+
256+
## Dimension Reduction
257+
```{r, eval=FALSE}
258+
set.seed(1)
259+
# transpose matrix
260+
trans_orig <- t(log(params$dt$exp_norm+1))
261+
# remove any zeros
262+
params$dt$pre_dim_orig <- trans_orig[, apply(trans_orig, 2, var, na.rm = TRUE) != 0]
263+
# Principal component analysis (PCA) for all genes
264+
dim_orig <- prcomp(x = params$dt$pre_dim_orig, center = TRUE, scale. = TRUE)
265+
dim_orig_mat <- dim_orig$x
266+
rownames(dim_orig_mat) <- colnames(params$dt$exp_norm)
267+
params$dt$dim_orig <- as.data.frame(dim_orig_mat)
268+
269+
### Code for T-SNE ####
270+
# T-SNE analysis (PCA) for all genes
271+
# not comupted
272+
# dim_orig <- Rtsne(X = params$dt$pre_dim_orig, dims = 3)
273+
# dim_orig_mat <- dim_orig$Y
274+
275+
#### Code for MDS ###
276+
# dim_orig <- cmdscale(d = dist(params$dt$pre_dim_orig), k = 3)
277+
# dim_orig_mat <- dim_orig
278+
# rownames(dim_orig_mat) <- colnames(params$dt$exp_norm)
279+
# colnames(dim_orig_mat) <- c("MDS1", "MDS2", "MDS3")
280+
281+
# Principal component analysis for SV-selected genes
282+
# transpose matrix
283+
trans_mark <- t(params$dt$exp_norm[params$dt$markers_formatted[,1],])
284+
# remove any zeros
285+
params$dt$pre_dim_mark <- trans_mark[, apply(trans_mark, 2, var, na.rm = TRUE) != 0]
286+
dim_mark <- prcomp(x = params$dt$pre_dim_mark, center = TRUE, scale. = TRUE)
287+
dim_mark_mat <- dim_mark$x
288+
rownames(dim_mark_mat) <- colnames(params$dt$exp_norm)
289+
params$dt$dim_mark <- as.data.frame(dim_mark_mat)
290+
291+
```
292+
293+
### Dimension Reduction Visualization (using all features)
294+
```{r}
295+
if (!is.null(params$dt$dim_method)) {
296+
# print your dimension reduction method
297+
print(paste("Dimension reduction method chosen: ", params$dt$dim_method, sep = ""))
298+
299+
# if chosen PCA
300+
if (params$dt$dim_method == "PCA") {
301+
# 3D interactive dimension reduction plot using all features
302+
plot_ly(params$dt$dim_orig, x = ~PC1, y = ~PC2, z = ~PC3, type = "scatter3d",
303+
mode = "markers", text = paste("Cell ID: ", rownames(params$dt$dim_orig), sep = ""),
304+
marker = list(
305+
opacity = 0.5
306+
)) %>% layout(title = paste("All Genes (n = ", nrow(params$dt$exp_norm), ")", sep = ""))
307+
} else if (params$dt$dim_method == "t-SNE") {
308+
plot_ly(params$dt$dim_orig, x = ~tSNE1, y = ~tSNE2, z = ~tSNE3, type = "scatter3d",
309+
mode = "markers", text = paste("Cell ID: ", rownames(params$dt$dim_orig), sep = ""),
310+
marker = list(
311+
opacity = 0.5
312+
)) %>% layout(title = paste("All Genes (n = ", nrow(params$dt$exp_norm), ")", sep = ""))
313+
} else if (params$dt$dim_method == "Classical Metric MDS") {
314+
plot_ly(params$dt$dim_orig, x = ~MDS1, y = ~MDS2, z = ~MDS3, type = "scatter3d",
315+
mode = "markers", text = paste("Cell ID: ", rownames(params$dt$dim_orig), sep = ""),
316+
marker = list(
317+
opacity = 0.5
318+
)) %>% layout(title = paste("All Genes (n = ", nrow(params$dt$exp_norm), ")", sep = ""))
319+
}
320+
}
321+
```
322+
323+
### Dimension Reduction Visualization (using SV-associated features)
324+
```{r}
325+
if (!is.null(params$dt$dim_method)) {
326+
# 3D interactive dimension reduction plot using SV selected features
327+
if (params$dt$dim_method == "PCA") {
328+
plot_ly(params$dt$dim_mark, x = ~PC1, y = ~PC2, z = ~PC3, type = "scatter3d",
329+
mode = "markers", text = paste("Cell ID: ", rownames(params$dt$dim_mark), sep = ""),
330+
marker = list(
331+
opacity = 0.5
332+
)) %>% layout(title = paste(params$dt$sva_method_use, " Genes (n = ", nrow(params$dt$exp_norm[params$dt$markers_formatted[,1],]),
333+
";",params$dt$chosen_svs, ")", sep = ""))
334+
} else if (params$dt$dim_method == "t-SNE") {
335+
plot_ly(params$dt$dim_mark, x = ~tSNE1, y = ~tSNE2, z = ~tSNE3, type = "scatter3d",
336+
mode = "markers", text = paste("Cell ID: ", rownames(params$dt$dim_mark), sep = ""),
337+
marker = list(
338+
opacity = 0.5
339+
)) %>% layout(title = paste(params$dt$sva_method_use, " Genes (n = ", nrow(params$dt$exp_norm[params$dt$markers_formatted[,1],]), "; ",
340+
params$dt$chosen_svs, ")", sep = ""))
341+
} else if (params$dt$dim_method == "Classical Metric MDS") {
342+
plot_ly(params$dt$dim_mark, x = ~MDS1, y = ~MDS2, z = ~MDS3, type = "scatter3d",
343+
mode = "markers", text = paste("Cell ID: ", rownames(params$dt$dim_mark), sep = ""),
344+
marker = list(
345+
opacity = 0.5
346+
)) %>% layout(title = paste(params$dt$sva_method_use, " Genes (n = ", nrow(params$dt$exp_norm[params$dt$markers_formatted[,1],]), "; ",
347+
params$dt$chosen_svs, ")", sep = ""))
348+
}
349+
}
350+
351+
```
352+
353+
## Gene Enrichment Analysis
354+
```{r, eval=FALSE}
355+
# convert gene symbols to Entrez ID (for example for human data)
356+
gene.df <- bitr(gene, fromType = "SYMBOL",
357+
toType = c("ENSEMBL", "ENTREZID"),
358+
OrgDb = org.Hs.eg.db)
359+
params$dt$species <- org.Hs.eg.db
360+
params$dt$gene.df <- gene.df
361+
362+
# analysis for GO biological process terms (as an example)
363+
ego <- enrichGO(gene = params$dt$gene.df$ENTREZID,
364+
OrgDb = params$dt$species,
365+
keyType = "ENTREZID",
366+
ont = "BP",
367+
pvalueCutoff = isolate(input$pvalue_cutoff), pAdjustMethod = isolate(input$pvalue_correct),
368+
qvalueCutoff = isolate(input$path_cutoff),
369+
minGSSize = 5,
370+
readable = TRUE)
371+
params$dt$enrich_res <- ego
372+
params$dt$category_number <- input$path_viz_num
373+
params$dt$pathway_name <- input$Path_Type
374+
```
375+
376+
```{r}
377+
# visualize pathway results
378+
if (!is.null(params$dt$enrich_res)) {
379+
print(paste("Type of gene enrichment analysis performed: ", params$dt$pathway_name, sep = ""))
380+
print(paste("Show this many categories: ", params$dt$category_number, sep = ""))
381+
382+
dp <- clusterProfiler::dotplot(object = params$dt$enrich_res, showCategory = params$dt$category_number) + ggtitle(params$dt$pathway_name)
383+
plot(dp)
384+
}
385+
```
386+
387+
388+
## Session Information
389+
```{r}
390+
sessionInfo()
391+
```
392+

0 commit comments

Comments
 (0)