Skip to content

Commit ff8b3e0

Browse files
authored
Merge pull request #2 from robinweide/master
post-biorxiv update 1
2 parents 689a7e4 + 774dc28 commit ff8b3e0

34 files changed

+468
-1955
lines changed

DESCRIPTION

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ Suggests:
2424
RhpcBLASctl,
2525
RCurl,
2626
strawr (>= 0.0.1),
27-
rhdf5 (>= 2.30.0)
27+
rhdf5 (>= 2.30.0),
28+
pkgfilecache
2829
VignetteBuilder: knitr
2930
Remotes: rnabioco/RcppLibBigWig,
3031
aidenlab/straw/R,

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,8 +144,10 @@ export(compartment_matrixplot)
144144
export(compartment_score)
145145
export(direct_index)
146146
export(draw.centromere.telomere)
147+
export(erase_GENOVA_cache)
147148
export(expnames)
148149
export(find_centromeres)
150+
export(get_test_data)
149151
export(ggplot_add.genomescore_discovery)
150152
export(ggplot_add.mark_layer)
151153
export(ggplot_add.track_layer)

R/ARMLA.R

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,10 @@ ARA <- function(explist, bed, shift = 1e6,
459459
#' the \code{size_bp} is also \code{NULL}. \code{size_bp} is an alternative
460460
#' parametrisation for the lookup regions, expressed in basepairs.
461461
#' \code{size_bp} is not used when the argument \code{size_bin} is set.
462+
#' @param group_direction A \code{logical} of length 1 which when \code{TRUE}
463+
#' will mirror groups for anchors, where the left anchor location is larger
464+
#' than the right anchor location. Left and right refer to the \code{bedlist}
465+
#' elements that generate combinations.
462466
#'
463467
#' @return An \code{CSCAn_discovery} object containing the following slots,
464468
#' wherein \code{i} is the number of combinations between the \code{bedlist}
@@ -502,7 +506,8 @@ CSCAn <- function(explist, bedlist, shift = 1e6L,
502506
size_bin = NULL, size_bp = NULL,
503507
outlier_filter = c(0, 1),
504508
min_compare = 10,
505-
anchors = NULL, raw = TRUE) {
509+
anchors = NULL, raw = TRUE,
510+
group_direction = FALSE) {
506511
explist <- check_compat_exp(explist)
507512

508513
if (!missing(bedlist)) {
@@ -531,7 +536,8 @@ CSCAn <- function(explist, bedlist, shift = 1e6L,
531536
anchors <- anchors_CSCAn(
532537
explist[[1]]$IDX, attr(explist[[1]], "res"),
533538
bedlist, dist_thres,
534-
min_compare = min_compare
539+
min_compare = min_compare,
540+
group_direction = group_direction
535541
)
536542
}
537543

R/GENOVA.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ utils::globalVariables(c('.',
9494
'chrom_x',
9595
'chrom_y',
9696
'chr',
97+
'grp',
98+
'error',
9799
'bin',
98100
'binned'))
99101

R/insulation_score.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ insulation_score <- function(explist, window = 30,
5252
}
5353
}
5454

55-
explist <- GENOVA:::check_compat_exp(explist)
55+
explist <- check_compat_exp(explist)
5656
expnames <- expnames(explist)
5757
nexp <- length(expnames)
5858

R/load_contacts.R

Lines changed: 26 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -41,25 +41,27 @@
4141
#' attributes for a Hi-C matrix of a given sample at a given resolution.
4242
#' @export
4343
load_contacts = function(signal_path,
44-
indices_path = NULL,
45-
resolution = 10e3,
46-
sample_name = NULL,
47-
centromeres = NULL,
48-
colour = NULL,
49-
z_norm = FALSE,
50-
scale_bp = 1e9,
51-
scale_cis = FALSE,
52-
balancing = TRUE,
53-
legacy = FALSE,
54-
verbose = TRUE){
44+
indices_path = NULL,
45+
resolution = 10e3,
46+
sample_name = NULL,
47+
centromeres = NULL,
48+
colour = NULL,
49+
z_norm = FALSE,
50+
scale_bp = 1e9,
51+
scale_cis = FALSE,
52+
balancing = TRUE,
53+
legacy = FALSE,
54+
verbose = TRUE){
5555

5656
# Control data.table threads
5757
dt.cores <- data.table::getDTthreads()
5858
on.exit(data.table::setDTthreads(dt.cores))
5959
data.table::setDTthreads(1)
6060

6161
if(is.null(sample_name)){
62-
stop('Please give a valid sample_name.')
62+
63+
sample_name = basename(signal_path)
64+
6365
}
6466
doJuicer = F
6567
doCooler = F
@@ -73,6 +75,8 @@ load_contacts = function(signal_path,
7375
inputType = switch(tools::file_ext(tolower(signal_path)),
7476
'matrix' = 'hicpro',
7577
'cooler' = 'cooler',
78+
'cool' = 'cooler',
79+
'mcool' = 'cooler',
7680
'hic' = 'juicer')
7781

7882
if(inputType == 'juicer'){juicerPath = signal_path}
@@ -81,14 +85,14 @@ load_contacts = function(signal_path,
8185
if(!is.null(juicerPath)){
8286
##################################################################### juicer
8387

84-
88+
8589

8690
if(grepl(juicerPath ,pattern = '^http')){
8791
stop('signal_path starts with http, but the current version does not support it.')
8892
# signal_path points to an URL!
89-
if(unname(RCurl::url.exists(signal_path, .header = T)[7] == "404")){
90-
stop('signal_path starts with http, but url is not found (404).')
91-
}
93+
if(unname(RCurl::url.exists(signal_path, .header = T)[7] == "404")){
94+
stop('signal_path starts with http, but url is not found (404).')
95+
}
9296

9397
} else if(!file.exists(juicerPath)){
9498
stop("juicerPath doesn't point to an existing .hic-file.")
@@ -98,7 +102,7 @@ load_contacts = function(signal_path,
98102
try_require('strawr', "load_contacts", source = 'github')
99103

100104
doJuicer = T
101-
105+
102106

103107
} else if(!is.null(coolerPath)){
104108
##################################################################### cooler
@@ -125,7 +129,7 @@ load_contacts = function(signal_path,
125129
}
126130

127131
doHiCpro = T
128-
132+
129133
} else {
130134
stop('please provide either .matrix/indices_path, a .cooler or a .hic.')
131135
}
@@ -145,7 +149,7 @@ load_contacts = function(signal_path,
145149
}
146150

147151
if(doCooler){
148-
sig_ind = loadCooler(coolerPath, scale_bp = scale_bp,balancing = balancing, scale_cis = scale_cis)
152+
sig_ind = loadCooler(coolerPath, scale_bp = scale_bp,balancing = balancing, scale_cis = scale_cis, resolution = resolution)
149153
balanced = balancing
150154
}
151155

@@ -277,13 +281,13 @@ loadHiCpro = function(signal_path, indices_path, scale_bp, scale_cis){
277281

278282
F1 = findInterval(SIG$V1, chromRange$first)
279283
F2 = findInterval(SIG$V2, chromRange$first)
280-
284+
281285
SIG$V3 <- scale_bp * SIG$V3 / sum(SIG[ifelse(F1 == F2, T, F), 3])
282286

283287
} else {
284288
SIG <- read.hicpro.matrix(signal_path, scale_bp = scale_bp)
285289
}
286-
290+
287291
return(list(SIG, ABS))
288292
}
289293

@@ -295,7 +299,7 @@ zscore_hic = function(SIG, ABS){
295299
V3 <- NULL
296300
C1 <- NULL
297301
C2 <- NULL
298-
302+
299303
chromRLE = rle(ABS$V1)
300304

301305
CS = cumsum(c(1,chromRLE$lengths))
@@ -368,4 +372,3 @@ read.hicpro.matrix <- function(file, scale_bp = 1e9) {
368372
}
369373
return(data)
370374
}
371-

R/loading_cooler.R

Lines changed: 100 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,80 @@
1-
loadCooler = function(cooler, balancing = T, scale_bp = NULL, scale_cis = F){
2-
3-
ABS = data.table::as.data.table(rhdf5::h5read(file = cooler,
4-
name = "bins"))
5-
ABS$bin = 1:nrow(ABS)
6-
7-
if(!'weight' %in% colnames(ABS)){
8-
balancing = F
9-
warning('No balancing is done due to missing weights.')
1+
loadCooler = function(cooler, balancing = T, scale_bp = NULL, scale_cis = F, resolution = 10e3){
2+
3+
4+
bins_name = "bins"
5+
pixels_name = "pixels"
6+
ABS = NULL
7+
SIG = NULL
8+
# check if it contains bins or resolution groups:
9+
if(any(grepl("/resolutions",rhdf5::h5ls(cooler)[,1]))){
10+
# crap, one of those cooler-files...
11+
# lets start with finding all resolutions...
12+
cooler_res <- sapply(rhdf5::h5ls(cooler)[,1],
13+
function(x){ x <- gsub("/resolutions/","", x) })
14+
cooler_res <- as.numeric(unique(cooler_res[!grepl('/',cooler_res)]))
15+
if(!resolution %in% cooler_res){
16+
stop('Cool file does not include a resolution of ', resolution,".\nAvailable resolutions are: ", paste0(cooler_res, collapse = ", "))
17+
}
18+
ABS = data.table::as.data.table(rhdf5::h5read(file = cooler,
19+
name = paste0("/resolutions/",resolution, "/", bins_name)))
20+
21+
if('KR' %in% colnames(ABS)) {
22+
ABS$weight <- 1/ABS$KR
23+
}
24+
25+
if(!'weight' %in% colnames(ABS)) {
26+
balancing = F
27+
warning('No weights-column found: cannot balance matrix!')
28+
ABS$weight <- NA
29+
}
30+
31+
ABS <- ABS[, c("chrom", "start", "end", "weight" ), with = F]
32+
ABS$bin = 1:nrow(ABS)
33+
34+
35+
if(!'weight' %in% colnames(ABS)){
36+
balancing = F
37+
warning('No balancing is done due to missing weights.')
38+
}
39+
SIG = data.table::as.data.table(rhdf5::h5read(file = cooler,
40+
name = paste0("/resolutions/",resolution, "/", pixels_name)))
41+
} else {
42+
ABS = data.table::as.data.table(rhdf5::h5read(file = cooler,
43+
name = bins_name))
44+
if('KR' %in% colnames(ABS)) {
45+
ABS$weight <- 1/ABS$KR
46+
}
47+
48+
if(!'weight' %in% colnames(ABS)) {
49+
balancing = F
50+
warning('No weights-column found: cannot balance matrix!')
51+
ABS$weight <- NA
52+
}
53+
54+
ABS <- ABS[, c("chrom", "start", "end", "weight" ), with = F]
55+
ABS$bin = 1:nrow(ABS)
56+
SIG = data.table::as.data.table(rhdf5::h5read(file = cooler,
57+
name = pixels_name))
58+
1059
}
11-
12-
SIG = data.table::as.data.table(rhdf5::h5read(file = cooler,
13-
name = "pixels"))
14-
60+
1561
rhdf5::h5closeAll()
16-
62+
63+
# handle nan and NA in ABS$weigth
64+
ABS$weight[is.nan(ABS$weight)] <- 0
65+
ABS$weight[is.na(ABS$weight)] <- 0
66+
67+
colnames(SIG) = paste0('V', 1:3)
1768
SIG[,1] = SIG[,1] + 1
1869
SIG[,2] = SIG[,2] + 1
19-
70+
2071
if(balancing){
2172
SIG = balance_cooler(ABS, SIG)
2273
}
23-
74+
2475
ABS$weight = NULL
2576
colnames(ABS) = paste0('V', 1:4)
26-
colnames(SIG) = paste0('V', 1:3)
27-
77+
2878
if (!is.null(scale_bp)) {
2979

3080
if(scale_cis){
@@ -40,69 +90,46 @@ loadCooler = function(cooler, balancing = T, scale_bp = NULL, scale_cis = F){
4090
} else {
4191
SIG$V3 <- scale_bp * SIG$V3 / sum(SIG$V3)
4292
}
43-
93+
4494
}
45-
95+
96+
97+
SIG <- as.data.frame(SIG)
98+
SIG <- data.table::data.table(apply(SIG, 2, as.vector))
99+
data.table::setkey(SIG, "V1", "V2")
100+
101+
ABS <- as.data.frame(ABS)
102+
ABS$V1 <- as.character(as.vector(ABS$V1))
103+
ABS$V2 <- as.numeric(as.vector(ABS$V2))
104+
ABS$V3 <- as.numeric(as.vector(ABS$V3))
105+
ABS$V4 <- as.numeric(as.vector(ABS$V4))
106+
ABS <- data.table::data.table(ABS)
107+
data.table::setkey(ABS, "V1", "V2")
108+
46109
return(list(SIG, ABS))
47-
48110
}
49111

50112

51113

52-
#
53-
# out = loadCooler("Dixon2012-H1hESC-HindIII-allreps-filtered.50kb.cool")
54-
# SIG = out[[1]]
55-
# ABS = out[[2]]
56-
57-
58-
59114
balance_cooler = function(ABS, SIG){
60-
115+
61116
WEIGHTS = data.table::data.table(stats::setNames(ABS[, c('bin','weight'),],
62-
c('bin1_id','bin1_weight')),
63-
key = 'bin1_id')
64-
WEIGHTS$bin1_weight[is.nan(WEIGHTS$bin1_weight)] = 1
65-
66-
data.table::setkey(SIG, 'bin1_id')
117+
c('V1_id','V1_weight')),
118+
key = 'V1_id')
119+
WEIGHTS$V1_weight[is.nan(WEIGHTS$V1_weight)] = 1
120+
121+
data.table::setkey(SIG, 'V1')
67122
SIG <- SIG[WEIGHTS, nomatch = 0]
68-
69-
colnames(WEIGHTS) = c('bin2_id','bin2_weight')
70-
71-
data.table::setkey(WEIGHTS, 'bin2_id')
72-
data.table::setkey(SIG, 'bin2_id')
73-
123+
124+
colnames(WEIGHTS) = c('V2','V2_weight')
125+
126+
data.table::setkey(WEIGHTS, 'V2')
127+
data.table::setkey(SIG, 'V2')
128+
74129
SIG <- SIG[WEIGHTS, nomatch = 0]
75-
76-
SIG$weight = SIG$bin1_weight * SIG$bin2_weight
77-
SIG$signal = SIG$count*SIG$weight
78-
79-
return(SIG[, c(1,2,7)])
130+
131+
SIG$weight = SIG$V1_weight * SIG$V2_weight
132+
SIG$V3 = SIG$V3*SIG$weight
133+
134+
return(SIG[, c(1,2,3)])
80135
}
81-
82-
#
83-
#
84-
#
85-
#
86-
# idChr10 = (ABS[ABS[,1] == 'chr19',"bin"])
87-
#
88-
# data = SIG[SIG$bin1_id %in% idChr10 & SIG$bin2_id %in% idChr10, ]
89-
#
90-
# RAW = setNames(data[,1:3], c('bin1_id', 'bin2_id','signal')); RAW$set = 'count'
91-
# BAL = data[,c(1:2,7)]; BAL$set = 'balanced'
92-
#
93-
# data = rbind(RAW,BAL)
94-
#
95-
# dataSwitch = data
96-
# dataSwitch$bin1_id = data$bin2_id
97-
# dataSwitch$bin2_id = data$bin1_id
98-
#
99-
# data = rbind(data, dataSwitch)
100-
#
101-
# data$set = factor(data$set, levels = c('count','balanced'))
102-
#
103-
# ggplot(data, aes(x = bin1_id, y = bin2_id, fill= log10(signal))) +
104-
# theme(panel.background = element_rect(fill = 'white'))+
105-
# geom_raster() +
106-
# facet_grid(.~ set) +
107-
# scale_fill_gradientn(colours = rev(c('black','red','orange','yellow', 'white')))
108-
#

0 commit comments

Comments
 (0)