Skip to content

Commit d234133

Browse files
authored
Merge pull request #12 from mpc-bioinformatics/documentation
Add testing scripts
2 parents de21a08 + adf1e1d commit d234133

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+864
-301
lines changed

LICENSE

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
11
YEAR: 2024
2-
32
COPYRIGHT HOLDER: Medizinisches Proteom-Center
4-
53
ORGANIZATION: Ruhr University Bochum
6-

R/generate_graphs_from_quantdata.R

Lines changed: 35 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#' @param id_cols \strong{integer vector} \cr
66
#' The columns with ids, e.g. peptide sequences (everything except the peptide ratios)
77
#' @param fasta_edgelist \strong{data.frame} \cr
8-
#' An edgelist created from the corresponding FASTA file, eg. created with [generate_edgelist()].
8+
#' An edgelist created from the corresponding FASTA file, e.g. created with [bppg::generate_edgelist()].
99
#' @param outpath \strong{character} \cr
1010
#' The output path for the results.
1111
#' @param seq_column \strong{character} \cr
@@ -20,7 +20,7 @@
2020
#' @return A list of lists of subgraphs.
2121
#' @export
2222
#'
23-
#' @seealso [generate_edgelist()]
23+
#' @seealso [bppg::generate_edgelist()]
2424
#'
2525
#' @examples
2626
#'
@@ -91,24 +91,42 @@ generate_quant_graphs <- function(peptide_ratios,
9191

9292
#' Generate graphs from quantitative peptide-level data
9393
#'
94-
#' @param D data set with peptide sequence as first column and peptide intensities in subsequent columns
95-
#' (e.g. output from bppg::read_MQ_peptidetable)
96-
#' @param fasta fasta file used for identification of peptides in D
97-
#' @param outpath bla
98-
#' @param missed_cleavages bla
99-
#' @param min_aa bla
100-
#' @param max_aa bla
101-
#' @param ... currently not in use
102-
#' @param id_columns column numbers of D that contain ID information (the rest should contain only peptide intensities, properly normalized)
103-
#' @param seq_column column name of the peptide sequence
104-
#' @param collapse_protein_nodes if TRUE protein nodes will be collapsed
105-
#' @param collapse_peptide_nodes if TRUE, peptide nodes will be collapsed
106-
#' @param suffix suffix for output files
94+
#' @param D \strong{data.frame} \cr
95+
#' A data set with peptide sequence as first column
96+
#' and peptide intensities in subsequent columns
97+
#' (e.g. created with [bppg::read_MQ_peptidetable()]).
98+
#' @param fasta \strong{list of vector of characters} \cr
99+
#' A fasta file used for identification of peptides in D,
100+
#' already read into R by [seqinr::read.fasta()].
101+
#' @param outpath \strong{character} \cr
102+
#' The output path for the results.
103+
#' @param missed_cleavages \strong{integer} \cr
104+
#' The number of allowed missed cleavages in a peptide.
105+
#' @param min_aa \strong{integer} \cr
106+
#' The minimum number of amino acids in a peptide.
107+
#' @param max_aa \strong{integer} \cr
108+
#' The maximum number of amino acids in a peptide.
109+
#' @param id_columns \strong{integer vector} \cr
110+
#' The columns of D that contain ID information (the rest should contain only peptide intensities, properly normalized).
111+
#' @param seq_column \strong{character} \cr
112+
#' The column name of the column with the peptide sequences.
113+
#' @param collapse_protein_nodes \strong{logical} \cr
114+
#' If \code{TRUE}, the protein nodes will be collapsed.
115+
#' @param collapse_peptide_nodes \strong{logical} \cr
116+
#' If \code{TRUE}, the peptide nodes will be collapsed.
117+
#' @param suffix \strong{character} \cr
118+
#' The suffix for output files.
119+
#' @param ... Currently not in use.
107120
#'
108-
#' @return list of list of graphs
121+
#' @return A list of lists of graphs.
109122
#' @export
110123
#'
111-
#' @examples # TODO
124+
#' @seealso [bppg::read_MQ_peptidetable()], [seqinr::read.fasta()],
125+
#' [bppg::generate_quant_graphs()], [bppg::generate_graphs_from_FASTA()]
126+
#'
127+
#' @examples
128+
#'
129+
112130
generate_graphs_from_quant_data <- function(D,
113131
fasta,
114132
outpath = NULL,

R/helpers-Digest.R

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,29 @@
55

66
#' Digestion of a single protein sequence.
77
#'
8-
#' @param sequence Protein sequence as character.
9-
#' @param enzyme "trypsin" (does not cut before proline) or "trypsin.strict" ().
10-
#' @param missed Maximal number of missed cleavages.
11-
#' @param warn Print out warnings, e.g. if a protein has no cleavage site.
12-
#' @param remove_initial_M also return peptides where intital Methionine is removed?
8+
#' @param sequence \strong{character} \cr
9+
#' The protein sequence.
10+
#' @param enzyme \strong{character} \cr
11+
#' The enzyme used in digestion, e.g. "trypsin" (does not cut before proline) or "trypsin.strict" (cuts after every lysine and arginine, also before proline).
12+
#' @param missed \strong{integer} \cr
13+
#' The maximal number of missed cleavages.
14+
#' @param warn \strong{logical} \cr
15+
#' If \code{TRUE}, warnings will be printed e.g. if a protein has no cleavage site.
16+
#' @param remove_initial_M \strong{logical} \cr
17+
#' If \code{TRUE}, the initial methionine will be removed from the peptides.
1318
#'
14-
#' @return vector of peptides
19+
#' @return A vector of peptides.
1520
#' @export
1621
#'
22+
#' @seealso [digest_fasta()]
23+
#'
1724
#' @examples
1825
#' library(seqinr)
1926
#' file <- system.file("extdata", "2020_01_31_proteome_S_cerevisae.fasta", package = "bppg")
2027
#' fasta <- seqinr::read.fasta(file = file, seqtype = "AA", as.string = TRUE)
2128
#'
2229
#' digested_proteins <- Digest2(fasta[[1]])
23-
30+
#'
2431

2532
Digest2 <- function (sequence, enzyme = "trypsin", missed = 0, warn = TRUE, remove_initial_M = FALSE) {
2633
seq_vector <- strsplit(sequence, split = "")[[1]]
@@ -131,25 +138,35 @@ Digest2 <- function (sequence, enzyme = "trypsin", missed = 0, warn = TRUE, remo
131138

132139

133140

134-
#' In silico tryptic digestion of whole FASTA file
141+
#' In silico tryptic digestion of whole FASTA file.
135142
#'
136-
#' @param fasta List of protein sequences (e.g., imported FASTA file by seqinr::read.fasta).
137-
#' @param missed_cleavages Maximal number of missed cleavages.
138-
#' @param min_aa Minimal number of amino acids (set to 0 for no filtering).
139-
#' @param max_aa Maximal number of amino acids (set to Inf for no filtering).
140-
#' @param ... Additional arguments for Digest2().
143+
#' @param fasta \strong{list of vector of characters} \cr
144+
#' A fasta file, already read into R by [seqinr::read.fasta()].
145+
#' @param missed_cleavages \strong{integer} \cr
146+
#' The maximal number of missed cleavages.
147+
#' @param min_aa \strong{integer} \cr
148+
#' The minimal number of amino acids (set to 0 for no filtering).
149+
#' @param max_aa \strong{integer} \cr
150+
#' The maximal number of amino acids (set to Inf for no filtering).
151+
#' @param ... Additional arguments for [Digest2()].
141152
#'
142-
#' @return List of vectors of peptide sequences, filtered for minimal and maximal number of
143-
#' amino acids.
153+
#' @return List of vectors of peptide sequences, filtered for minimal and maximal number of amino acids.
144154
#' @export
145155
#'
156+
#' @seealso [Digest2()]
157+
#'
146158
#' @examples
147159
#' library(seqinr)
148160
#' file <- system.file("extdata", "uniprot_test.fasta", package = "bppg")
149161
#' fasta <- seqinr::read.fasta(file = file, seqtype = "AA", as.string = TRUE)
150162
#' res <- digest_fasta(fasta)
151163
#'
152-
digest_fasta <- function(fasta, missed_cleavages = 2, min_aa = 6, max_aa = 50, ...) {
164+
165+
digest_fasta <- function(fasta,
166+
missed_cleavages = 2,
167+
min_aa = 6,
168+
max_aa = 50,
169+
...) {
153170

154171
digested_proteins <- pbapply::pblapply(fasta, function(x) {
155172
sequ <- x

R/helpers-add_graph_attributes.R

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
1-
#' Adds vertex attributes with uniqueness of peptides and number of unique peptides
2-
#' for proteins
1+
#' Adds vertex attributes with uniqueness of peptides and number of unique peptides for proteins.
32
#'
4-
#' @param G graph
3+
#' @param G \strong{igraph graph object} \cr
4+
#' A peptide-protein graph.
55
#'
6-
#' @return graph with 2 additional vertex attributes, uniqueness and nr_unique_peptides
6+
#' @return A graph with 2 additional vertex attributes, \code{uniqueness} and \code{nr_unique_peptides}.
77
#' @export
88
#'
9-
#' @examples # TODO
9+
#' @seealso [generate_graphs_from_FASTA()], [generate_quant_graphs()], [add_average_pep_ratio()]
10+
#'
11+
#' @examples
12+
1013
add_uniqueness_attributes <- function(G) {
1114

1215
### FALSE = peptide, TRUE = protein
@@ -37,16 +40,20 @@ add_uniqueness_attributes <- function(G) {
3740

3841

3942

40-
#' Adds average peptide ratios as a attribute to the graphs, if a list of peptide ratios is already present
43+
#' Adds average peptide ratios as an attribute to the graphs, if a list of peptide ratios is already present.
4144
#'
42-
#' @param G graph
43-
#' @param type not used at the moment. Default is 'geom_mean'
45+
#' @param G \strong{igraph graph object} \cr
46+
#' A peptide-protein graph.
47+
#' @param type \strong{character} \cr
48+
#' Currently not used. Default is "geom_mean".
4449
#'
45-
#' @return graphs with added attributes
50+
#' @return A graph with added peptide ratio attributes.
4651
#' @export
4752
#'
53+
#' @seealso [generate_graphs_from_FASTA()], [generate_quant_graphs()], [add_uniqueness_attributes()]
54+
#'
4855
#' @examples
49-
#' # TODO
56+
5057
add_average_pep_ratio <- function(G, type = "geom_mean") {
5158

5259
pep_ratio <- igraph::V(G)$pep_ratio

R/helpers-assign_protein_accessions.R

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
#' Assign protein accessions to a list of peptides, depending on a FASTA file.
22
#'
3-
#' @param sequence vector of peptide sequences
4-
#' @param fasta_vec names vector of protein sequences from fasta file(s)
3+
#' @param sequence \strong{character vector} \cr
4+
#' The peptide sequences.
5+
#' @param fasta_vec \strong{character vector} \cr
6+
#' The names vector of protein sequences from fasta file(s).
57
#'
6-
#' @return list of assigned proteins
8+
#' @return A list of assigned proteins.
79
#' @export
810
#'
11+
#' @seealso [generate_graphs_from_FASTA()], [generate_quant_graphs()]
12+
#'
913
#' @examples
10-
#' ### TODO
14+
1115
assign_protein_accessions <- function(sequence, fasta_vec) {
1216

1317
protein_accessions <- names(fasta_vec)

R/helpers-collapse_nodes_edgelist.R

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,18 @@
11
#' Collapsing of peptide and protein nodes of an edgelist.
22
#'
3-
#' @param edgelist edgelist
4-
#' @param collapse_protein_nodes bla
5-
#' @param collapse_peptide_nodes bla
3+
#' @param edgelist \strong{data.frame} \cr
4+
#' An edgelist, e.g. created with [generate_edgelist()].
5+
#' @param collapse_protein_nodes \strong{logical} \cr
6+
#' If \code{TRUE}, the protein nodes will be collapsed.
7+
#' @param collapse_peptide_nodes \strong{logical} \cr
8+
#' If \code{TRUE}, the peptide nodes will be collapsed.
69
#'
7-
#' @return Edgelist with collapsed protein and peptide nodes
10+
#' @return An edgelist with collapsed protein and/or peptide nodes.
811
#' @export
912
#'
13+
#' @seealso For edgelists with peptide ratios: [collapse_edgelist_quant()] \cr
14+
#' [generate_graphs_from_FASTA()], [generate_quant_graphs()], [generate_edgelist()]
15+
#'
1016
#' @examples
1117
#' library(seqinr)
1218
#' file <- system.file("extdata", "uniprot_test.fasta", package = "bppg")
@@ -50,7 +56,6 @@ collapse_edgelist <- function(edgelist,
5056

5157

5258
edgelist2 <- edgelist
53-
#keep <- logical(nrow(edgelist2))
5459

5560
pepNodes2 <- pepNodes
5661
pepNodes2$peptide <- limma::strsplit2(pepNodes2$peptide, ";")[,1] # first peptide from list

R/helpers-collapse_nodes_edgelist_quant_pepratio.R

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,18 @@
11
#' Collapsing of peptide and protein nodes of an edgelist with peptide ratios.
22
#'
3-
#' @param edgelist edgelist
4-
#' @param collapse_protein_nodes bla
5-
#' @param collapse_peptide_nodes bla
3+
#' @param edgelist \strong{data.frame} \cr
4+
#' An edgelist with peptide ratios, e.g. created with [generate_edgelist()].
5+
#' @param collapse_protein_nodes \strong{logical} \cr
6+
#' If \code{TRUE}, the protein nodes will be collapsed.
7+
#' @param collapse_peptide_nodes \strong{logical} \cr
8+
#' If \code{TRUE}, the peptide nodes will be collapsed.
69
#'
7-
#' @return Edgelist with collapsed protein and peptide nodes
10+
#' @return An edgelist with collapsed protein and/or peptide nodes.
811
#' @export
912
#'
13+
#' @seealso For edgelists without peptide ratios: [collapse_edgelist()] \cr
14+
#' [generate_graphs_from_FASTA()], [generate_quant_graphs()], [generate_edgelist()]
15+
#'
1016
#' @examples
1117
#' library(seqinr)
1218
#' file <- system.file("extdata", "uniprot_test.fasta", package = "bppg")
@@ -18,8 +24,8 @@
1824

1925

2026
collapse_edgelist_quant <- function(edgelist,
21-
collapse_protein_nodes = TRUE,
22-
collapse_peptide_nodes = TRUE) {
27+
collapse_protein_nodes = TRUE,
28+
collapse_peptide_nodes = TRUE) {
2329

2430
if (!collapse_protein_nodes & !collapse_peptide_nodes) {
2531
return(edgelist)
@@ -55,7 +61,6 @@ collapse_edgelist_quant <- function(edgelist,
5561
pepNodes2$peptide <- limma::strsplit2(pepNodes2$peptide, ";")[,1] # first peptide from list
5662
edgelist2 <- edgelist[edgelist$peptide %in% pepNodes2$peptide,]
5763

58-
5964
protNodes2 <- protNodes
6065
protNodes2$protein <- limma::strsplit2(protNodes2$protein, ";")[,1] # first protein from list
6166
edgelist3 <- edgelist2[edgelist2$protein %in% protNodes2$protein,]

R/helpers-convertToBipartiteGraph.R

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
#' Conversion of submatrices to subgraphs.
22
#'
3-
#' @param x element of a submatrix list
3+
#' @param x \strong{matrix} \cr
4+
#' An element of a submatrix list.
45
#'
5-
#' @return graph as igraph object
6+
#' @return A graph as igraph object.
67
#' @export
78
#'
89
#' @examples
910
#' M <- matrix(c(1,0,1,1), nrow = 2, byrow = TRUE)
1011
#' bppg::convertToBipartiteGraph(M)
12+
1113
convertToBipartiteGraph <- function(x) {
1214
if ("list" %in% class(x)) { # class list if it contains peptide ratios
1315
S <- x$X

R/helpers-generate_edgelist.R

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
1-
#' Generate edgelist from list of in silico digested proteins
1+
#' Generate edgelist from list of in silico digested proteins.
22
#'
3-
#' @param digested_proteins Output from digest_fasta() (List of vectors of peptide sequences)
4-
#' @param prot_origin origin of the protein (e.g. organism, spike-in/background etc)
3+
#' @param digested_proteins \strong{list of vector of characters} \cr
4+
#' The output from [digest_fasta()] (list of vectors of peptide sequences).
5+
#' @param prot_origin \strong{vector of characters} \cr
6+
#' The origin of the protein (e.g. organism, spike-in/background etc.).
57
#'
6-
#' @return edgelist
8+
#' @return An edgelist.
79
#' @export
810
#'
11+
#' @seealso [digest_fasta()]
12+
#'
913
#' @examples
1014
#' library(seqinr)
1115
#' file <- system.file("extdata", "uniprot_test.fasta", package = "bppg")
@@ -14,6 +18,7 @@
1418
#' edgelist <- generate_edgelist(digested_proteins)
1519
#'
1620
#'
21+
1722
generate_edgelist <- function(digested_proteins, prot_origin = NULL) {
1823
#calculate necessary number of edges by counting the peptides belonging to each protein
1924
mat_length <- sum(lengths(digested_proteins))

R/helpers-generate_graphs_via_edgelist.R

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
1-
#' Generate bipartite peptide-protein graphs from a list of digested proteins via an edgelist
1+
#' Generate bipartite peptide-protein graphs from a list of digested proteins via an edgelist.
22
#'
3-
#' @param edgelist Output from generate_edgelist (edgelist)
3+
#' @param edgelist \strong{data.frame} \cr
4+
#' An edgelist, output from [generate_edgelist()].
45
#'
5-
#' @return List of subgraphs as igraph objects.
6+
#' @return A list of subgraphs as igraph objects.
67
#' @export
78
#'
9+
#' @seealso [generate_edgelist()]
10+
#'
811
#' @examples
912
#' ### TODO: example takes longer than 5s
1013
#' library(seqinr)

0 commit comments

Comments
 (0)