... | ... |
@@ -7,4 +7,14 @@ Maintainer: Markus List <[email protected]> |
7 | 7 |
Description: This package provides methods to efficiently detect competitive endogeneous RNA interactions between two genes. Such interactions are mediated by one or several miRNAs such that both gene and miRNA expression data for a larger number of samples is needed as input. |
8 | 8 |
License: GPLv3 |
9 | 9 |
LazyData: TRUE |
10 |
-RoxygenNote: 5.0.1 |
|
11 | 10 |
\ No newline at end of file |
11 |
+RoxygenNote: 5.0.1 |
|
12 |
+Suggests: |
|
13 |
+ testthat, |
|
14 |
+ ggplot2, |
|
15 |
+ d3network |
|
16 |
+Imports: |
|
17 |
+ ppcor, |
|
18 |
+ logging, |
|
19 |
+ foreach, |
|
20 |
+ dplyr, |
|
21 |
+ stringr |
... | ... |
@@ -1,15 +1,17 @@ |
1 | 1 |
# Generated by roxygen2: do not edit by hand |
2 | 2 |
|
3 | 3 |
export(gene_miRNA_interaction_filter) |
4 |
-export(genes_pairwise_combinations) |
|
5 | 4 |
export(sponge) |
5 |
+export(sponge_subsampling) |
|
6 | 6 |
import(dplyr) |
7 | 7 |
import(foreach) |
8 |
+import(ggplot2) |
|
8 | 9 |
import(glmnet) |
9 | 10 |
import(logging) |
10 | 11 |
import(mirbase.db) |
11 | 12 |
import(ppcor) |
12 | 13 |
import(targetscan.Hs.eg.db) |
14 |
+importFrom(foreach,foreach) |
|
13 | 15 |
importFrom(gRbase,combnPrim) |
14 | 16 |
importFrom(glmnet,cv.glmnet) |
15 | 17 |
importFrom(iterators,icount) |
... | ... |
@@ -10,6 +10,11 @@ |
10 | 10 |
#' with samples in columns and miRNA in rows |
11 | 11 |
"mir_expr" |
12 | 12 |
|
13 |
+#' miRNA / gene interactions |
|
14 |
+#' |
|
15 |
+#' @format A data frame of regression coefficients |
|
16 |
+#' typically provided by gene_miRNA_interaction_filter |
|
17 |
+"mir_interactions" |
|
13 | 18 |
|
14 | 19 |
#' miRNA family mapping table |
15 | 20 |
#' |
... | ... |
@@ -1,15 +1,61 @@ |
1 |
-sparc_network_centralities <- function(network, directed = FALSE){ |
|
1 |
+#' Computes various node centralities |
|
2 |
+#' |
|
3 |
+#' @description Computes degree, eigenvector centrality and betweenness |
|
4 |
+#' centrality for the ceRNA interaction network induced by the results of the |
|
5 |
+#' SPONGE method |
|
6 |
+#' |
|
7 |
+#' @param sponge_result output of the sponge method |
|
8 |
+#' |
|
9 |
+#' @importFrom igraph graph.data.frame |
|
10 |
+#' @importFrom igraph eigen_centrality |
|
11 |
+#' @importFrom igraph betweenness |
|
12 |
+#' @importFrom igraph degree |
|
13 |
+#' |
|
14 |
+#' @return data.table with gene, degree, eigenvector and betweenness |
|
15 |
+#' @export |
|
16 |
+#' |
|
17 |
+#' @seealso sponge |
|
18 |
+#' |
|
19 |
+#' @examples |
|
20 |
+sponge_node_centralities <- function(sponge_result){ |
|
21 |
+ directed <- FALSE |
|
2 | 22 |
|
3 |
- ev_centrality <- eigen_centrality(network, directed = directed) |
|
4 |
- btw_centrality <- betweenness(network, directed = directed) |
|
5 |
- centrality_df <- data.frame(gene = names(ev_centrality$vector), |
|
23 |
+ network <- igraph::graph.data.frame(sponge_result) |
|
24 |
+ |
|
25 |
+ ev_centrality <- igraph::eigen_centrality(network, directed = directed) |
|
26 |
+ btw_centrality <- igraph::betweenness(network, directed = directed) |
|
27 |
+ centrality_df <- data.table(gene = names(ev_centrality$vector), |
|
6 | 28 |
degree = igraph::degree(network), |
7 | 29 |
eigenvector = ev_centrality$vector, |
8 | 30 |
betweenness = btw_centrality) |
9 | 31 |
return(centrality_df) |
10 | 32 |
} |
11 | 33 |
|
12 |
-sparc_edge_betweenness <- function(network, directed = FALSE){ |
|
34 |
+#' Computes various edge centralities |
|
35 |
+#' |
|
36 |
+#' @description Computes edge betweenness |
|
37 |
+#' centrality for the ceRNA interaction network induced by the results of the |
|
38 |
+#' SPONGE method. |
|
39 |
+#' |
|
40 |
+#' @param sponge_result |
|
41 |
+#' |
|
42 |
+#' @importFrom igraph graph.data.frame |
|
43 |
+#' @importFrom igraph E |
|
44 |
+#' @importFrom igraph edge_betweenness |
|
45 |
+#' @importFrom igraph degree |
|
46 |
+#' @importFrom tidyr separate |
|
47 |
+#' |
|
48 |
+#' @return data.table with gene, degree, eigenvector and betweenness |
|
49 |
+#' @export |
|
50 |
+#' |
|
51 |
+#' @seealso sponge |
|
52 |
+#' |
|
53 |
+#' @examples |
|
54 |
+sponge_edge_centralities <- function(sponge_result){ |
|
55 |
+ directed <- FALSE |
|
56 |
+ |
|
57 |
+ network <- igraph::graph.data.frame(sponge_result) |
|
58 |
+ |
|
13 | 59 |
edge_labels <- attr(E(network), "vnames") |
14 | 60 |
ebtw <- edge_betweenness(network, directed = directed) |
15 | 61 |
ebtw <- data.frame(labels = edge_labels, edge_betweenness = ebtw) |
... | ... |
@@ -17,19 +63,19 @@ sparc_edge_betweenness <- function(network, directed = FALSE){ |
17 | 63 |
return(ebtw) |
18 | 64 |
} |
19 | 65 |
|
20 |
-sparc_matched_edge_betweenness <- function(cancer_network, normal_network, directed = FALSE){ |
|
21 |
- liver_cancer_edge_betweenness <- sparc_edge_betweenness(network = liver_cancer_sparc_network) |
|
22 |
- liver_normal_edge_betweenness <- sparc_edge_betweenness(network = liver_normal_sparc_network) |
|
66 |
+sponge_matched_edge_betweenness <- function(cancer_network, normal_network, directed = FALSE){ |
|
67 |
+ liver_cancer_edge_betweenness <- sponge_edge_betweenness(network = liver_cancer_sponge_network) |
|
68 |
+ liver_normal_edge_betweenness <- sponge_edge_betweenness(network = liver_normal_sponge_network) |
|
23 | 69 |
|
24 | 70 |
dplyr::inner_join(liver_cancer_edge_betweenness, liver_normal_edge_betweenness, by = c("source_gene", "target_gene")) %>% |
25 | 71 |
dplyr::rename(edge_betweenness_cancer = edge_betweenness.x, edge_betweenness_normal = edge_betweenness.y) %>% |
26 | 72 |
dplyr::mutate(edge_betweenness_diff = edge_betweenness_cancer - edge_betweenness_normal) |
27 | 73 |
} |
28 | 74 |
|
29 |
-sparc_matched_network_centralities <- function(cancer_network, normal_network, directed = FALSE){ |
|
75 |
+sponge_matched_network_centralities <- function(cancer_network, normal_network, directed = FALSE){ |
|
30 | 76 |
|
31 |
- cancer_centrality_df <- sparc_network_centralities(cancer_network, directed) |
|
32 |
- normal_centrality_df <- sparc_network_centralities(normal_network, directed) |
|
77 |
+ cancer_centrality_df <- sponge_network_centralities(cancer_network, directed) |
|
78 |
+ normal_centrality_df <- sponge_network_centralities(normal_network, directed) |
|
33 | 79 |
|
34 | 80 |
centrality_matched <- dplyr::full_join(normal_centrality_df, cancer_centrality_df, by="gene") %>% |
35 | 81 |
dplyr::rename(eigenvector_centrality.normal = eigenvector.x, |
... | ... |
@@ -51,7 +97,7 @@ sparc_matched_network_centralities <- function(cancer_network, normal_network, d |
51 | 97 |
return(centrality_matched) |
52 | 98 |
} |
53 | 99 |
|
54 |
-sparc_transform_centralities_for_plotting <- function(network_centralities){ |
|
100 |
+sponge_transform_centralities_for_plotting <- function(network_centralities){ |
|
55 | 101 |
library(dplyr) |
56 | 102 |
library(digest) |
57 | 103 |
plot.data <- bind_rows(dplyr::select(network_centralities, |
... | ... |
@@ -70,7 +116,7 @@ sparc_transform_centralities_for_plotting <- function(network_centralities){ |
70 | 116 |
return(plot.data) |
71 | 117 |
} |
72 | 118 |
|
73 |
-sparc_plot_top_edge_betweenness <- function(edge_betweenness, n){ |
|
119 |
+sponge_plot_edge_centralities <- function(edge_centralities, n){ |
|
74 | 120 |
top_x <- head(dplyr::arrange(edge_betweenness, desc(edge_betweenness)), n) %>% |
75 | 121 |
dplyr::mutate(edge = paste(source_gene, target_gene, sep="|")) |
76 | 122 |
ggplot(top_x, aes(x = reorder(edge, -edge_betweenness), y = edge_betweenness)) + |
... | ... |
@@ -81,11 +127,11 @@ sparc_plot_top_edge_betweenness <- function(edge_betweenness, n){ |
81 | 127 |
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) |
82 | 128 |
} |
83 | 129 |
|
84 |
-sparc_plot_network_centralities <- function(network_centralities, measure="all"){ |
|
130 |
+sponge_plot_network_centralities <- function(network_centralities, measure="all"){ |
|
85 | 131 |
library(ggplot2) |
86 | 132 |
library(ggrepel) |
87 | 133 |
|
88 |
- plot.data <- sparc_transform_centralities_for_plotting(network_centralities) |
|
134 |
+ plot.data <- sponge_transform_centralities_for_plotting(network_centralities) |
|
89 | 135 |
|
90 | 136 |
p1 <- ggplot(plot.data, aes(x=degree)) + |
91 | 137 |
geom_histogram(color=I("black"), fill=I("black"), alpha = 0.3)+ |
... | ... |
@@ -119,7 +165,7 @@ sparc_plot_network_centralities <- function(network_centralities, measure="all") |
119 | 165 |
ncol = 1) |
120 | 166 |
} |
121 | 167 |
|
122 |
-sparc_plot_eigenvector_centralities_differences <- function(network_centralities, label.threshold){ |
|
168 |
+sponge_plot_eigenvector_centralities_differences <- function(network_centralities, label.threshold){ |
|
123 | 169 |
network_centralities <- network_centralities %>% mutate(color = paste("#", substr(sapply(gene, function(x) digest(x, algo = "crc32")), 1, 6), sep="")) |
124 | 170 |
|
125 | 171 |
ggplot(data = network_centralities, |
... | ... |
@@ -133,7 +179,7 @@ sparc_plot_eigenvector_centralities_differences <- function(network_centralities |
133 | 179 |
geom_label_repel(data = dplyr::filter(network_centralities, abs(eigenvector_centrality.diff) > label.threshold)) |
134 | 180 |
} |
135 | 181 |
|
136 |
-sparc_plot_betweenness_centralities_differences <- function(network_centralities, label.threshold){ |
|
182 |
+sponge_plot_betweenness_centralities_differences <- function(network_centralities, label.threshold){ |
|
137 | 183 |
network_centralities <- network_centralities %>% mutate(color = paste("#", substr(sapply(gene, function(x) digest(x, algo = "crc32")), 1, 6), sep="")) |
138 | 184 |
|
139 | 185 |
ggplot(data = network_centralities, |
... | ... |
@@ -147,8 +193,8 @@ sparc_plot_betweenness_centralities_differences <- function(network_centralities |
147 | 193 |
geom_label_repel(data = dplyr::filter(network_centralities, abs(betweenness_centrality.diff) > label.threshold)) |
148 | 194 |
} |
149 | 195 |
|
150 |
-sparc_plot_top_centralities <- function(network_centralities, top = 50, |
|
151 |
- known.sparc.genes = c("ESR1", "CD44", "LIN28B", "HULC", "KRAS1P", "HSUR1", "HSUR2", "BRAFP1", "VCAN", "LINCMD1", "H19"), |
|
196 |
+sponge_plot_top_centralities <- function(network_centralities, top = 50, |
|
197 |
+ known.sponge.genes = c("ESR1", "CD44", "LIN28B", "HULC", "KRAS1P", "HSUR1", "HSUR2", "BRAFP1", "VCAN", "LINCMD1", "H19"), |
|
152 | 198 |
known.cancer.genes = c("TP53", "ESR1", "CD44", "KRAS"), |
153 | 199 |
only=""){ |
154 | 200 |
|
... | ... |
@@ -165,7 +211,7 @@ sparc_plot_top_centralities <- function(network_centralities, top = 50, |
165 | 211 |
} |
166 | 212 |
|
167 | 213 |
network_centralities$ceRNA = "novel" |
168 |
- network_centralities[which(network_centralities$gene %in% known.sparc.genes), "ceRNA"] <- "known" |
|
214 |
+ network_centralities[which(network_centralities$gene %in% known.sponge.genes), "ceRNA"] <- "known" |
|
169 | 215 |
network_centralities$ceRNA <- factor(network_centralities$ceRNA, levels = c("novel", "known")) |
170 | 216 |
|
171 | 217 |
network_centralities$cancer = FALSE |
... | ... |
@@ -1,37 +1,37 @@ |
1 | 1 |
sponge_plot_density <- function(cancer_sponge_effects, normal_sponge_effects){ |
2 | 2 |
cancer_sponge_effects$type <- "Liver cancer" |
3 | 3 |
normal_sponge_effects$type <- "Liver normal" |
4 |
- |
|
5 |
- p1 <- ggplot(cancer_sponge_effects) + |
|
4 |
+ |
|
5 |
+ p1 <- ggplot(cancer_sponge_effects) + |
|
6 | 6 |
facet_wrap(~type) + |
7 | 7 |
xlab("Cohen's q") + |
8 |
- geom_density(aes(x = cohens_q, fill = I("cornflowerblue")), alpha = 0.6) |
|
9 |
- |
|
10 |
- p2 <- ggplot(normal_sponge_effects) + |
|
8 |
+ geom_density(aes(x = scor, fill = I("cornflowerblue")), alpha = 0.6) |
|
9 |
+ |
|
10 |
+ p2 <- ggplot(normal_sponge_effects) + |
|
11 | 11 |
facet_wrap(~type) + |
12 | 12 |
xlab("Cohen's q") + |
13 |
- geom_density(aes(x = cohens_q, fill = I("darkorchid1")), alpha = 0.6) |
|
14 |
- |
|
15 |
- p3 <- ggplot(cancer_sponge_effects) + |
|
13 |
+ geom_density(aes(x = scor, fill = I("darkorchid1")), alpha = 0.6) |
|
14 |
+ |
|
15 |
+ p3 <- ggplot(cancer_sponge_effects) + |
|
16 | 16 |
xlab("p-value") + |
17 |
- geom_density(aes(x = cohens_q_p, fill = I("cornflowerblue")), alpha = 0.6) |
|
18 |
- |
|
19 |
- p4 <- ggplot(normal_sponge_effects) + |
|
17 |
+ geom_density(aes(x = scor_p, fill = I("cornflowerblue")), alpha = 0.6) |
|
18 |
+ |
|
19 |
+ p4 <- ggplot(normal_sponge_effects) + |
|
20 | 20 |
xlab("p-value") + |
21 |
- geom_density(aes(x = cohens_q_p, fill = I("darkorchid1")), alpha = 0.6) |
|
22 |
- |
|
23 |
- p5 <- ggplot(cancer_sponge_effects) + |
|
21 |
+ geom_density(aes(x = scor_p, fill = I("darkorchid1")), alpha = 0.6) |
|
22 |
+ |
|
23 |
+ p5 <- ggplot(cancer_sponge_effects) + |
|
24 | 24 |
xlab("adjusted p-value (BH)") + |
25 |
- geom_density(aes(x = cohens_q_p.adj, fill = I("cornflowerblue")), alpha = 0.6) |
|
26 |
- |
|
27 |
- p6 <- ggplot(normal_sponge_effects) + |
|
25 |
+ geom_density(aes(x = scor_p.adj, fill = I("cornflowerblue")), alpha = 0.6) |
|
26 |
+ |
|
27 |
+ p6 <- ggplot(normal_sponge_effects) + |
|
28 | 28 |
xlab("ajusted p-value (BH)") + |
29 |
- geom_density(aes(x = cohens_q_p.adj, fill = I("darkorchid1")), alpha = 0.6) |
|
30 |
- |
|
29 |
+ geom_density(aes(x = scor_p.adj, fill = I("darkorchid1")), alpha = 0.6) |
|
30 |
+ |
|
31 | 31 |
grid.arrange(p1, p2, p3, p4, p5, p6, ncol = 2) |
32 | 32 |
} |
33 | 33 |
|
34 |
-sponge_plot_heatmap <- function(data, interactive=T, show = "cohens_q"){ |
|
34 |
+sponge_plot_heatmap <- function(data, interactive=T, show = "scor"){ |
|
35 | 35 |
if(require(d3heatmap) && interactive){ |
36 | 36 |
sponge.matrix <- xtabs(p.adj ~ source_gene + target_gene, data = data) |
37 | 37 |
d3heatmap(sponge.matrix, dendrogram = "none", symm=T) |
... | ... |
@@ -43,8 +43,8 @@ sponge_plot_heatmap <- function(data, interactive=T, show = "cohens_q"){ |
43 | 43 |
data[data$p.adj < 0.05, "significance"] <- "*" |
44 | 44 |
data[data$p.adj < 0.01, "significance"] <- "**" |
45 | 45 |
data[data$p.adj < 0.001, "significance"] <- "***" |
46 |
- ggplot(data = data, aes_string(fill = show, x = "source_gene", y = "target_gene")) + |
|
47 |
- geom_tile() + |
|
46 |
+ ggplot(data = data, aes_string(fill = show, x = "source_gene", y = "target_gene")) + |
|
47 |
+ geom_tile() + |
|
48 | 48 |
theme_bw() + |
49 | 49 |
geom_text(label=data$significance) |
50 | 50 |
} |
... | ... |
@@ -55,23 +55,23 @@ sponge_plot_heatmap <- function(data, interactive=T, show = "cohens_q"){ |
55 | 55 |
|
56 | 56 |
sponge_plot_boxplot <- function(data){ |
57 | 57 |
if(!require(ggplot2)) stop("library ggplot2 needs to be installed for this plot") |
58 |
- |
|
59 |
- ggplot(data = data, aes(x = source_gene, y = cohens_q)) + geom_boxplot(fill = "skyblue", aes(outlier.color = p.adj)) + theme_bw() |
|
58 |
+ |
|
59 |
+ ggplot(data = data, aes(x = source_gene, y = scor)) + geom_boxplot(fill = "skyblue", aes(outlier.color = p.adj)) + theme_bw() |
|
60 | 60 |
} |
61 | 61 |
|
62 |
-sponge_network <- function(sponge.data, |
|
63 |
- mir.data, |
|
62 |
+sponge_network <- function(sponge.data, |
|
63 |
+ mir.data, |
|
64 | 64 |
target.genes = NULL, |
65 |
- cerna.p.adj.threshold = 0.05, |
|
66 |
- show.sponge.interaction = TRUE, |
|
65 |
+ cerna.p.val.threshold = 0.05, |
|
66 |
+ show.sponge.interaction = TRUE, |
|
67 | 67 |
show.mirnas = c("none", "all", "shared"), |
68 | 68 |
replace.mirna.with.name = TRUE, |
69 | 69 |
min.interactions = 3){ |
70 | 70 |
library(foreach) |
71 | 71 |
library(iterators) |
72 | 72 |
library(dplyr) |
73 |
- sponge.data <- filter(sponge.data, cohens_q_p.adj < cerna.p.adj.threshold) |
|
74 |
- |
|
73 |
+ sponge.data <- filter(sponge.data, p.val < cerna.p.val.threshold) |
|
74 |
+ |
|
75 | 75 |
#genes <- unique(c(as.character(sponge.data$source_gene), as.character(sponge.data$target_gene))) |
76 | 76 |
genes <- unique(c(as.character(sponge.data$source_gene), as.character(sponge.data$target_gene))) |
77 | 77 |
|
... | ... |
@@ -79,26 +79,26 @@ sponge_network <- function(sponge.data, |
79 | 79 |
nodes <- NULL |
80 | 80 |
|
81 | 81 |
#sponge genes edges |
82 |
- sponge.edges <- with(sponge.data, { |
|
83 |
- result <- data.frame(from=source_gene, to=target_gene, width = cohens_q, color="red") |
|
84 |
- |
|
82 |
+ sponge.edges <- with(sponge.data, { |
|
83 |
+ result <- data.frame(from=source_gene, to=target_gene, width = scor, color="red") |
|
84 |
+ |
|
85 | 85 |
result[which(result$to %in% genes | result$from %in% genes),] |
86 | 86 |
}) |
87 |
- |
|
87 |
+ |
|
88 | 88 |
if(show.sponge.interaction) edges <- rbind(edges, sponge.edges) |
89 |
- |
|
89 |
+ |
|
90 | 90 |
#sponge gene nodes |
91 | 91 |
sponge.nodes <- data.frame(id = genes, label = genes, shape = "square", color="darkgreen", type=FALSE, value=1) |
92 | 92 |
nodes <- rbind(nodes, sponge.nodes) |
93 |
- |
|
93 |
+ |
|
94 | 94 |
if(!is.null(target.genes)){ |
95 | 95 |
nodes[which(nodes$id %in% target.genes), "shape"] <- "square" |
96 | 96 |
nodes[which(nodes$id %in% target.genes), "color"] <- "blue" |
97 | 97 |
} |
98 |
- |
|
98 |
+ |
|
99 | 99 |
#mirna nodes and edges |
100 | 100 |
if(show.mirnas != "none"){ |
101 |
- |
|
101 |
+ |
|
102 | 102 |
mirna.edges <- foreach(gene = nodes$id, .combine=rbind) %do% { |
103 | 103 |
gene_mirnas <- mir.data[[as.character(gene)]] |
104 | 104 |
gene_mirnas <- dplyr::filter(gene_mirnas, coefficient < 0) |
... | ... |
@@ -111,50 +111,50 @@ sponge_network <- function(sponge.data, |
111 | 111 |
{ |
112 | 112 |
#consider only miRNAs shared with a source gene |
113 | 113 |
mirnas <- mirna.edges[which(mirna.edges$from %in% genes),"to"] |
114 |
- |
|
114 |
+ |
|
115 | 115 |
#count mirnas that appear form at least x edges |
116 | 116 |
mirnas <- intersect(mirnas, names(which(table(as.character(mirna.edges$to)) > min.interactions))) |
117 |
- |
|
117 |
+ |
|
118 | 118 |
mirna.edges <- mirna.edges[which(mirna.edges$to %in% mirnas),] |
119 |
- } |
|
120 |
- |
|
119 |
+ } |
|
120 |
+ |
|
121 | 121 |
else mirnas <- unique(as.character(mirna.edges$to)) |
122 | 122 |
|
123 | 123 |
if(length(mirnas) > 0){ |
124 | 124 |
nodes <- rbind(nodes, data.frame(id = mirnas, label = mirnas, shape = "triangle", color="darkblue", type=TRUE, value=1)) |
125 | 125 |
edges <- rbind(edges, mirna.edges) |
126 |
- |
|
126 |
+ |
|
127 | 127 |
#replace mirna with name |
128 | 128 |
if(replace.mirna.with.name){ |
129 | 129 |
|
130 | 130 |
nodes$label <- as.character(nodes$label) |
131 | 131 |
nodes[which(nodes$type), "label"] <- as.character(fn_map_mimat_to_mir(as.character(nodes[which(nodes$type), "id"]))) |
132 | 132 |
} |
133 |
- } |
|
133 |
+ } |
|
134 | 134 |
else{ |
135 | 135 |
stop("No miRNAs found that match all criteria") |
136 |
- } |
|
136 |
+ } |
|
137 | 137 |
} |
138 |
- |
|
138 |
+ |
|
139 | 139 |
#filter out orphan nodes |
140 | 140 |
nodes <- nodes[which(nodes$id %in% c(as.character(edges$to), as.character(edges$from))),] |
141 |
- |
|
141 |
+ |
|
142 | 142 |
return(list(nodes=nodes, edges=edges)) |
143 | 143 |
} |
144 | 144 |
|
145 |
-sponge_plot_network <- function(nodes, |
|
146 |
- edges, |
|
147 |
- layout="layout.fruchterman.reingold", |
|
145 |
+sponge_plot_network <- function(nodes, |
|
146 |
+ edges, |
|
147 |
+ layout="layout.fruchterman.reingold", |
|
148 | 148 |
force.directed = FALSE){ |
149 | 149 |
library(visNetwork) |
150 |
- |
|
150 |
+ |
|
151 | 151 |
if(nrow(edges) < 5000){ |
152 | 152 |
plot <- visNetwork(nodes, edges) |
153 |
- plot <- plot %>% visIgraphLayout(layout = layout, type="full", physics = force.directed) |
|
153 |
+ plot <- plot %>% visIgraphLayout(layout = layout, type="full", physics = force.directed) |
|
154 | 154 |
plot <- plot %>% visOptions(highlightNearest = TRUE, nodesIdSelection = TRUE) |
155 | 155 |
plot <- plot %>% visNodes(font = list(size = 32)) |
156 | 156 |
plot <- plot %>% visEdges(color = list(opacity = 1)) |
157 |
- |
|
157 |
+ |
|
158 | 158 |
return(plot) |
159 | 159 |
} |
160 | 160 |
else{ |
161 | 161 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,86 @@ |
1 |
+compute_p_values <- function(partition, |
|
2 |
+ cov.matrices, |
|
3 |
+ number.of.datasets = 1e6, |
|
4 |
+ number.of.samples){ |
|
5 |
+ |
|
6 |
+ if(!("scor" %in% colnames(partition))) stop("sensitivity correlation missing") |
|
7 |
+ |
|
8 |
+ #check which k and m |
|
9 |
+ k <- as.character(partition[1,cor_cut]) |
|
10 |
+ m <- as.character(partition[1,df_cut]) |
|
11 |
+ |
|
12 |
+ #simulate data using the appropriate covariance matrices |
|
13 |
+ cov.matrices.partition <- cov.matrices[[m]][[k]] |
|
14 |
+ |
|
15 |
+ #to reach the necessary number of datasets we need to find out how many |
|
16 |
+ #datasets to construct from each covariance matrix we have |
|
17 |
+ number.of.datasets.per.matrix <- ceiling(number.of.datasets / length(cov.matrices.partition)) |
|
18 |
+ |
|
19 |
+ scor <- unlist(sample_zero_scor_data(cov.matrices = cov.matrices.partition, |
|
20 |
+ number.of.datasets = number.of.datasets.per.matrix, |
|
21 |
+ number.of.samples = number.of.samples)[1:number.of.datasets]) |
|
22 |
+ |
|
23 |
+ test_data_dt <- data.table(scor) #data.table(scor[which(scor > 0)]) |
|
24 |
+ setkey(test_data_dt, scor) |
|
25 |
+ number.of.datasets.on.right.side <- length(test_data_dt$scor) |
|
26 |
+ |
|
27 |
+ #partition_scor_positive <- partition[which(partition$scor >= 0),] |
|
28 |
+ |
|
29 |
+ partition$p.val <- (number.of.datasets.on.right.side - |
|
30 |
+ test_data_dt[J(partition$scor), |
|
31 |
+ .I, |
|
32 |
+ roll = "nearest", |
|
33 |
+ by = .EACHI]$I) / number.of.datasets.on.right.side |
|
34 |
+ |
|
35 |
+ return(partition) |
|
36 |
+} |
|
37 |
+ |
|
38 |
+sponge_compute_p_values <- function(sponge_result, |
|
39 |
+ cov.matrices, |
|
40 |
+ number.of.samples, |
|
41 |
+ number.of.datasets = 10, |
|
42 |
+ ms, ks){ |
|
43 |
+ |
|
44 |
+ #divide gene_gene correlation |
|
45 |
+ if(max(sponge_result$df) > 7) df_breaks <- c(seq(0,7), max(sponge_result$df)) |
|
46 |
+ else df_breaks <- seq(0, max(sponge_result$df)) |
|
47 |
+ |
|
48 |
+ sponge_result <- sponge_result[, |
|
49 |
+ c("cor_cut", "df_cut") := list( |
|
50 |
+ cut(abs(cor), breaks = c(0, seq(0.25, 0.85, 0.1), 1)), |
|
51 |
+ cut(df, breaks = df_breaks))] |
|
52 |
+ |
|
53 |
+ levels(sponge_result$cor_cut) <- ks |
|
54 |
+ levels(sponge_result$df_cut) <- ms |
|
55 |
+ |
|
56 |
+ isplitDT2 <- function(x, ks, ms) { |
|
57 |
+ ival <- iter(apply(expand.grid(ks, ms), 1, list)) |
|
58 |
+ nextEl <- function() { |
|
59 |
+ val <- nextElem(ival) |
|
60 |
+ list(value=x[.(as.character(val[[1]][1]), |
|
61 |
+ as.character(val[[1]][2]))], key=val[[1]]) |
|
62 |
+ } |
|
63 |
+ obj <- list(nextElem=nextEl) |
|
64 |
+ class(obj) <- c('abstractiter', 'iter') |
|
65 |
+ obj |
|
66 |
+ } |
|
67 |
+ |
|
68 |
+ dtcomb <- function(...) { |
|
69 |
+ rbindlist(list(...)) |
|
70 |
+ } |
|
71 |
+ |
|
72 |
+ setkey(sponge_result, cor_cut, df_cut) |
|
73 |
+ |
|
74 |
+ foreach(dt.m=isplitDT2(sponge_result, ks, ms), |
|
75 |
+ .combine='dtcomb', |
|
76 |
+ .multicombine=TRUE, |
|
77 |
+ .export = c("compute_p_values", |
|
78 |
+ "sample_zero_scor_data"), |
|
79 |
+ .packages = c("gRbase", "MASS", "ppcor", "foreach", "logging", "data.table"), |
|
80 |
+ .noexport = c("sponge_result")) %dopar% { |
|
81 |
+ compute_p_values(partition = dt.m$value, |
|
82 |
+ cov.matrices = cov.matrices, |
|
83 |
+ number.of.datasets = number.of.datasets, |
|
84 |
+ number.of.samples = number.of.samples) |
|
85 |
+ } |
|
86 |
+} |
|
0 | 87 |
\ No newline at end of file |
1 | 88 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,287 @@ |
1 |
+library(MASS) |
|
2 |
+library(foreach) |
|
3 |
+library(gRbase) |
|
4 |
+library(testthat) |
|
5 |
+library(ppcor) |
|
6 |
+library(expm) |
|
7 |
+library(logging) |
|
8 |
+ |
|
9 |
+#generates a positive semi definite matrix |
|
10 |
+posdef <- function (n, ev = runif(n, 0, 10)) |
|
11 |
+{ |
|
12 |
+ Z <- matrix(ncol=n, rnorm(n^2)) |
|
13 |
+ decomp <- qr(Z) |
|
14 |
+ Q <- qr.Q(decomp) |
|
15 |
+ R <- qr.R(decomp) |
|
16 |
+ d <- diag(R) |
|
17 |
+ ph <- d / abs(d) |
|
18 |
+ O <- Q %*% diag(ph) |
|
19 |
+ Z <- t(O) %*% diag(ev) %*% O |
|
20 |
+ return(Z) |
|
21 |
+} |
|
22 |
+ |
|
23 |
+#computes the schur complement with respect to R22 |
|
24 |
+schur<-function(R) { |
|
25 |
+ ((R[1:2,1:2]) - (R[1:2,3:ncol(R)]) %*% (solve(R[3:nrow(R),3:ncol(R)])) %*% t(R[1:2,3:ncol(R)])) |
|
26 |
+} |
|
27 |
+ |
|
28 |
+#computes sensitivity correlation |
|
29 |
+get.q = function(S) { |
|
30 |
+ s11 = S[1:2,1:2] #gene-gene covariance |
|
31 |
+ pva = schur(S) #partial covariance |
|
32 |
+ q = s11/outer(sqrt(diag(s11)),sqrt(diag(s11))) - pva / outer(sqrt(diag(pva)),sqrt(diag(pva))) #- sens - corr |
|
33 |
+ return(q) |
|
34 |
+} |
|
35 |
+ |
|
36 |
+#computes lambda from the dot product |
|
37 |
+checkLambda <- function(x,y){ |
|
38 |
+ if(length(x) != length(y)) stop("input should be two vectors of equal length") |
|
39 |
+ beta <- foreach(i = 1:length(x), .combine = sum) %do% {x[i] * y[i]} |
|
40 |
+ norm.x <- base::norm(x,type="2") |
|
41 |
+ norm.y <- base::norm(y,type="2") |
|
42 |
+ (beta / (norm.x * norm.y)) |
|
43 |
+} |
|
44 |
+ |
|
45 |
+quadraticSolver <- function(a, b, c){ |
|
46 |
+ bsquare4ac <- b^2 - 4 * a * c |
|
47 |
+ |
|
48 |
+ tryCatch({ |
|
49 |
+ if(bsquare4ac < 0 || a == 0){ |
|
50 |
+ return(NULL) |
|
51 |
+ } |
|
52 |
+ |
|
53 |
+ solution.1 <- (-b + sqrt(bsquare4ac)) / (2 * a) |
|
54 |
+ solution.2 <- (-b - sqrt(bsquare4ac)) / (2 * a) |
|
55 |
+ |
|
56 |
+ return(list(solution.1, solution.2)) |
|
57 |
+ }, error = function(e){ |
|
58 |
+ return(NULL) |
|
59 |
+ }) |
|
60 |
+} |
|
61 |
+ |
|
62 |
+#main method for sampling zero sensitivity covariance matrices |
|
63 |
+sample_zero_scor_cov <- function(m, number.of.solutions, |
|
64 |
+ number.of.attempts = 1e3, |
|
65 |
+ gene_gene_correlation = NULL, |
|
66 |
+ log.level = "INFO"){ |
|
67 |
+ |
|
68 |
+ solutions <- foreach(solution = 1:number.of.solutions, |
|
69 |
+ .packages = c("MASS", "gRbase", "testthat", |
|
70 |
+ "ppcor", "expm", "logging", |
|
71 |
+ "foreach"), |
|
72 |
+ .export = c("quadraticSolver", |
|
73 |
+ "checkLambda", |
|
74 |
+ "get.q", |
|
75 |
+ "schur", |
|
76 |
+ "posdef")) %dopar% { |
|
77 |
+ total <- 0 |
|
78 |
+ basicConfig(level = log.level) |
|
79 |
+ |
|
80 |
+ loginfo(paste("Looking for zero sensitivity covariance matrix for case m =", |
|
81 |
+ m, "solution no.", solution)) |
|
82 |
+ #a lot of solutions are not within our constraints, so we repeat |
|
83 |
+ while(total < number.of.attempts){ |
|
84 |
+ total <- total + 1 |
|
85 |
+ if(is.null(gene_gene_correlation)) K = runif(1,-1,1) # R11 |
|
86 |
+ else K = gene_gene_correlation |
|
87 |
+ v1 <- runif(m, -1, 1) #random v1 |
|
88 |
+ |
|
89 |
+ if(m == 1){ |
|
90 |
+ R22 <- 1 |
|
91 |
+ Z <- v1 |
|
92 |
+ |
|
93 |
+ a <- K^2 + Z^2 - K^2 * Z^2 |
|
94 |
+ b <- -2 * K * Z |
|
95 |
+ c <- Z^2 * K^2 |
|
96 |
+ |
|
97 |
+ v2.solutions <- quadraticSolver(a, b, c) |
|
98 |
+ if(is.null(v2.solutions)){ |
|
99 |
+ logdebug("no solution for v2 found") |
|
100 |
+ next |
|
101 |
+ } |
|
102 |
+ else{ |
|
103 |
+ v2 <- v2.solutions[[1]] |
|
104 |
+ } |
|
105 |
+ } |
|
106 |
+ else{ |
|
107 |
+ lambda = runif(1,-1,1) #cos theta |
|
108 |
+ |
|
109 |
+ R22 <- cov2cor(posdef(m)) #miRNA correlation |
|
110 |
+ R22_invsqrt <- ginv(sqrtm(R22)) #R^-1/2 |
|
111 |
+ |
|
112 |
+ u1 <- (R22_invsqrt %*% v1)[,1] |
|
113 |
+ u1.norm = base::norm(u1, type = "2") |
|
114 |
+ |
|
115 |
+ #solving quadratic equation to obtain u2 solutions |
|
116 |
+ a <- ((K^2 - lambda^2) * u1.norm^2 - K^2) |
|
117 |
+ b <- 2 * lambda * K * u1.norm |
|
118 |
+ c <- -K^2 * u1.norm^2 |
|
119 |
+ |
|
120 |
+ u2.solutions <- quadraticSolver(a, b, c) |
|
121 |
+ if(is.null(u2.solutions)){ |
|
122 |
+ logdebug("no solution for u2") |
|
123 |
+ next |
|
124 |
+ } |
|
125 |
+ |
|
126 |
+ u2.norm.1 <- u2.solutions[[1]] |
|
127 |
+ u2.norm.2 <- u2.solutions[[2]] |
|
128 |
+ |
|
129 |
+ r12.m <- (K - sqrt(u1.norm^2) * sqrt(u2.norm.1^2) * lambda) * (1 - u1.norm^2)^-(1/2) * (1 - u2.norm.1^2)^-(1/2) |
|
130 |
+ |
|
131 |
+ if(is.nan(r12.m)){ |
|
132 |
+ r12.m <- (K - sqrt(u1.norm^2) * sqrt(u2.norm.2^2) * lambda) * (1 - u1.norm^2)^-(1/2) * (1 - u2.norm.2^2)^-(1/2) |
|
133 |
+ } |
|
134 |
+ |
|
135 |
+ if(is.nan(r12.m)){ |
|
136 |
+ logdebug("||u2|| is not a valid solution") |
|
137 |
+ next |
|
138 |
+ } |
|
139 |
+ |
|
140 |
+ if(r12.m != K){ |
|
141 |
+ logdebug("correlation is not equal to partial correlation for selected ||u2||") |
|
142 |
+ next |
|
143 |
+ } |
|
144 |
+ |
|
145 |
+ constraints <- (R22_invsqrt %*% rep(1,m))[,1] |
|
146 |
+ |
|
147 |
+ if(anyNA(constraints)){ |
|
148 |
+ logdebug("R22^-(1/2) invalid. Can not compute constraints") |
|
149 |
+ next |
|
150 |
+ } |
|
151 |
+ |
|
152 |
+ u1.m <- u1[m] |
|
153 |
+ u2.wo_m <- foreach(i = 1:(m-1), .combine = c) %do%{ |
|
154 |
+ runif(1, -constraints[i], constraints[i]) |
|
155 |
+ } |
|
156 |
+ |
|
157 |
+ if(anyNA(u2.wo_m)){ |
|
158 |
+ logdebug("R22^-(1/2) invalid. Can not compute constraints") |
|
159 |
+ next |
|
160 |
+ } |
|
161 |
+ |
|
162 |
+ beta <- foreach(i = 1:(m-1), .combine = sum) %do% {u1[i] * u2.wo_m[i]} |
|
163 |
+ |
|
164 |
+ A <- u1.m^2 - lambda^2 * u1.norm^2 |
|
165 |
+ B <- 2 * beta * u1.m |
|
166 |
+ C <- beta^2 - lambda^2 * u1.norm^2 * sum(u2.wo_m^2) |
|
167 |
+ |
|
168 |
+ u2.solutions <- quadraticSolver(A, B, C) |
|
169 |
+ |
|
170 |
+ if(is.null(u2.solutions)){ |
|
171 |
+ logdebug("no solution for the k-th element of u2") |
|
172 |
+ next |
|
173 |
+ } |
|
174 |
+ #solve quadratic equation |
|
175 |
+ u2.k.1 <- u2.solutions[[1]] |
|
176 |
+ u2.k.2 <- u2.solutions[[2]] |
|
177 |
+ |
|
178 |
+ #check if lambda constraint is really fulfilled |
|
179 |
+ u2 <- c(u2.wo_m, u2.k.1) |
|
180 |
+ |
|
181 |
+ #if the sign is wrong we have to take the other solution |
|
182 |
+ if(checkLambda(u1, u2) != lambda) |
|
183 |
+ u2 <- c(u2.wo_m, u2.k.2) |
|
184 |
+ |
|
185 |
+ #normalize and then multiply by ||u2|| |
|
186 |
+ u2.scaled <- u2 / base::norm(u2, type = "2") * u2.norm.1 |
|
187 |
+ |
|
188 |
+ i <- 1 |
|
189 |
+ within_constraints <- TRUE |
|
190 |
+ while(i < length(u2.scaled)){ |
|
191 |
+ if(u2.scaled[i] > constraints[i] || u2.scaled[i] < -constraints[i]) { |
|
192 |
+ within_constraints <- FALSE |
|
193 |
+ break |
|
194 |
+ } |
|
195 |
+ i <- i+1 |
|
196 |
+ } |
|
197 |
+ |
|
198 |
+ if(!within_constraints){ |
|
199 |
+ logdebug("solution violates constraints") |
|
200 |
+ next |
|
201 |
+ } |
|
202 |
+ else |
|
203 |
+ { |
|
204 |
+ #compute v2 |
|
205 |
+ v2 = (solve(R22_invsqrt) %*% u2.scaled)[,1] |
|
206 |
+ } |
|
207 |
+ } |
|
208 |
+ |
|
209 |
+ #construct correlation matrix R |
|
210 |
+ R <- matrix(ncol = m+2, nrow = m+2) |
|
211 |
+ diag(R) <- 1 |
|
212 |
+ R[1,2] <- K |
|
213 |
+ R[2,1] <- K |
|
214 |
+ R[3:(2+m),3:(2+m)] <- R22 |
|
215 |
+ R[1,3:(2+m)] <- v1 |
|
216 |
+ R[2,3:(2+m)] <- v2 |
|
217 |
+ R[3:(2+m), 1] <- v1 |
|
218 |
+ R[3:(2+m), 2] <- v2 |
|
219 |
+ |
|
220 |
+ #generate covariance matrix S |
|
221 |
+ L = diag(exp(rnorm(2+m))) #- variances |
|
222 |
+ S = L %*% R %*% L #- covariance matrix |
|
223 |
+ |
|
224 |
+ #test for negative variance |
|
225 |
+ if(any(diag(schur(S)) < 0)){ |
|
226 |
+ logdebug("negative variance in partial covariance matrix") |
|
227 |
+ next |
|
228 |
+ } |
|
229 |
+ |
|
230 |
+ #test sensitivity correlation is zero |
|
231 |
+ scor <- get.q(S)[1,2] |
|
232 |
+ if(is.nan(scor)){ |
|
233 |
+ logdebug("sensitivity correlation is NaN") |
|
234 |
+ next |
|
235 |
+ } |
|
236 |
+ else if(abs(scor) > sqrt(.Machine$double.eps)){ |
|
237 |
+ logdebug("sensitivity correlation is not zero") |
|
238 |
+ next |
|
239 |
+ } |
|
240 |
+ else{ |
|
241 |
+ loginfo(paste("viable solution found for m =", m, |
|
242 |
+ "and k =", K, |
|
243 |
+ "solution no.", solution)) |
|
244 |
+ attr(S, "iterations") <- total |
|
245 |
+ attr(S, "k") <- K |
|
246 |
+ attr(S, "m") <- m |
|
247 |
+ return(S) |
|
248 |
+ } |
|
249 |
+ } |
|
250 |
+ } |
|
251 |
+ solutions <- Filter(Negate(is.null), solutions) |
|
252 |
+ if(length(solutions) == 0){ |
|
253 |
+ logerror("No solutions found") |
|
254 |
+ return(NULL) |
|
255 |
+ } |
|
256 |
+ else{ |
|
257 |
+ total <- sum(unlist(lapply(solutions, function(x){ |
|
258 |
+ attr(x, "iterations")}))) |
|
259 |
+ loginfo(paste("case k = ", gene_gene_correlation, "m = ", m, " - Found", |
|
260 |
+ length(solutions), "solutions in a total of", |
|
261 |
+ total, "iterations.")) |
|
262 |
+ } |
|
263 |
+ |
|
264 |
+ return(solutions) |
|
265 |
+} |
|
266 |
+ |
|
267 |
+ |
|
268 |
+sample_zero_scor_data <- function(cov.matrices, |
|
269 |
+ number.of.samples = 100, |
|
270 |
+ number.of.datasets = 100){ |
|
271 |
+ foreach(cov.matrix = cov.matrices, |
|
272 |
+ .packages = c("gRbase", "MASS", "ppcor", "foreach", "logging")) %dopar% { |
|
273 |
+ #check that sensitivity correlation is zero |
|
274 |
+ if(abs(cov2pcor(cov.matrix)[1,2] - cov2cor(cov.matrix)[1,2]) > sqrt(.Machine$double.eps)) |
|
275 |
+ stop("sensitivity correlation of a given covariance matrix is not zero.") |
|
276 |
+ |
|
277 |
+ #sample data under this covariance matrix |
|
278 |
+ foreach(i = 1:number.of.datasets, .combine = c) %do%{ |
|
279 |
+ sample.data <- mvrnorm(n = number.of.samples, |
|
280 |
+ rep(0, ncol(cov.matrix)), |
|
281 |
+ cov.matrix, |
|
282 |
+ empirical = FALSE) |
|
283 |
+ cor(sample.data)[1,2] - pcor(sample.data)$estimate[1,2] |
|
284 |
+ } |
|
285 |
+ } |
|
286 |
+} |
|
287 |
+ |
... | ... |
@@ -41,12 +41,12 @@ fn_map_mimats_to_mir <- function(mimats){ |
41 | 41 |
|
42 | 42 |
#' Compute all pairwise interactions for a number of genes as indices |
43 | 43 |
#' |
44 |
-#' @param number.of.genes |
|
44 |
+#' @param number.of.genes Number of genes for which all pairwise interactions |
|
45 |
+#' are needed |
|
45 | 46 |
#' @importFrom gRbase combnPrim |
46 | 47 |
#' |
47 | 48 |
#' @return data frame with one row per unique pairwise combination. To be used |
48 | 49 |
#' as input for the sponge method. |
49 |
-#' @export |
|
50 | 50 |
#' |
51 | 51 |
#' @examples genes_pairwise_combinations(ncol(gene_expr)) |
52 | 52 |
genes_pairwise_combinations <- function(number.of.genes){ |
... | ... |
@@ -71,6 +71,9 @@ genes_pairwise_combinations <- function(number.of.genes){ |
71 | 71 |
#' @param log.every.n write to the log after every n steps |
72 | 72 |
#' @param selected.genes Operate only on a subset of genes, particularly |
73 | 73 |
#' useful for bootstrapping |
74 |
+#' @param gene.combinations A data frame of combinations of genes to be tested. |
|
75 |
+#' Gene names are taken from the first two columns and have to match the names |
|
76 |
+#' used for gene_expr |
|
74 | 77 |
#' @param p.adj.method Multiple testing correction method. see ?p.adjust for |
75 | 78 |
#' details |
76 | 79 |
#' @param p.value.threshold Multiple testing p-value cutoff |
... | ... |
@@ -128,14 +131,19 @@ sponge <- function(gene_expr, mir_expr, mir_interactions, |
128 | 131 |
sel.genes <- available.selected.genes |
129 | 132 |
} |
130 | 133 |
|
131 |
- #consider only genes that have miRNA interactions |
|
132 |
- sel.genes <- Filter(Negate(is.null), sel.genes) |
|
134 |
+ genes.as.indices <- FALSE |
|
133 | 135 |
|
134 | 136 |
#all pairwise combinations of selected genes |
135 | 137 |
if(is.null(gene.combinations)){ |
136 | 138 |
loginfo("Computing all pairwise combinations of genes") |
139 |
+ |
|
140 |
+ #consider only genes that have miRNA interactions |
|
141 |
+ sel.genes <- Filter(Negate(is.null), sel.genes) |
|
142 |
+ |
|
137 | 143 |
gene.combinations <- |
138 | 144 |
genes_pairwise_combinations(length(sel.genes)) |
145 |
+ |
|
146 |
+ genes.as.indices <- TRUE |
|
139 | 147 |
} |
140 | 148 |
|
141 | 149 |
loginfo("Beginning SPONGE run...") |
... | ... |
@@ -162,9 +170,14 @@ sponge <- function(gene_expr, mir_expr, mir_interactions, |
162 | 170 |
result <- foreach(gene_combi = iter(gene_combis, by="row"), |
163 | 171 |
.combine=rbind) %do% { |
164 | 172 |
|
165 |
- geneA <- sel.genes[gene_combi[1]] |
|
166 |
- geneB <- sel.genes[gene_combi[2]] |
|
167 |
- |
|
173 |
+ if(genes.as.indices){ |
|
174 |
+ geneA <- sel.genes[gene_combi[1]] |
|
175 |
+ geneB <- sel.genes[gene_combi[2]] |
|
176 |
+ } |
|
177 |
+ else { |
|
178 |
+ geneA <- as.character(gene_combi[1,1]) |
|
179 |
+ geneB <- as.character(gene_combi[1,2]) |
|
180 |
+ } |
|
168 | 181 |
|
169 | 182 |
logdebug(paste("Processing source gene", geneA, |
170 | 183 |
"and target gene", geneB )) |
... | ... |
@@ -188,6 +201,13 @@ sponge <- function(gene_expr, mir_expr, mir_interactions, |
188 | 201 |
mir_interactions) |
189 | 202 |
} |
190 | 203 |
|
204 |
+ #check if shared miRNAs are in expression matrix |
|
205 |
+ if(length(setdiff(mir_intersect, colnames(mir_expr))) > 0){ |
|
206 |
+ logwarn(paste("Source gene", geneA, "and target gene", geneB, |
|
207 |
+ "shared miRNAs not found in mir_expr are discarded")) |
|
208 |
+ mir_intersect <- intersect(mir_intersect, colnames(mir_expr)) |
|
209 |
+ } |
|
210 |
+ |
|
191 | 211 |
#check if there are actually any shared mirnas |
192 | 212 |
if(length(mir_intersect) == 0){ |
193 | 213 |
logdebug(paste("Source gene", geneA, "and target gene", geneB, |
194 | 214 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,84 @@ |
1 |
+#' Sponge subsampling |
|
2 |
+#' @importFrom foreach foreach |
|
3 |
+#' @import ggplot2 |
|
4 |
+#' @import dplyr |
|
5 |
+#' |
|
6 |
+#' @param subsample.n the number of samples to be drawn in each round |
|
7 |
+#' @param subsample.repeats how often should the subsampling be done? |
|
8 |
+#' @param subsample.with.replacement logical, should we allow samples to be used |
|
9 |
+#' repeatedly |
|
10 |
+#' @param subsample.plot logical, should the results be plotted as box plots |
|
11 |
+#' @param gene_expr gene expression matrix as defined in sponge |
|
12 |
+#' @param mir_expr miRNA expression matrix as defined in sponge |
|
13 |
+#' @param ... parameters passed on to the sponge function |
|
14 |
+#' |
|
15 |
+#' @references sponge |
|
16 |
+#' @return a summary of the results with mean and standard deviations of the |
|
17 |
+#' correlation and sensitive correlation. |
|
18 |
+#' @export |
|
19 |
+#' |
|
20 |
+#' @examples test <- sponge_subsampling(gene_expr = gene_expr, |
|
21 |
+#' mir_expr = mir_expr, mir_interactions = mir_interactions) |
|
22 |
+ |
|
23 |
+ |
|
24 |
+ |
|
25 |
+sponge_subsampling <- function( |
|
26 |
+ subsample.n = 100, |
|
27 |
+ subsample.repeats = 10, |
|
28 |
+ subsample.with.replacement = FALSE, |
|
29 |
+ subsample.plot = FALSE, |
|
30 |
+ gene_expr, |
|
31 |
+ mir_expr, |
|
32 |
+ ...){ |
|
33 |
+ |
|
34 |
+ subsample_results <- |
|
35 |
+ foreach(sub.n = subsample.n, .combine = rbind) %do%{ |
|
36 |
+ foreach(r = 1:subsample.repeats, .combine = rbind) %do% { |
|
37 |
+ random_draw <- sample.int(nrow(gene_expr), sub.n, replace = subsample.with.replacement) |
|
38 |
+ |
|
39 |
+ sub_gene_expr <- gene_expr[random_draw,] |
|
40 |
+ sub_mir_expr <- mir_expr[random_draw,] |
|
41 |
+ |
|
42 |
+ if(subsample.with.replacement){ |
|
43 |
+ rownames(sub_gene_expr) <- |
|
44 |
+ make.names(rownames(sub_gene_expr), unique = TRUE) |
|
45 |
+ |
|
46 |
+ rownames(sub_mir_expr) <- |
|
47 |
+ make.names(rownames(sub_mir_expr), unique = TRUE) |
|
48 |
+ } |
|
49 |
+ |
|
50 |
+ result <- sponge(gene_expr = sub_gene_expr, |
|
51 |
+ mir_expr = sub_mir_expr, ...) |
|
52 |
+ result$sub.n <- sub.n |
|
53 |
+ return(result) |
|
54 |
+ } |
|
55 |
+ } |
|
56 |
+ |
|
57 |
+ if(subsample.plot){ |
|
58 |
+ subsample_scor_plot <- ggplot(subsample_results, |
|
59 |
+ aes(x = paste(geneA, geneB, sep = " - "), |
|
60 |
+ y = cor - pcor)) + |
|
61 |
+ geom_boxplot(aes(fill = "green")) + |
|
62 |
+ geom_boxplot(aes(y = cor, fill = "red")) + |
|
63 |
+ scale_fill_discrete(name = "", |
|
64 |
+ labels = c("sensitivity correlation", "correlation")) + |
|
65 |
+ theme_bw() + |
|
66 |
+ theme(axis.text.x=element_text(angle=90, hjust=1, vjust = 0.5)) + |
|
67 |
+ ylab("") + |
|
68 |
+ xlab("ceRNA interaction") |
|
69 |
+ |
|
70 |
+ if(length(subsample.n) > 1){ |
|
71 |
+ subsample_scor_plot <- subsample_scor_plot + |
|
72 |
+ facet_wrap(~sub.n, ncol = 1) |
|
73 |
+ } |
|
74 |
+ |
|
75 |
+ print(subsample_scor_plot) |
|
76 |
+ } |
|
77 |
+ |
|
78 |
+ subsample_results %>% dplyr::group_by(geneA, geneB, df) %>% |
|
79 |
+ dplyr::summarize(cor_mean = mean(cor), |
|
80 |
+ cor_sd = sd(cor), |
|
81 |
+ pcor_mean = mean(pcor), |
|
82 |
+ pcor_sd = sd(pcor)) |
|
83 |
+ |
|
84 |
+} |
|
0 | 85 |
\ No newline at end of file |
... | ... |
@@ -6,6 +6,10 @@ |
6 | 6 |
\usage{ |
7 | 7 |
genes_pairwise_combinations(number.of.genes) |
8 | 8 |
} |
9 |
+\arguments{ |
|
10 |
+\item{number.of.genes}{Number of genes for which all pairwise interactions |
|
11 |
+are needed} |
|
12 |
+} |
|
9 | 13 |
\value{ |
10 | 14 |
data frame with one row per unique pairwise combination. To be used |
11 | 15 |
as input for the sponge method. |
... | ... |
@@ -25,6 +25,10 @@ all miRNA interaction partners that should be considered.} |
25 | 25 |
\item{selected.genes}{Operate only on a subset of genes, particularly |
26 | 26 |
useful for bootstrapping} |
27 | 27 |
|
28 |
+\item{gene.combinations}{A data frame of combinations of genes to be tested. |
|
29 |
+Gene names are taken from the first two columns and have to match the names |
|
30 |
+used for gene_expr} |
|
31 |
+ |
|
28 | 32 |
\item{p.adj.method}{Multiple testing correction method. see ?p.adjust for |
29 | 33 |
details} |
30 | 34 |
|