Browse code

code for simulations, bugfixes

mlist authored on 10/02/2017 14:35:43
Showing 11 changed files

... ...
@@ -7,4 +7,14 @@ Maintainer: Markus List <[email protected]>
7 7
 Description: This package provides methods to efficiently detect competitive endogeneous RNA interactions between two genes. Such interactions are mediated by one or several miRNAs such that both gene and miRNA expression data for a larger number of samples is needed as input.
8 8
 License: GPLv3
9 9
 LazyData: TRUE
10
-RoxygenNote: 5.0.1
11 10
\ No newline at end of file
11
+RoxygenNote: 5.0.1
12
+Suggests:
13
+    testthat,
14
+    ggplot2,
15
+    d3network
16
+Imports:
17
+    ppcor,
18
+    logging,
19
+    foreach,
20
+    dplyr,
21
+    stringr
... ...
@@ -1,15 +1,17 @@
1 1
 # Generated by roxygen2: do not edit by hand
2 2
 
3 3
 export(gene_miRNA_interaction_filter)
4
-export(genes_pairwise_combinations)
5 4
 export(sponge)
5
+export(sponge_subsampling)
6 6
 import(dplyr)
7 7
 import(foreach)
8
+import(ggplot2)
8 9
 import(glmnet)
9 10
 import(logging)
10 11
 import(mirbase.db)
11 12
 import(ppcor)
12 13
 import(targetscan.Hs.eg.db)
14
+importFrom(foreach,foreach)
13 15
 importFrom(gRbase,combnPrim)
14 16
 importFrom(glmnet,cv.glmnet)
15 17
 importFrom(iterators,icount)
... ...
@@ -10,6 +10,11 @@
10 10
 #' with samples in columns and miRNA in rows
11 11
 "mir_expr"
12 12
 
13
+#' miRNA / gene interactions
14
+#'
15
+#' @format A data frame of regression coefficients
16
+#' typically provided by gene_miRNA_interaction_filter
17
+"mir_interactions"
13 18
 
14 19
 #' miRNA family mapping table
15 20
 #'
... ...
@@ -1,15 +1,61 @@
1
-sparc_network_centralities <- function(network, directed = FALSE){
1
+#' Computes various node centralities
2
+#'
3
+#' @description Computes degree, eigenvector centrality and betweenness
4
+#' centrality for the ceRNA interaction network induced by the results of the
5
+#' SPONGE method
6
+#'
7
+#' @param sponge_result output of the sponge method
8
+#'
9
+#' @importFrom igraph graph.data.frame
10
+#' @importFrom igraph eigen_centrality
11
+#' @importFrom igraph betweenness
12
+#' @importFrom igraph degree
13
+#'
14
+#' @return data.table with gene, degree, eigenvector and betweenness
15
+#' @export
16
+#'
17
+#' @seealso sponge
18
+#'
19
+#' @examples
20
+sponge_node_centralities <- function(sponge_result){
21
+    directed <- FALSE
2 22
 
3
-    ev_centrality <- eigen_centrality(network, directed = directed)
4
-    btw_centrality <- betweenness(network, directed = directed)
5
-    centrality_df <- data.frame(gene = names(ev_centrality$vector),
23
+    network <- igraph::graph.data.frame(sponge_result)
24
+
25
+    ev_centrality <- igraph::eigen_centrality(network, directed = directed)
26
+    btw_centrality <- igraph::betweenness(network, directed = directed)
27
+    centrality_df <- data.table(gene = names(ev_centrality$vector),
6 28
                                 degree = igraph::degree(network),
7 29
                                 eigenvector = ev_centrality$vector,
8 30
                                 betweenness = btw_centrality)
9 31
     return(centrality_df)
10 32
 }
11 33
 
12
-sparc_edge_betweenness <- function(network, directed = FALSE){
34
+#' Computes various edge centralities
35
+#'
36
+#' @description Computes edge betweenness
37
+#' centrality for the ceRNA interaction network induced by the results of the
38
+#' SPONGE method.
39
+#'
40
+#' @param sponge_result
41
+#'
42
+#' @importFrom igraph graph.data.frame
43
+#' @importFrom igraph E
44
+#' @importFrom igraph edge_betweenness
45
+#' @importFrom igraph degree
46
+#' @importFrom tidyr separate
47
+#'
48
+#' @return data.table with gene, degree, eigenvector and betweenness
49
+#' @export
50
+#'
51
+#' @seealso sponge
52
+#'
53
+#' @examples
54
+sponge_edge_centralities <- function(sponge_result){
55
+    directed <- FALSE
56
+
57
+    network <- igraph::graph.data.frame(sponge_result)
58
+
13 59
     edge_labels <- attr(E(network), "vnames")
14 60
     ebtw <- edge_betweenness(network, directed = directed)
15 61
     ebtw <- data.frame(labels = edge_labels, edge_betweenness = ebtw)
... ...
@@ -17,19 +63,19 @@ sparc_edge_betweenness <- function(network, directed = FALSE){
17 63
     return(ebtw)
18 64
 }
19 65
 
20
-sparc_matched_edge_betweenness <- function(cancer_network, normal_network, directed = FALSE){
21
-    liver_cancer_edge_betweenness <- sparc_edge_betweenness(network = liver_cancer_sparc_network)
22
-    liver_normal_edge_betweenness <- sparc_edge_betweenness(network = liver_normal_sparc_network)
66
+sponge_matched_edge_betweenness <- function(cancer_network, normal_network, directed = FALSE){
67
+    liver_cancer_edge_betweenness <- sponge_edge_betweenness(network = liver_cancer_sponge_network)
68
+    liver_normal_edge_betweenness <- sponge_edge_betweenness(network = liver_normal_sponge_network)
23 69
 
24 70
     dplyr::inner_join(liver_cancer_edge_betweenness, liver_normal_edge_betweenness, by = c("source_gene", "target_gene")) %>%
25 71
         dplyr::rename(edge_betweenness_cancer = edge_betweenness.x, edge_betweenness_normal = edge_betweenness.y) %>%
26 72
         dplyr::mutate(edge_betweenness_diff = edge_betweenness_cancer - edge_betweenness_normal)
27 73
 }
28 74
 
29
-sparc_matched_network_centralities <- function(cancer_network, normal_network, directed = FALSE){
75
+sponge_matched_network_centralities <- function(cancer_network, normal_network, directed = FALSE){
30 76
 
31
-    cancer_centrality_df <- sparc_network_centralities(cancer_network, directed)
32
-    normal_centrality_df <- sparc_network_centralities(normal_network, directed)
77
+    cancer_centrality_df <- sponge_network_centralities(cancer_network, directed)
78
+    normal_centrality_df <- sponge_network_centralities(normal_network, directed)
33 79
 
34 80
     centrality_matched <- dplyr::full_join(normal_centrality_df, cancer_centrality_df, by="gene") %>%
35 81
         dplyr::rename(eigenvector_centrality.normal = eigenvector.x,
... ...
@@ -51,7 +97,7 @@ sparc_matched_network_centralities <- function(cancer_network, normal_network, d
51 97
     return(centrality_matched)
52 98
 }
53 99
 
54
-sparc_transform_centralities_for_plotting <- function(network_centralities){
100
+sponge_transform_centralities_for_plotting <- function(network_centralities){
55 101
     library(dplyr)
56 102
     library(digest)
57 103
     plot.data <- bind_rows(dplyr::select(network_centralities,
... ...
@@ -70,7 +116,7 @@ sparc_transform_centralities_for_plotting <- function(network_centralities){
70 116
     return(plot.data)
71 117
 }
72 118
 
73
-sparc_plot_top_edge_betweenness <- function(edge_betweenness, n){
119
+sponge_plot_edge_centralities <- function(edge_centralities, n){
74 120
     top_x <- head(dplyr::arrange(edge_betweenness, desc(edge_betweenness)), n) %>%
75 121
         dplyr::mutate(edge = paste(source_gene, target_gene, sep="|"))
76 122
     ggplot(top_x, aes(x = reorder(edge, -edge_betweenness), y = edge_betweenness)) +
... ...
@@ -81,11 +127,11 @@ sparc_plot_top_edge_betweenness <- function(edge_betweenness, n){
81 127
         theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
82 128
 }
83 129
 
84
-sparc_plot_network_centralities <- function(network_centralities, measure="all"){
130
+sponge_plot_network_centralities <- function(network_centralities, measure="all"){
85 131
     library(ggplot2)
86 132
     library(ggrepel)
87 133
 
88
-    plot.data <- sparc_transform_centralities_for_plotting(network_centralities)
134
+    plot.data <- sponge_transform_centralities_for_plotting(network_centralities)
89 135
 
90 136
     p1 <- ggplot(plot.data, aes(x=degree)) +
91 137
         geom_histogram(color=I("black"), fill=I("black"), alpha = 0.3)+
... ...
@@ -119,7 +165,7 @@ sparc_plot_network_centralities <- function(network_centralities, measure="all")
119 165
                       ncol = 1)
120 166
 }
121 167
 
122
-sparc_plot_eigenvector_centralities_differences <- function(network_centralities, label.threshold){
168
+sponge_plot_eigenvector_centralities_differences <- function(network_centralities, label.threshold){
123 169
     network_centralities <- network_centralities %>% mutate(color = paste("#", substr(sapply(gene, function(x) digest(x, algo = "crc32")), 1, 6), sep=""))
124 170
 
125 171
     ggplot(data = network_centralities,
... ...
@@ -133,7 +179,7 @@ sparc_plot_eigenvector_centralities_differences <- function(network_centralities
133 179
         geom_label_repel(data = dplyr::filter(network_centralities, abs(eigenvector_centrality.diff) > label.threshold))
134 180
 }
135 181
 
136
-sparc_plot_betweenness_centralities_differences <- function(network_centralities, label.threshold){
182
+sponge_plot_betweenness_centralities_differences <- function(network_centralities, label.threshold){
137 183
     network_centralities <- network_centralities %>% mutate(color = paste("#", substr(sapply(gene, function(x) digest(x, algo = "crc32")), 1, 6), sep=""))
138 184
 
139 185
     ggplot(data = network_centralities,
... ...
@@ -147,8 +193,8 @@ sparc_plot_betweenness_centralities_differences <- function(network_centralities
147 193
         geom_label_repel(data = dplyr::filter(network_centralities, abs(betweenness_centrality.diff) > label.threshold))
148 194
 }
149 195
 
150
-sparc_plot_top_centralities <- function(network_centralities, top = 50,
151
-                                         known.sparc.genes = c("ESR1", "CD44", "LIN28B", "HULC", "KRAS1P", "HSUR1", "HSUR2", "BRAFP1", "VCAN", "LINCMD1", "H19"),
196
+sponge_plot_top_centralities <- function(network_centralities, top = 50,
197
+                                         known.sponge.genes = c("ESR1", "CD44", "LIN28B", "HULC", "KRAS1P", "HSUR1", "HSUR2", "BRAFP1", "VCAN", "LINCMD1", "H19"),
152 198
                                          known.cancer.genes = c("TP53", "ESR1", "CD44", "KRAS"),
153 199
                                          only=""){
154 200
 
... ...
@@ -165,7 +211,7 @@ sparc_plot_top_centralities <- function(network_centralities, top = 50,
165 211
     }
166 212
 
167 213
     network_centralities$ceRNA = "novel"
168
-    network_centralities[which(network_centralities$gene %in% known.sparc.genes), "ceRNA"] <- "known"
214
+    network_centralities[which(network_centralities$gene %in% known.sponge.genes), "ceRNA"] <- "known"
169 215
     network_centralities$ceRNA <- factor(network_centralities$ceRNA, levels = c("novel", "known"))
170 216
 
171 217
     network_centralities$cancer = FALSE
... ...
@@ -1,37 +1,37 @@
1 1
 sponge_plot_density <- function(cancer_sponge_effects, normal_sponge_effects){
2 2
     cancer_sponge_effects$type <- "Liver cancer"
3 3
     normal_sponge_effects$type <- "Liver normal"
4
-    
5
-    p1 <- ggplot(cancer_sponge_effects) + 
4
+
5
+    p1 <- ggplot(cancer_sponge_effects) +
6 6
         facet_wrap(~type) +
7 7
         xlab("Cohen's q") +
8
-        geom_density(aes(x = cohens_q, fill = I("cornflowerblue")), alpha = 0.6)
9
-    
10
-    p2 <- ggplot(normal_sponge_effects) + 
8
+        geom_density(aes(x = scor, fill = I("cornflowerblue")), alpha = 0.6)
9
+
10
+    p2 <- ggplot(normal_sponge_effects) +
11 11
         facet_wrap(~type) +
12 12
         xlab("Cohen's q") +
13
-        geom_density(aes(x = cohens_q, fill = I("darkorchid1")), alpha = 0.6)
14
-    
15
-    p3 <- ggplot(cancer_sponge_effects) + 
13
+        geom_density(aes(x = scor, fill = I("darkorchid1")), alpha = 0.6)
14
+
15
+    p3 <- ggplot(cancer_sponge_effects) +
16 16
         xlab("p-value") +
17
-        geom_density(aes(x = cohens_q_p, fill = I("cornflowerblue")), alpha = 0.6)
18
-    
19
-    p4 <- ggplot(normal_sponge_effects) + 
17
+        geom_density(aes(x = scor_p, fill = I("cornflowerblue")), alpha = 0.6)
18
+
19
+    p4 <- ggplot(normal_sponge_effects) +
20 20
         xlab("p-value") +
21
-        geom_density(aes(x = cohens_q_p, fill = I("darkorchid1")), alpha = 0.6)
22
-    
23
-    p5 <- ggplot(cancer_sponge_effects) + 
21
+        geom_density(aes(x = scor_p, fill = I("darkorchid1")), alpha = 0.6)
22
+
23
+    p5 <- ggplot(cancer_sponge_effects) +
24 24
         xlab("adjusted p-value (BH)") +
25
-        geom_density(aes(x = cohens_q_p.adj, fill = I("cornflowerblue")), alpha = 0.6)
26
-    
27
-    p6 <- ggplot(normal_sponge_effects) + 
25
+        geom_density(aes(x = scor_p.adj, fill = I("cornflowerblue")), alpha = 0.6)
26
+
27
+    p6 <- ggplot(normal_sponge_effects) +
28 28
         xlab("ajusted p-value (BH)") +
29
-        geom_density(aes(x = cohens_q_p.adj, fill = I("darkorchid1")), alpha = 0.6)
30
-    
29
+        geom_density(aes(x = scor_p.adj, fill = I("darkorchid1")), alpha = 0.6)
30
+
31 31
     grid.arrange(p1, p2, p3, p4, p5, p6, ncol = 2)
32 32
 }
33 33
 
34
-sponge_plot_heatmap <- function(data, interactive=T, show = "cohens_q"){
34
+sponge_plot_heatmap <- function(data, interactive=T, show = "scor"){
35 35
     if(require(d3heatmap) && interactive){
36 36
         sponge.matrix <- xtabs(p.adj ~ source_gene + target_gene, data = data)
37 37
         d3heatmap(sponge.matrix, dendrogram = "none", symm=T)
... ...
@@ -43,8 +43,8 @@ sponge_plot_heatmap <- function(data, interactive=T, show = "cohens_q"){
43 43
         data[data$p.adj < 0.05, "significance"] <- "*"
44 44
         data[data$p.adj < 0.01, "significance"] <- "**"
45 45
         data[data$p.adj < 0.001, "significance"] <- "***"
46
-        ggplot(data = data, aes_string(fill = show, x = "source_gene", y = "target_gene")) + 
47
-            geom_tile() + 
46
+        ggplot(data = data, aes_string(fill = show, x = "source_gene", y = "target_gene")) +
47
+            geom_tile() +
48 48
             theme_bw() +
49 49
             geom_text(label=data$significance)
50 50
     }
... ...
@@ -55,23 +55,23 @@ sponge_plot_heatmap <- function(data, interactive=T, show = "cohens_q"){
55 55
 
56 56
 sponge_plot_boxplot <- function(data){
57 57
     if(!require(ggplot2)) stop("library ggplot2 needs to be installed for this plot")
58
-    
59
-    ggplot(data = data, aes(x = source_gene, y = cohens_q)) + geom_boxplot(fill = "skyblue", aes(outlier.color = p.adj)) + theme_bw()
58
+
59
+    ggplot(data = data, aes(x = source_gene, y = scor)) + geom_boxplot(fill = "skyblue", aes(outlier.color = p.adj)) + theme_bw()
60 60
 }
61 61
 
62
-sponge_network <- function(sponge.data, 
63
-                           mir.data, 
62
+sponge_network <- function(sponge.data,
63
+                           mir.data,
64 64
                            target.genes = NULL,
65
-                           cerna.p.adj.threshold = 0.05, 
66
-                           show.sponge.interaction = TRUE, 
65
+                           cerna.p.val.threshold = 0.05,
66
+                           show.sponge.interaction = TRUE,
67 67
                            show.mirnas = c("none", "all", "shared"),
68 68
                            replace.mirna.with.name = TRUE,
69 69
                            min.interactions = 3){
70 70
     library(foreach)
71 71
     library(iterators)
72 72
     library(dplyr)
73
-    sponge.data <- filter(sponge.data, cohens_q_p.adj < cerna.p.adj.threshold)
74
-    
73
+    sponge.data <- filter(sponge.data, p.val < cerna.p.val.threshold)
74
+
75 75
     #genes <- unique(c(as.character(sponge.data$source_gene), as.character(sponge.data$target_gene)))
76 76
     genes <- unique(c(as.character(sponge.data$source_gene), as.character(sponge.data$target_gene)))
77 77
 
... ...
@@ -79,26 +79,26 @@ sponge_network <- function(sponge.data,
79 79
     nodes <- NULL
80 80
 
81 81
     #sponge genes edges
82
-    sponge.edges <- with(sponge.data, { 
83
-        result <- data.frame(from=source_gene, to=target_gene, width = cohens_q, color="red") 
84
-        
82
+    sponge.edges <- with(sponge.data, {
83
+        result <- data.frame(from=source_gene, to=target_gene, width = scor, color="red")
84
+
85 85
         result[which(result$to %in% genes | result$from %in% genes),]
86 86
     })
87
-    
87
+
88 88
     if(show.sponge.interaction) edges <- rbind(edges, sponge.edges)
89
-    
89
+
90 90
     #sponge gene nodes
91 91
     sponge.nodes <- data.frame(id = genes, label = genes, shape = "square", color="darkgreen", type=FALSE, value=1)
92 92
     nodes <- rbind(nodes, sponge.nodes)
93
-    
93
+
94 94
     if(!is.null(target.genes)){
95 95
         nodes[which(nodes$id %in% target.genes), "shape"] <- "square"
96 96
         nodes[which(nodes$id %in% target.genes), "color"] <- "blue"
97 97
     }
98
-    
98
+
99 99
     #mirna nodes and edges
100 100
     if(show.mirnas != "none"){
101
-        
101
+
102 102
         mirna.edges <- foreach(gene = nodes$id, .combine=rbind) %do% {
103 103
             gene_mirnas <- mir.data[[as.character(gene)]]
104 104
             gene_mirnas <- dplyr::filter(gene_mirnas, coefficient < 0)
... ...
@@ -111,50 +111,50 @@ sponge_network <- function(sponge.data,
111 111
         {
112 112
             #consider only miRNAs shared with a source gene
113 113
             mirnas <- mirna.edges[which(mirna.edges$from %in% genes),"to"]
114
-            
114
+
115 115
             #count mirnas that appear form at least x edges
116 116
             mirnas <- intersect(mirnas, names(which(table(as.character(mirna.edges$to)) > min.interactions)))
117
-            
117
+
118 118
             mirna.edges <- mirna.edges[which(mirna.edges$to %in% mirnas),]
119
-        }            
120
-        
119
+        }
120
+
121 121
         else mirnas <- unique(as.character(mirna.edges$to))
122 122
 
123 123
         if(length(mirnas) > 0){
124 124
             nodes <- rbind(nodes, data.frame(id = mirnas, label = mirnas, shape = "triangle", color="darkblue", type=TRUE, value=1))
125 125
             edges <- rbind(edges, mirna.edges)
126
-            
126
+
127 127
             #replace mirna with name
128 128
             if(replace.mirna.with.name){
129 129
 
130 130
                 nodes$label <- as.character(nodes$label)
131 131
                 nodes[which(nodes$type), "label"] <- as.character(fn_map_mimat_to_mir(as.character(nodes[which(nodes$type), "id"])))
132 132
             }
133
-        } 
133
+        }
134 134
         else{
135 135
             stop("No miRNAs found that match all criteria")
136
-        } 
136
+        }
137 137
     }
138
-    
138
+
139 139
     #filter out orphan nodes
140 140
     nodes <- nodes[which(nodes$id %in% c(as.character(edges$to), as.character(edges$from))),]
141
-    
141
+
142 142
     return(list(nodes=nodes, edges=edges))
143 143
 }
144 144
 
145
-sponge_plot_network <- function(nodes, 
146
-                                edges, 
147
-                                layout="layout.fruchterman.reingold", 
145
+sponge_plot_network <- function(nodes,
146
+                                edges,
147
+                                layout="layout.fruchterman.reingold",
148 148
                                 force.directed = FALSE){
149 149
     library(visNetwork)
150
-    
150
+
151 151
     if(nrow(edges) < 5000){
152 152
         plot <- visNetwork(nodes, edges)
153
-        plot <- plot %>% visIgraphLayout(layout = layout, type="full", physics = force.directed) 
153
+        plot <- plot %>% visIgraphLayout(layout = layout, type="full", physics = force.directed)
154 154
         plot <- plot %>% visOptions(highlightNearest = TRUE, nodesIdSelection = TRUE)
155 155
         plot <- plot %>% visNodes(font = list(size = 32))
156 156
         plot <- plot %>% visEdges(color = list(opacity = 1))
157
-        
157
+
158 158
         return(plot)
159 159
     }
160 160
     else{
161 161
new file mode 100644
... ...
@@ -0,0 +1,86 @@
1
+compute_p_values <- function(partition,
2
+                             cov.matrices,
3
+                             number.of.datasets = 1e6,
4
+                             number.of.samples){
5
+
6
+    if(!("scor" %in% colnames(partition))) stop("sensitivity correlation missing")
7
+
8
+    #check which k and m
9
+    k <- as.character(partition[1,cor_cut])
10
+    m <- as.character(partition[1,df_cut])
11
+
12
+    #simulate data using the appropriate covariance matrices
13
+    cov.matrices.partition <- cov.matrices[[m]][[k]]
14
+
15
+    #to reach the necessary number of datasets we need to find out how many
16
+    #datasets to construct from each covariance matrix we have
17
+    number.of.datasets.per.matrix <- ceiling(number.of.datasets / length(cov.matrices.partition))
18
+
19
+    scor <- unlist(sample_zero_scor_data(cov.matrices = cov.matrices.partition,
20
+                                         number.of.datasets = number.of.datasets.per.matrix,
21
+                                         number.of.samples = number.of.samples)[1:number.of.datasets])
22
+
23
+    test_data_dt <- data.table(scor) #data.table(scor[which(scor > 0)])
24
+    setkey(test_data_dt, scor)
25
+    number.of.datasets.on.right.side <- length(test_data_dt$scor)
26
+
27
+    #partition_scor_positive <- partition[which(partition$scor >= 0),]
28
+
29
+    partition$p.val <- (number.of.datasets.on.right.side -
30
+        test_data_dt[J(partition$scor),
31
+                     .I,
32
+                     roll = "nearest",
33
+                     by = .EACHI]$I) / number.of.datasets.on.right.side
34
+
35
+    return(partition)
36
+}
37
+
38
+sponge_compute_p_values <- function(sponge_result,
39
+                                    cov.matrices,
40
+                                    number.of.samples,
41
+                                    number.of.datasets = 10,
42
+                                    ms, ks){
43
+
44
+    #divide gene_gene correlation
45
+    if(max(sponge_result$df) > 7) df_breaks <- c(seq(0,7), max(sponge_result$df))
46
+    else df_breaks <- seq(0, max(sponge_result$df))
47
+
48
+    sponge_result <- sponge_result[,
49
+                c("cor_cut", "df_cut") := list(
50
+                    cut(abs(cor), breaks = c(0, seq(0.25, 0.85, 0.1), 1)),
51
+                    cut(df, breaks = df_breaks))]
52
+
53
+    levels(sponge_result$cor_cut) <- ks
54
+    levels(sponge_result$df_cut) <- ms
55
+
56
+    isplitDT2 <- function(x, ks, ms) {
57
+        ival <- iter(apply(expand.grid(ks, ms), 1, list))
58
+        nextEl <- function() {
59
+            val <- nextElem(ival)
60
+            list(value=x[.(as.character(val[[1]][1]),
61
+                           as.character(val[[1]][2]))], key=val[[1]])
62
+        }
63
+        obj <- list(nextElem=nextEl)
64
+        class(obj) <- c('abstractiter', 'iter')
65
+        obj
66
+    }
67
+
68
+    dtcomb <- function(...) {
69
+        rbindlist(list(...))
70
+    }
71
+
72
+    setkey(sponge_result, cor_cut, df_cut)
73
+
74
+    foreach(dt.m=isplitDT2(sponge_result, ks, ms),
75
+        .combine='dtcomb',
76
+        .multicombine=TRUE,
77
+        .export = c("compute_p_values",
78
+                    "sample_zero_scor_data"),
79
+        .packages = c("gRbase", "MASS", "ppcor", "foreach", "logging", "data.table"),
80
+        .noexport = c("sponge_result")) %dopar% {
81
+            compute_p_values(partition = dt.m$value,
82
+                             cov.matrices = cov.matrices,
83
+                             number.of.datasets = number.of.datasets,
84
+                             number.of.samples = number.of.samples)
85
+        }
86
+}
0 87
\ No newline at end of file
1 88
new file mode 100644
... ...
@@ -0,0 +1,287 @@
1
+library(MASS)
2
+library(foreach)
3
+library(gRbase)
4
+library(testthat)
5
+library(ppcor)
6
+library(expm)
7
+library(logging)
8
+
9
+#generates a positive semi definite matrix
10
+posdef <- function (n, ev = runif(n, 0, 10))
11
+{
12
+    Z <- matrix(ncol=n, rnorm(n^2))
13
+    decomp <- qr(Z)
14
+    Q <- qr.Q(decomp)
15
+    R <- qr.R(decomp)
16
+    d <- diag(R)
17
+    ph <- d / abs(d)
18
+    O <- Q %*% diag(ph)
19
+    Z <- t(O) %*% diag(ev) %*% O
20
+    return(Z)
21
+}
22
+
23
+#computes the schur complement with respect to R22
24
+schur<-function(R) {
25
+    ((R[1:2,1:2]) - (R[1:2,3:ncol(R)]) %*% (solve(R[3:nrow(R),3:ncol(R)])) %*% t(R[1:2,3:ncol(R)]))
26
+}
27
+
28
+#computes sensitivity correlation
29
+get.q = function(S) {
30
+    s11  = S[1:2,1:2] #gene-gene covariance
31
+    pva  = schur(S) #partial covariance
32
+    q    = s11/outer(sqrt(diag(s11)),sqrt(diag(s11))) - pva / outer(sqrt(diag(pva)),sqrt(diag(pva))) #- sens - corr
33
+    return(q)
34
+}
35
+
36
+#computes lambda from the dot product
37
+checkLambda <- function(x,y){
38
+    if(length(x) != length(y)) stop("input should be two vectors of equal length")
39
+    beta <- foreach(i = 1:length(x), .combine = sum) %do% {x[i] * y[i]}
40
+    norm.x <- base::norm(x,type="2")
41
+    norm.y <- base::norm(y,type="2")
42
+    (beta / (norm.x * norm.y))
43
+}
44
+
45
+quadraticSolver <- function(a, b, c){
46
+    bsquare4ac <- b^2 - 4 * a * c
47
+
48
+    tryCatch({
49
+    if(bsquare4ac < 0 || a == 0){
50
+        return(NULL)
51
+    }
52
+
53
+    solution.1 <- (-b + sqrt(bsquare4ac)) / (2 * a)
54
+    solution.2 <- (-b - sqrt(bsquare4ac)) / (2 * a)
55
+
56
+    return(list(solution.1, solution.2))
57
+    }, error = function(e){
58
+        return(NULL)
59
+    })
60
+}
61
+
62
+#main method for sampling zero sensitivity covariance matrices
63
+sample_zero_scor_cov <- function(m, number.of.solutions,
64
+                                 number.of.attempts = 1e3,
65
+                                 gene_gene_correlation = NULL,
66
+                                 log.level = "INFO"){
67
+
68
+    solutions <- foreach(solution = 1:number.of.solutions,
69
+                         .packages = c("MASS", "gRbase", "testthat",
70
+                                       "ppcor", "expm", "logging",
71
+                                       "foreach"),
72
+                         .export = c("quadraticSolver",
73
+                                     "checkLambda",
74
+                                     "get.q",
75
+                                     "schur",
76
+                                     "posdef")) %dopar% {
77
+        total <- 0
78
+        basicConfig(level = log.level)
79
+
80
+        loginfo(paste("Looking for zero sensitivity covariance matrix for case m =",
81
+                      m, "solution no.", solution))
82
+        #a lot of solutions are not within our constraints, so we repeat
83
+        while(total < number.of.attempts){
84
+            total <- total + 1
85
+            if(is.null(gene_gene_correlation)) K = runif(1,-1,1) # R11
86
+            else K = gene_gene_correlation
87
+            v1 <- runif(m, -1, 1) #random v1
88
+
89
+            if(m == 1){
90
+                R22 <- 1
91
+                Z <- v1
92
+
93
+                a <- K^2 + Z^2 - K^2 * Z^2
94
+                b <- -2 * K * Z
95
+                c <- Z^2 * K^2
96
+
97
+                v2.solutions <- quadraticSolver(a, b, c)
98
+                if(is.null(v2.solutions)){
99
+                    logdebug("no solution for v2 found")
100
+                    next
101
+                }
102
+                else{
103
+                    v2 <- v2.solutions[[1]]
104
+                }
105
+            }
106
+            else{
107
+                lambda = runif(1,-1,1) #cos theta
108
+
109
+                R22 <- cov2cor(posdef(m)) #miRNA correlation
110
+                R22_invsqrt <- ginv(sqrtm(R22)) #R^-1/2
111
+
112
+                u1 <- (R22_invsqrt %*% v1)[,1]
113
+                u1.norm = base::norm(u1, type = "2")
114
+
115
+                #solving quadratic equation to obtain u2 solutions
116
+                a <- ((K^2 - lambda^2) * u1.norm^2 - K^2)
117
+                b <- 2 * lambda * K * u1.norm
118
+                c <- -K^2 * u1.norm^2
119
+
120
+                u2.solutions <- quadraticSolver(a, b, c)
121
+                if(is.null(u2.solutions)){
122
+                    logdebug("no solution for u2")
123
+                    next
124
+                }
125
+
126
+                u2.norm.1 <- u2.solutions[[1]]
127
+                u2.norm.2 <- u2.solutions[[2]]
128
+
129
+                r12.m <- (K - sqrt(u1.norm^2) * sqrt(u2.norm.1^2) * lambda) * (1 - u1.norm^2)^-(1/2) * (1 - u2.norm.1^2)^-(1/2)
130
+
131
+                if(is.nan(r12.m)){
132
+                    r12.m <- (K - sqrt(u1.norm^2) * sqrt(u2.norm.2^2) * lambda) * (1 - u1.norm^2)^-(1/2) * (1 - u2.norm.2^2)^-(1/2)
133
+                }
134
+
135
+                if(is.nan(r12.m)){
136
+                    logdebug("||u2|| is not a valid solution")
137
+                    next
138
+                }
139
+
140
+                if(r12.m != K){
141
+                    logdebug("correlation is not equal to partial correlation for selected ||u2||")
142
+                    next
143
+                }
144
+
145
+                constraints <- (R22_invsqrt %*% rep(1,m))[,1]
146
+
147
+                if(anyNA(constraints)){
148
+                    logdebug("R22^-(1/2) invalid. Can not compute constraints")
149
+                    next
150
+                }
151
+
152
+                u1.m <- u1[m]
153
+                u2.wo_m <- foreach(i = 1:(m-1), .combine = c) %do%{
154
+                    runif(1, -constraints[i], constraints[i])
155
+                }
156
+
157
+                if(anyNA(u2.wo_m)){
158
+                    logdebug("R22^-(1/2) invalid. Can not compute constraints")
159
+                    next
160
+                }
161
+
162
+                beta <- foreach(i = 1:(m-1), .combine = sum) %do% {u1[i] * u2.wo_m[i]}
163
+
164
+                A <- u1.m^2 - lambda^2 * u1.norm^2
165
+                B <- 2 * beta * u1.m
166
+                C <- beta^2 - lambda^2  * u1.norm^2 * sum(u2.wo_m^2)
167
+
168
+                u2.solutions <- quadraticSolver(A, B, C)
169
+
170
+                if(is.null(u2.solutions)){
171
+                    logdebug("no solution for the k-th element of u2")
172
+                    next
173
+                }
174
+                #solve quadratic equation
175
+                u2.k.1 <- u2.solutions[[1]]
176
+                u2.k.2 <- u2.solutions[[2]]
177
+
178
+                #check if lambda constraint is really fulfilled
179
+                u2 <- c(u2.wo_m, u2.k.1)
180
+
181
+                #if the sign is wrong we have to take the other solution
182
+                if(checkLambda(u1, u2) != lambda)
183
+                    u2 <- c(u2.wo_m, u2.k.2)
184
+
185
+                #normalize and then multiply by ||u2||
186
+                u2.scaled <- u2 / base::norm(u2, type = "2") * u2.norm.1
187
+
188
+                i <- 1
189
+                within_constraints <- TRUE
190
+                while(i < length(u2.scaled)){
191
+                    if(u2.scaled[i] > constraints[i] || u2.scaled[i] < -constraints[i]) {
192
+                        within_constraints <- FALSE
193
+                        break
194
+                    }
195
+                    i <- i+1
196
+                }
197
+
198
+                if(!within_constraints){
199
+                    logdebug("solution violates constraints")
200
+                    next
201
+                }
202
+                else
203
+                {
204
+                    #compute v2
205
+                    v2 = (solve(R22_invsqrt) %*% u2.scaled)[,1]
206
+                }
207
+            }
208
+
209
+            #construct correlation matrix R
210
+            R <- matrix(ncol = m+2, nrow = m+2)
211
+            diag(R) <- 1
212
+            R[1,2] <- K
213
+            R[2,1] <- K
214
+            R[3:(2+m),3:(2+m)] <- R22
215
+            R[1,3:(2+m)] <- v1
216
+            R[2,3:(2+m)] <- v2
217
+            R[3:(2+m), 1] <- v1
218
+            R[3:(2+m), 2] <- v2
219
+
220
+            #generate covariance matrix S
221
+            L = diag(exp(rnorm(2+m))) 							#- variances
222
+            S = L %*% R %*% L 									#- covariance matrix
223
+
224
+            #test for negative variance
225
+            if(any(diag(schur(S)) < 0)){
226
+                logdebug("negative variance in partial covariance matrix")
227
+                next
228
+            }
229
+
230
+            #test sensitivity correlation is zero
231
+            scor <- get.q(S)[1,2]
232
+            if(is.nan(scor)){
233
+                logdebug("sensitivity correlation is NaN")
234
+                next
235
+            }
236
+            else if(abs(scor) > sqrt(.Machine$double.eps)){
237
+                logdebug("sensitivity correlation is not zero")
238
+                next
239
+            }
240
+            else{
241
+                loginfo(paste("viable solution found for m =", m,
242
+                              "and k =", K,
243
+                               "solution no.", solution))
244
+                attr(S, "iterations") <- total
245
+                attr(S, "k") <- K
246
+                attr(S, "m") <- m
247
+                return(S)
248
+            }
249
+        }
250
+    }
251
+    solutions <- Filter(Negate(is.null), solutions)
252
+    if(length(solutions) == 0){
253
+        logerror("No solutions found")
254
+        return(NULL)
255
+    }
256
+    else{
257
+        total <- sum(unlist(lapply(solutions, function(x){
258
+            attr(x, "iterations")})))
259
+        loginfo(paste("case k = ", gene_gene_correlation, "m = ", m, " - Found",
260
+                      length(solutions), "solutions in a total of",
261
+                      total, "iterations."))
262
+    }
263
+
264
+    return(solutions)
265
+}
266
+
267
+
268
+sample_zero_scor_data <- function(cov.matrices,
269
+                                  number.of.samples = 100,
270
+                                  number.of.datasets = 100){
271
+    foreach(cov.matrix = cov.matrices,
272
+            .packages = c("gRbase", "MASS", "ppcor", "foreach", "logging")) %dopar% {
273
+        #check that sensitivity correlation is zero
274
+        if(abs(cov2pcor(cov.matrix)[1,2] - cov2cor(cov.matrix)[1,2]) > sqrt(.Machine$double.eps))
275
+            stop("sensitivity correlation of a given covariance matrix is not zero.")
276
+
277
+        #sample data under this covariance matrix
278
+        foreach(i = 1:number.of.datasets, .combine = c) %do%{
279
+            sample.data <- mvrnorm(n = number.of.samples,
280
+                                   rep(0, ncol(cov.matrix)),
281
+                                   cov.matrix,
282
+                                   empirical = FALSE)
283
+            cor(sample.data)[1,2] - pcor(sample.data)$estimate[1,2]
284
+        }
285
+    }
286
+}
287
+
... ...
@@ -41,12 +41,12 @@ fn_map_mimats_to_mir <- function(mimats){
41 41
 
42 42
 #' Compute all pairwise interactions for a number of genes as indices
43 43
 #'
44
-#' @param number.of.genes
44
+#' @param number.of.genes Number of genes for which all pairwise interactions
45
+#' are needed
45 46
 #' @importFrom gRbase combnPrim
46 47
 #'
47 48
 #' @return data frame with one row per unique pairwise combination. To be used
48 49
 #' as input for the sponge method.
49
-#' @export
50 50
 #'
51 51
 #' @examples genes_pairwise_combinations(ncol(gene_expr))
52 52
 genes_pairwise_combinations <- function(number.of.genes){
... ...
@@ -71,6 +71,9 @@ genes_pairwise_combinations <- function(number.of.genes){
71 71
 #' @param log.every.n write to the log after every n steps
72 72
 #' @param selected.genes Operate only on a subset of genes, particularly
73 73
 #' useful for bootstrapping
74
+#' @param gene.combinations A data frame of combinations of genes to be tested.
75
+#' Gene names are taken from the first two columns and have to match the names
76
+#' used for gene_expr
74 77
 #' @param p.adj.method Multiple testing correction method. see ?p.adjust for
75 78
 #' details
76 79
 #' @param p.value.threshold Multiple testing p-value cutoff
... ...
@@ -128,14 +131,19 @@ sponge <- function(gene_expr, mir_expr, mir_interactions,
128 131
         sel.genes <- available.selected.genes
129 132
     }
130 133
 
131
-    #consider only genes that have miRNA interactions
132
-    sel.genes <- Filter(Negate(is.null), sel.genes)
134
+    genes.as.indices <- FALSE
133 135
 
134 136
     #all pairwise combinations of selected genes
135 137
     if(is.null(gene.combinations)){
136 138
         loginfo("Computing all pairwise combinations of genes")
139
+
140
+        #consider only genes that have miRNA interactions
141
+        sel.genes <- Filter(Negate(is.null), sel.genes)
142
+
137 143
         gene.combinations <-
138 144
            genes_pairwise_combinations(length(sel.genes))
145
+
146
+        genes.as.indices <- TRUE
139 147
     }
140 148
 
141 149
     loginfo("Beginning SPONGE run...")
... ...
@@ -162,9 +170,14 @@ sponge <- function(gene_expr, mir_expr, mir_interactions,
162 170
         result <- foreach(gene_combi = iter(gene_combis, by="row"),
163 171
                             .combine=rbind) %do% {
164 172
 
165
-            geneA <- sel.genes[gene_combi[1]]
166
-            geneB <- sel.genes[gene_combi[2]]
167
-
173
+            if(genes.as.indices){
174
+                geneA <- sel.genes[gene_combi[1]]
175
+                geneB <- sel.genes[gene_combi[2]]
176
+            }
177
+            else {
178
+                geneA <- as.character(gene_combi[1,1])
179
+                geneB <- as.character(gene_combi[1,2])
180
+            }
168 181
 
169 182
             logdebug(paste("Processing source gene", geneA,
170 183
                            "and target gene", geneB ))
... ...
@@ -188,6 +201,13 @@ sponge <- function(gene_expr, mir_expr, mir_interactions,
188 201
                                                       mir_interactions)
189 202
             }
190 203
 
204
+            #check if shared miRNAs are in expression matrix
205
+            if(length(setdiff(mir_intersect, colnames(mir_expr))) > 0){
206
+                logwarn(paste("Source gene", geneA, "and target gene", geneB,
207
+                              "shared miRNAs not found in mir_expr are discarded"))
208
+                mir_intersect <- intersect(mir_intersect, colnames(mir_expr))
209
+            }
210
+
191 211
             #check if there are actually any shared mirnas
192 212
             if(length(mir_intersect) == 0){
193 213
                 logdebug(paste("Source gene", geneA, "and target gene", geneB,
194 214
new file mode 100644
... ...
@@ -0,0 +1,84 @@
1
+#' Sponge subsampling
2
+#' @importFrom foreach foreach
3
+#' @import ggplot2
4
+#' @import dplyr
5
+#'
6
+#' @param subsample.n the number of samples to be drawn in each round
7
+#' @param subsample.repeats how often should the subsampling be done?
8
+#' @param subsample.with.replacement logical, should we allow samples to be used
9
+#' repeatedly
10
+#' @param subsample.plot logical, should the results be plotted as box plots
11
+#' @param gene_expr gene expression matrix as defined in sponge
12
+#' @param mir_expr miRNA expression matrix as defined in sponge
13
+#' @param ... parameters passed on to the sponge function
14
+#'
15
+#' @references sponge
16
+#' @return a summary of the results with mean and standard deviations of the
17
+#' correlation and sensitive correlation.
18
+#' @export
19
+#'
20
+#' @examples test <- sponge_subsampling(gene_expr = gene_expr,
21
+#' mir_expr = mir_expr, mir_interactions = mir_interactions)
22
+
23
+
24
+
25
+sponge_subsampling <- function(
26
+                    subsample.n = 100,
27
+                    subsample.repeats = 10,
28
+                    subsample.with.replacement = FALSE,
29
+                    subsample.plot = FALSE,
30
+                    gene_expr,
31
+                    mir_expr,
32
+                    ...){
33
+
34
+    subsample_results <-
35
+    foreach(sub.n = subsample.n, .combine = rbind) %do%{
36
+        foreach(r = 1:subsample.repeats, .combine = rbind) %do% {
37
+            random_draw <- sample.int(nrow(gene_expr), sub.n, replace = subsample.with.replacement)
38
+
39
+            sub_gene_expr <- gene_expr[random_draw,]
40
+            sub_mir_expr <- mir_expr[random_draw,]
41
+
42
+            if(subsample.with.replacement){
43
+                rownames(sub_gene_expr) <-
44
+                    make.names(rownames(sub_gene_expr), unique = TRUE)
45
+
46
+                rownames(sub_mir_expr) <-
47
+                    make.names(rownames(sub_mir_expr), unique = TRUE)
48
+            }
49
+
50
+            result <- sponge(gene_expr = sub_gene_expr,
51
+                             mir_expr = sub_mir_expr, ...)
52
+            result$sub.n <- sub.n
53
+            return(result)
54
+        }
55
+    }
56
+
57
+    if(subsample.plot){
58
+        subsample_scor_plot <- ggplot(subsample_results,
59
+                                      aes(x = paste(geneA, geneB, sep = " - "),
60
+                                          y = cor - pcor)) +
61
+            geom_boxplot(aes(fill = "green")) +
62
+            geom_boxplot(aes(y = cor, fill = "red")) +
63
+            scale_fill_discrete(name = "",
64
+                    labels = c("sensitivity correlation", "correlation")) +
65
+            theme_bw() +
66
+            theme(axis.text.x=element_text(angle=90, hjust=1, vjust = 0.5)) +
67
+            ylab("") +
68
+            xlab("ceRNA interaction")
69
+
70
+        if(length(subsample.n) > 1){
71
+            subsample_scor_plot <- subsample_scor_plot +
72
+                facet_wrap(~sub.n, ncol = 1)
73
+        }
74
+
75
+        print(subsample_scor_plot)
76
+    }
77
+
78
+    subsample_results %>% dplyr::group_by(geneA, geneB, df) %>%
79
+        dplyr::summarize(cor_mean = mean(cor),
80
+                  cor_sd = sd(cor),
81
+                  pcor_mean = mean(pcor),
82
+                  pcor_sd = sd(pcor))
83
+
84
+}
0 85
\ No newline at end of file
... ...
@@ -6,6 +6,10 @@
6 6
 \usage{
7 7
 genes_pairwise_combinations(number.of.genes)
8 8
 }
9
+\arguments{
10
+\item{number.of.genes}{Number of genes for which all pairwise interactions
11
+are needed}
12
+}
9 13
 \value{
10 14
 data frame with one row per unique pairwise combination. To be used
11 15
 as input for the sponge method.
... ...
@@ -25,6 +25,10 @@ all miRNA interaction partners that should be considered.}
25 25
 \item{selected.genes}{Operate only on a subset of genes, particularly
26 26
 useful for bootstrapping}
27 27
 
28
+\item{gene.combinations}{A data frame of combinations of genes to be tested.
29
+Gene names are taken from the first two columns and have to match the names
30
+used for gene_expr}
31
+
28 32
 \item{p.adj.method}{Multiple testing correction method. see ?p.adjust for
29 33
 details}
30 34