Bioconductor Code: SPONGE

Browse code

code for simulations, bugfixes

mlist authored on 10/02/2017 14:35:43
Showing 11 changed files

DESCRIPTION index 7f36f01..c96ce90 100644
NAMESPACE index 0f23fcc..25a89b9 100644
R/data.R index ee9cd29..5ca5ee9 100644
R/fn_network_centralities.R index 998a7a7..b904686 100644
R/fn_plots.R index 8b8c903..490a797 100644
R/fn_significance.R index 0000000..4763e69
R/fn_simulation.R index 0000000..aef25bd
R/fn_sparc.R index 626d04c..53a7c29 100644
R/fn_subsampling.R index 0000000..43e59c2
man/genes_pairwise_combinations.Rd index 6dafd66..3b03a6d 100644
man/sponge.Rd index be8b301..85db0e4 100644

History View file @ bffe309

@@ -7,4 +7,14 @@ Maintainer: Markus List <[email protected]>
                      Description: This package provides methods to efficiently detect competitive endogenous RNA interactions between two genes. Such interactions are mediated by one or several miRNAs such that both gene and miRNA expression data for a larger number of samples is needed as input.
                      License: GPLv3
                      LazyData: TRUE
                     -RoxygenNote: 5.0.1
                     \ No newline at end of file
                     +RoxygenNote: 5.0.1
                     +Suggests:
                     +    testthat,
                     +    ggplot2,
                     +    d3network
                     +Imports:
                     +    ppcor,
                     +    logging,
                     +    foreach,
                     +    dplyr,
                     +    stringr

NAMESPACE

History View file @ bffe309

@@ -1,15 +1,17 @@
                      # Generated by roxygen2: do not edit by hand
                      export(gene_miRNA_interaction_filter)
                     -export(genes_pairwise_combinations)
                      export(sponge)
                     +export(sponge_subsampling)
                      import(dplyr)
                      import(foreach)
                     +import(ggplot2)
                      import(glmnet)
                      import(logging)
                      import(mirbase.db)
                      import(ppcor)
                      import(targetscan.Hs.eg.db)
                     +importFrom(foreach,foreach)
                      importFrom(gRbase,combnPrim)
                      importFrom(glmnet,cv.glmnet)
                      importFrom(iterators,icount)

R/data.R

History View file @ bffe309

@@ -10,6 +10,11 @@
                      #' with samples in columns and miRNA in rows
                      "mir_expr"
                     +#' miRNA / gene interactions
                     +#'
                     +#' @format A data frame of regression coefficients
                     +#' typically provided by gene_miRNA_interaction_filter
                     +"mir_interactions"
                      #' miRNA family mapping table
                      #'

R/fn_network_centralities.R

History View file @ bffe309

@@ -1,15 +1,61 @@
                     -sparc_network_centralities <- function(network, directed = FALSE){
                     +#' Computes various node centralities
                     +#'
                     +#' @description Computes degree, eigenvector centrality and betweenness
                     +#' centrality for the ceRNA interaction network induced by the results of the
                     +#' SPONGE method
                     +#'
                     +#' @param sponge_result output of the sponge method
                     +#'
                     +#' @importFrom igraph graph.data.frame
                     +#' @importFrom igraph eigen_centrality
                     +#' @importFrom igraph betweenness
                     +#' @importFrom igraph degree
                     +#'
                     +#' @return data.table with gene, degree, eigenvector and betweenness
                     +#' @export
                     +#'
                     +#' @seealso sponge
                     +#'
                     +#' @examples
                     +sponge_node_centralities <- function(sponge_result){
                     +    directed <- FALSE
                     -    ev_centrality <- eigen_centrality(network, directed = directed)
                     -    btw_centrality <- betweenness(network, directed = directed)
                     -    centrality_df <- data.frame(gene = names(ev_centrality$vector),
                     +    network <- igraph::graph.data.frame(sponge_result)
+                    +
                     +    ev_centrality <- igraph::eigen_centrality(network, directed = directed)
                     +    btw_centrality <- igraph::betweenness(network, directed = directed)
                     +    centrality_df <- data.table(gene = names(ev_centrality$vector),
                                                      degree = igraph::degree(network),
                                                      eigenvector = ev_centrality$vector,
                                                      betweenness = btw_centrality)
                          return(centrality_df)
+                     }
                     -sparc_edge_betweenness <- function(network, directed = FALSE){
                     +#' Computes various edge centralities
                     +#'
                     +#' @description Computes edge betweenness
                     +#' centrality for the ceRNA interaction network induced by the results of the
                     +#' SPONGE method.
                     +#'
                     +#' @param sponge_result output of the sponge method
                     +#'
                     +#' @importFrom igraph graph.data.frame
                     +#' @importFrom igraph E
                     +#' @importFrom igraph edge_betweenness
                     +#' @importFrom igraph degree
                     +#' @importFrom tidyr separate
                     +#'
                     +#' @return data frame of edges and their edge betweenness
                     +#' @export
                     +#'
                     +#' @seealso sponge
                     +#'
                     +#' @examples
                     +sponge_edge_centralities <- function(sponge_result){
                     +    directed <- FALSE
+                    +
                     +    network <- igraph::graph.data.frame(sponge_result)
+                    +
                          edge_labels <- attr(E(network), "vnames")
                          ebtw <- edge_betweenness(network, directed = directed)
                          ebtw <- data.frame(labels = edge_labels, edge_betweenness = ebtw)
@@ -17,19 +63,19 @@ sparc_edge_betweenness <- function(network, directed = FALSE){
                          return(ebtw)
+                     }
                     -sparc_matched_edge_betweenness <- function(cancer_network, normal_network, directed = FALSE){
                     -    liver_cancer_edge_betweenness <- sparc_edge_betweenness(network = liver_cancer_sparc_network)
                     -    liver_normal_edge_betweenness <- sparc_edge_betweenness(network = liver_normal_sparc_network)
                     +sponge_matched_edge_betweenness <- function(cancer_network, normal_network, directed = FALSE){
                     +    liver_cancer_edge_betweenness <- sponge_edge_betweenness(network = liver_cancer_sponge_network)
                     +    liver_normal_edge_betweenness <- sponge_edge_betweenness(network = liver_normal_sponge_network)
                          dplyr::inner_join(liver_cancer_edge_betweenness, liver_normal_edge_betweenness, by = c("source_gene", "target_gene")) %>%
                              dplyr::rename(edge_betweenness_cancer = edge_betweenness.x, edge_betweenness_normal = edge_betweenness.y) %>%
                              dplyr::mutate(edge_betweenness_diff = edge_betweenness_cancer - edge_betweenness_normal)
+                     }
                     -sparc_matched_network_centralities <- function(cancer_network, normal_network, directed = FALSE){
                     +sponge_matched_network_centralities <- function(cancer_network, normal_network, directed = FALSE){
                     -    cancer_centrality_df <- sparc_network_centralities(cancer_network, directed)
                     -    normal_centrality_df <- sparc_network_centralities(normal_network, directed)
                     +    cancer_centrality_df <- sponge_network_centralities(cancer_network, directed)
                     +    normal_centrality_df <- sponge_network_centralities(normal_network, directed)
                          centrality_matched <- dplyr::full_join(normal_centrality_df, cancer_centrality_df, by="gene") %>%
                              dplyr::rename(eigenvector_centrality.normal = eigenvector.x,
@@ -51,7 +97,7 @@ sparc_matched_network_centralities <- function(cancer_network, normal_network, d
                          return(centrality_matched)
+                     }
                     -sparc_transform_centralities_for_plotting <- function(network_centralities){
                     +sponge_transform_centralities_for_plotting <- function(network_centralities){
                          library(dplyr)
                          library(digest)
                          plot.data <- bind_rows(dplyr::select(network_centralities,
@@ -70,7 +116,7 @@ sparc_transform_centralities_for_plotting <- function(network_centralities){
                          return(plot.data)
+                     }
                     -sparc_plot_top_edge_betweenness <- function(edge_betweenness, n){
                     +sponge_plot_edge_centralities <- function(edge_centralities, n){
                          top_x <- head(dplyr::arrange(edge_betweenness, desc(edge_betweenness)), n) %>%
                              dplyr::mutate(edge = paste(source_gene, target_gene, sep="|"))
                          ggplot(top_x, aes(x = reorder(edge, -edge_betweenness), y = edge_betweenness)) +
@@ -81,11 +127,11 @@ sparc_plot_top_edge_betweenness <- function(edge_betweenness, n){
                              theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
+                     }
                     -sparc_plot_network_centralities <- function(network_centralities, measure="all"){
                     +sponge_plot_network_centralities <- function(network_centralities, measure="all"){
                          library(ggplot2)
                          library(ggrepel)
                     -    plot.data <- sparc_transform_centralities_for_plotting(network_centralities)
                     +    plot.data <- sponge_transform_centralities_for_plotting(network_centralities)
                          p1 <- ggplot(plot.data, aes(x=degree)) +
                              geom_histogram(color=I("black"), fill=I("black"), alpha = 0.3)+
@@ -119,7 +165,7 @@ sparc_plot_network_centralities <- function(network_centralities, measure="all")
                                            ncol = 1)
+                     }
                     -sparc_plot_eigenvector_centralities_differences <- function(network_centralities, label.threshold){
                     +sponge_plot_eigenvector_centralities_differences <- function(network_centralities, label.threshold){
                          network_centralities <- network_centralities %>% mutate(color = paste("#", substr(sapply(gene, function(x) digest(x, algo = "crc32")), 1, 6), sep=""))
                          ggplot(data = network_centralities,
@@ -133,7 +179,7 @@ sparc_plot_eigenvector_centralities_differences <- function(network_centralities
                              geom_label_repel(data = dplyr::filter(network_centralities, abs(eigenvector_centrality.diff) > label.threshold))
+                     }
                     -sparc_plot_betweenness_centralities_differences <- function(network_centralities, label.threshold){
                     +sponge_plot_betweenness_centralities_differences <- function(network_centralities, label.threshold){
                          network_centralities <- network_centralities %>% mutate(color = paste("#", substr(sapply(gene, function(x) digest(x, algo = "crc32")), 1, 6), sep=""))
                          ggplot(data = network_centralities,
@@ -147,8 +193,8 @@ sparc_plot_betweenness_centralities_differences <- function(network_centralities
                              geom_label_repel(data = dplyr::filter(network_centralities, abs(betweenness_centrality.diff) > label.threshold))
+                     }
                     -sparc_plot_top_centralities <- function(network_centralities, top = 50,
                     -                                         known.sparc.genes = c("ESR1", "CD44", "LIN28B", "HULC", "KRAS1P", "HSUR1", "HSUR2", "BRAFP1", "VCAN", "LINCMD1", "H19"),
                     +sponge_plot_top_centralities <- function(network_centralities, top = 50,
                     +                                         known.sponge.genes = c("ESR1", "CD44", "LIN28B", "HULC", "KRAS1P", "HSUR1", "HSUR2", "BRAFP1", "VCAN", "LINCMD1", "H19"),
                                                               known.cancer.genes = c("TP53", "ESR1", "CD44", "KRAS"),
                                                               only=""){
@@ -165,7 +211,7 @@ sparc_plot_top_centralities <- function(network_centralities, top = 50,
+                         }
                          network_centralities$ceRNA = "novel"
                     -    network_centralities[which(network_centralities$gene %in% known.sparc.genes), "ceRNA"] <- "known"
                     +    network_centralities[which(network_centralities$gene %in% known.sponge.genes), "ceRNA"] <- "known"
                          network_centralities$ceRNA <- factor(network_centralities$ceRNA, levels = c("novel", "known"))
                          network_centralities$cancer = FALSE

R/fn_plots.R

History View file @ bffe309

@@ -1,37 +1,37 @@
                      sponge_plot_density <- function(cancer_sponge_effects, normal_sponge_effects){
                          cancer_sponge_effects$type <- "Liver cancer"
                          normal_sponge_effects$type <- "Liver normal"
+                    -
                     -    p1 <- ggplot(cancer_sponge_effects) +
+                    +
                     +    p1 <- ggplot(cancer_sponge_effects) +
                              facet_wrap(~type) +
                              xlab("Cohen's q") +
                     -        geom_density(aes(x = cohens_q, fill = I("cornflowerblue")), alpha = 0.6)
+                    -
                     -    p2 <- ggplot(normal_sponge_effects) +
                     +        geom_density(aes(x = scor, fill = I("cornflowerblue")), alpha = 0.6)
+                    +
                     +    p2 <- ggplot(normal_sponge_effects) +
                              facet_wrap(~type) +
                              xlab("Cohen's q") +
                     -        geom_density(aes(x = cohens_q, fill = I("darkorchid1")), alpha = 0.6)
+                    -
                     -    p3 <- ggplot(cancer_sponge_effects) +
                     +        geom_density(aes(x = scor, fill = I("darkorchid1")), alpha = 0.6)
+                    +
                     +    p3 <- ggplot(cancer_sponge_effects) +
                              xlab("p-value") +
                     -        geom_density(aes(x = cohens_q_p, fill = I("cornflowerblue")), alpha = 0.6)
+                    -
                     -    p4 <- ggplot(normal_sponge_effects) +
                     +        geom_density(aes(x = scor_p, fill = I("cornflowerblue")), alpha = 0.6)
+                    +
                     +    p4 <- ggplot(normal_sponge_effects) +
                              xlab("p-value") +
                     -        geom_density(aes(x = cohens_q_p, fill = I("darkorchid1")), alpha = 0.6)
+                    -
                     -    p5 <- ggplot(cancer_sponge_effects) +
                     +        geom_density(aes(x = scor_p, fill = I("darkorchid1")), alpha = 0.6)
+                    +
                     +    p5 <- ggplot(cancer_sponge_effects) +
                              xlab("adjusted p-value (BH)") +
                     -        geom_density(aes(x = cohens_q_p.adj, fill = I("cornflowerblue")), alpha = 0.6)
+                    -
                     -    p6 <- ggplot(normal_sponge_effects) +
                     +        geom_density(aes(x = scor_p.adj, fill = I("cornflowerblue")), alpha = 0.6)
+                    +
                     +    p6 <- ggplot(normal_sponge_effects) +
                              xlab("ajusted p-value (BH)") +
                     -        geom_density(aes(x = cohens_q_p.adj, fill = I("darkorchid1")), alpha = 0.6)
+                    -
                     +        geom_density(aes(x = scor_p.adj, fill = I("darkorchid1")), alpha = 0.6)
+                    +
                          grid.arrange(p1, p2, p3, p4, p5, p6, ncol = 2)
+                     }
                     -sponge_plot_heatmap <- function(data, interactive=T, show = "cohens_q"){
                     +sponge_plot_heatmap <- function(data, interactive=T, show = "scor"){
                          if(require(d3heatmap) && interactive){
                              sponge.matrix <- xtabs(p.adj ~ source_gene + target_gene, data = data)
                              d3heatmap(sponge.matrix, dendrogram = "none", symm=T)
@@ -43,8 +43,8 @@ sponge_plot_heatmap <- function(data, interactive=T, show = "cohens_q"){
                              data[data$p.adj < 0.05, "significance"] <- "*"
                              data[data$p.adj < 0.01, "significance"] <- "**"
                              data[data$p.adj < 0.001, "significance"] <- "***"
                     -        ggplot(data = data, aes_string(fill = show, x = "source_gene", y = "target_gene")) +
                     -            geom_tile() +
                     +        ggplot(data = data, aes_string(fill = show, x = "source_gene", y = "target_gene")) +
                     +            geom_tile() +
                                  theme_bw() +
                                  geom_text(label=data$significance)
+                         }
@@ -55,23 +55,23 @@ sponge_plot_heatmap <- function(data, interactive=T, show = "cohens_q"){
                      sponge_plot_boxplot <- function(data){
                          if(!require(ggplot2)) stop("library ggplot2 needs to be installed for this plot")
+                    -
                     -    ggplot(data = data, aes(x = source_gene, y = cohens_q)) + geom_boxplot(fill = "skyblue", aes(outlier.color = p.adj)) + theme_bw()
+                    +
                     +    ggplot(data = data, aes(x = source_gene, y = scor)) + geom_boxplot(fill = "skyblue", aes(outlier.color = p.adj)) + theme_bw()
+                     }
                     -sponge_network <- function(sponge.data,
                     -                           mir.data,
                     +sponge_network <- function(sponge.data,
                     +                           mir.data,
                                                 target.genes = NULL,
                     -                           cerna.p.adj.threshold = 0.05,
                     -                           show.sponge.interaction = TRUE,
                     +                           cerna.p.val.threshold = 0.05,
                     +                           show.sponge.interaction = TRUE,
                                                 show.mirnas = c("none", "all", "shared"),
                                                 replace.mirna.with.name = TRUE,
                                                 min.interactions = 3){
                          library(foreach)
                          library(iterators)
                          library(dplyr)
                     -    sponge.data <- filter(sponge.data, cohens_q_p.adj < cerna.p.adj.threshold)
+                    -
                     +    sponge.data <- filter(sponge.data, p.val < cerna.p.val.threshold)
+                    +
                          #genes <- unique(c(as.character(sponge.data$source_gene), as.character(sponge.data$target_gene)))
                          genes <- unique(c(as.character(sponge.data$source_gene), as.character(sponge.data$target_gene)))
@@ -79,26 +79,26 @@ sponge_network <- function(sponge.data,
                          nodes <- NULL
                          #sponge genes edges
                     -    sponge.edges <- with(sponge.data, {
                     -        result <- data.frame(from=source_gene, to=target_gene, width = cohens_q, color="red")
+                    -
                     +    sponge.edges <- with(sponge.data, {
                     +        result <- data.frame(from=source_gene, to=target_gene, width = scor, color="red")
+                    +
                              result[which(result$to %in% genes | result$from %in% genes),]
                          })
+                    -
+                    +
                          if(show.sponge.interaction) edges <- rbind(edges, sponge.edges)
+                    -
+                    +
                          #sponge gene nodes
                          sponge.nodes <- data.frame(id = genes, label = genes, shape = "square", color="darkgreen", type=FALSE, value=1)
                          nodes <- rbind(nodes, sponge.nodes)
+                    -
+                    +
                          if(!is.null(target.genes)){
                              nodes[which(nodes$id %in% target.genes), "shape"] <- "square"
                              nodes[which(nodes$id %in% target.genes), "color"] <- "blue"
+                         }
+                    -
+                    +
                          #mirna nodes and edges
                          if(show.mirnas != "none"){
+                    -
+                    +
                              mirna.edges <- foreach(gene = nodes$id, .combine=rbind) %do% {
                                  gene_mirnas <- mir.data[[as.character(gene)]]
                                  gene_mirnas <- dplyr::filter(gene_mirnas, coefficient < 0)
@@ -111,50 +111,50 @@ sponge_network <- function(sponge.data,
+                             {
                                  #consider only miRNAs shared with a source gene
                                  mirnas <- mirna.edges[which(mirna.edges$from %in% genes),"to"]
+                    -
+                    +
                                  #keep only miRNAs that appear in more than min.interactions edges
                                  mirnas <- intersect(mirnas, names(which(table(as.character(mirna.edges$to)) > min.interactions)))
+                    -
+                    +
                                  mirna.edges <- mirna.edges[which(mirna.edges$to %in% mirnas),]
                     -        }
+                    -
                     +        }
+                    +
                              else mirnas <- unique(as.character(mirna.edges$to))
                              if(length(mirnas) > 0){
                                  nodes <- rbind(nodes, data.frame(id = mirnas, label = mirnas, shape = "triangle", color="darkblue", type=TRUE, value=1))
                                  edges <- rbind(edges, mirna.edges)
+                    -
+                    +
                                  #replace mirna with name
                                  if(replace.mirna.with.name){
                                      nodes$label <- as.character(nodes$label)
                                      nodes[which(nodes$type), "label"] <- as.character(fn_map_mimat_to_mir(as.character(nodes[which(nodes$type), "id"])))
+                                 }
                     -        }
                     +        }
                              else{
                                  stop("No miRNAs found that match all criteria")
                     -        }
                     +        }
+                         }
+                    -
+                    +
                          #filter out orphan nodes
                          nodes <- nodes[which(nodes$id %in% c(as.character(edges$to), as.character(edges$from))),]
+                    -
+                    +
                          return(list(nodes=nodes, edges=edges))
+                     }
                     -sponge_plot_network <- function(nodes,
                     -                                edges,
                     -                                layout="layout.fruchterman.reingold",
                     +sponge_plot_network <- function(nodes,
                     +                                edges,
                     +                                layout="layout.fruchterman.reingold",
                                                      force.directed = FALSE){
                          library(visNetwork)
+                    -
+                    +
                          if(nrow(edges) < 5000){
                              plot <- visNetwork(nodes, edges)
                     -        plot <- plot %>% visIgraphLayout(layout = layout, type="full", physics = force.directed)
                     +        plot <- plot %>% visIgraphLayout(layout = layout, type="full", physics = force.directed)
                              plot <- plot %>% visOptions(highlightNearest = TRUE, nodesIdSelection = TRUE)
                              plot <- plot %>% visNodes(font = list(size = 32))
                              plot <- plot %>% visEdges(color = list(opacity = 1))
+                    -
+                    +
                              return(plot)
+                         }
                          else{

R/fn_significance.R

History View file @ bffe309

                     new file mode 100644
@@ -0,0 +1,86 @@
                     +compute_p_values <- function(partition,
                     +                             cov.matrices,
                     +                             number.of.datasets = 1e6,
                     +                             number.of.samples){
+                    +
                     +    if(!("scor" %in% colnames(partition))) stop("sensitivity correlation missing")
+                    +
                     +    #read the partition's cor_cut (k) and df_cut (m) labels from its first row
                     +    k <- as.character(partition[1,cor_cut])
                     +    m <- as.character(partition[1,df_cut])
+                    +
                     +    #simulate data using the appropriate covariance matrices
                     +    cov.matrices.partition <- cov.matrices[[m]][[k]]
+                    +
                     +    #to reach the necessary number of datasets we need to find out how many
                     +    #datasets to construct from each covariance matrix we have
                     +    number.of.datasets.per.matrix <- ceiling(number.of.datasets / length(cov.matrices.partition))
+                    +
                     +    scor <- unlist(sample_zero_scor_data(cov.matrices = cov.matrices.partition,
                     +                                         number.of.datasets = number.of.datasets.per.matrix,
                     +                                         number.of.samples = number.of.samples)[1:number.of.datasets])
+                    +
                     +    test_data_dt <- data.table(scor) #data.table(scor[which(scor > 0)])
                     +    setkey(test_data_dt, scor)
                     +    number.of.datasets.on.right.side <- length(test_data_dt$scor)
+                    +
                     +    #partition_scor_positive <- partition[which(partition$scor >= 0),]
+                    +
                     +    partition$p.val <- (number.of.datasets.on.right.side -
                     +        test_data_dt[J(partition$scor),
                     +                     .I,
                     +                     roll = "nearest",
                     +                     by = .EACHI]$I) / number.of.datasets.on.right.side
+                    +
                     +    return(partition)
                     +}
+                    +
                     +sponge_compute_p_values <- function(sponge_result,
                     +                                    cov.matrices,
                     +                                    number.of.samples,
                     +                                    number.of.datasets = 10,
                     +                                    ms, ks){
+                    +
                     +    #bin the gene-gene correlation (abs(cor)) and df into cor_cut / df_cut partitions
                     +    if(max(sponge_result$df) > 7) df_breaks <- c(seq(0,7), max(sponge_result$df))
                     +    else df_breaks <- seq(0, max(sponge_result$df))
+                    +
                     +    sponge_result <- sponge_result[,
                     +                c("cor_cut", "df_cut") := list(
                     +                    cut(abs(cor), breaks = c(0, seq(0.25, 0.85, 0.1), 1)),
                     +                    cut(df, breaks = df_breaks))]
+                    +
                     +    levels(sponge_result$cor_cut) <- ks
                     +    levels(sponge_result$df_cut) <- ms
+                    +
                     +    isplitDT2 <- function(x, ks, ms) {
                     +        ival <- iter(apply(expand.grid(ks, ms), 1, list))
                     +        nextEl <- function() {
                     +            val <- nextElem(ival)
                     +            list(value=x[.(as.character(val[[1]][1]),
                     +                           as.character(val[[1]][2]))], key=val[[1]])
                     +        }
                     +        obj <- list(nextElem=nextEl)
                     +        class(obj) <- c('abstractiter', 'iter')
                     +        obj
                     +    }
+                    +
                     +    dtcomb <- function(...) {
                     +        rbindlist(list(...))
                     +    }
+                    +
                     +    setkey(sponge_result, cor_cut, df_cut)
+                    +
                     +    foreach(dt.m=isplitDT2(sponge_result, ks, ms),
                     +        .combine='dtcomb',
                     +        .multicombine=TRUE,
                     +        .export = c("compute_p_values",
                     +                    "sample_zero_scor_data"),
                     +        .packages = c("gRbase", "MASS", "ppcor", "foreach", "logging", "data.table"),
                     +        .noexport = c("sponge_result")) %dopar% {
                     +            compute_p_values(partition = dt.m$value,
                     +                             cov.matrices = cov.matrices,
                     +                             number.of.datasets = number.of.datasets,
                     +                             number.of.samples = number.of.samples)
                     +        }
                     +}
                     \ No newline at end of file

R/fn_simulation.R

History View file @ bffe309

                     new file mode 100644
@@ -0,0 +1,287 @@
                     +library(MASS)
                     +library(foreach)
                     +library(gRbase)
                     +library(testthat)
                     +library(ppcor)
                     +library(expm)
                     +library(logging)
+                    +
                     +#generates a random symmetric positive semi definite n x n matrix
                     +#
                     +# n   dimension of the matrix
                     +# ev  eigenvalues of the result, one per dimension; by default n draws
                     +#     from runif(n, 0, 10). The default is evaluated lazily, i.e. only
                     +#     after the normal random numbers below have been drawn.
                     +#
                     +# An orthogonal matrix O is taken from the QR decomposition of a matrix of
                     +# standard normal draws (column signs are fixed using the diagonal of R);
                     +# the result is t(O) %*% diag(ev) %*% O.
                     +posdef <- function (n, ev = runif(n, 0, 10))
                     +{
                     +    Z <- matrix(ncol=n, rnorm(n^2))
                     +    decomp <- qr(Z)
                     +    Q <- qr.Q(decomp)
                     +    R <- qr.R(decomp)
                     +    d <- diag(R)
                     +    ph <- d / abs(d)
                     +    #ev is first touched here so that the random number stream is consumed
                     +    #in the same order as before (rnorm first, then runif)
                     +    if(length(ev) != n) stop("ev should contain exactly n eigenvalues")
                     +    #nrow = n prevents diag() from interpreting a length-one vector as the
                     +    #size of an identity matrix, which broke the case n == 1
                     +    O <- Q %*% diag(ph, nrow = n)
                     +    Z <- t(O) %*% diag(ev, nrow = n) %*% O
                     +    return(Z)
                     +}
+                    +
                     +#computes the schur complement with respect to R22
                     +#
                     +# R  square matrix whose first two rows/columns form the block R11 and
                     +#    whose remaining rows/columns form the block R22
                     +#
                     +# returns R11 - R12 %*% solve(R22) %*% t(R12) as a 2 x 2 matrix
                     +schur<-function(R) {
                     +    upper.left <- R[1:2,1:2]
                     +    upper.right <- R[1:2,3:ncol(R)]
                     +    lower.right.inverse <- solve(R[3:nrow(R),3:ncol(R)])
                     +    upper.left - upper.right %*% lower.right.inverse %*% t(upper.right)
                     +}
+                    +
                     +#computes sensitivity correlation
                     +#
                     +# S  covariance matrix; rows/columns 1 and 2 are the gene pair
                     +#
                     +# returns a 2 x 2 matrix: the correlation derived from the gene-gene
                     +# covariance minus the correlation derived from the partial covariance
                     +# (schur complement of S)
                     +get.q = function(S) {
                     +    #scales a covariance matrix by the outer product of its standard deviations
                     +    as.correlation <- function(covariance) {
                     +        covariance / outer(sqrt(diag(covariance)), sqrt(diag(covariance)))
                     +    }
                     +    gene.covariance <- S[1:2,1:2]
                     +    partial.covariance <- schur(S)
                     +    as.correlation(gene.covariance) - as.correlation(partial.covariance)
                     +}
+                    +
                     +#computes lambda from the dot product
                     +#
                     +# x, y  numeric vectors of equal length
                     +#
                     +# returns sum(x * y) / (||x|| * ||y||), i.e. the cosine of the angle between
                     +# x and y; stops if the vectors differ in length
                     +checkLambda <- function(x,y){
                     +    if(length(x) != length(y)) stop("input should be two vectors of equal length")
                     +    #vectorized dot product; the former foreach over 1:length(x) was slower
                     +    #and iterated over c(1, 0) for empty input
                     +    beta <- sum(x * y)
                     +    norm.x <- base::norm(x,type="2")
                     +    norm.y <- base::norm(y,type="2")
                     +    (beta / (norm.x * norm.y))
                     +}
+                    +
                     +#solves a * x^2 + b * x + c = 0 for real x
                     +#
                     +# returns list(root with +sqrt, root with -sqrt), or NULL if the
                     +# discriminant is negative, a is zero, or evaluating the condition fails
                     +# (e.g. because of NA input)
                     +quadraticSolver <- function(a, b, c){
                     +    discriminant <- b^2 - 4 * a * c
                     +
                     +    tryCatch({
                     +        if(discriminant >= 0 && a != 0){
                     +            root.term <- sqrt(discriminant)
                     +            list((-b + root.term) / (2 * a),
                     +                 (-b - root.term) / (2 * a))
                     +        }
                     +        else{
                     +            NULL
                     +        }
                     +    }, error = function(e) NULL)
                     +}
+                    +
                     +#main method for sampling zero sensitivity covariance matrices
                     +#
                     +# Repeatedly builds random (m+2) x (m+2) correlation matrices R, turns them
                     +# into covariance matrices S and keeps an S if get.q(S)[1,2] is zero up to
                     +# sqrt(.Machine$double.eps). Rows/columns 1 and 2 are the gene pair (K is
                     +# their correlation), rows/columns 3..(m+2) hold the block R22, labelled
                     +# "miRNA correlation" below.
                     +#
                     +# m                      size of the R22 block
                     +# number.of.solutions    number of independent searches; each search is one
                     +#                        %dopar% task
                     +# number.of.attempts     maximum number of candidates tried per search
                     +# gene_gene_correlation  fixed value for K; if NULL, K is drawn from
                     +#                        runif(1,-1,1) in every attempt
                     +# log.level              passed to basicConfig() inside every task
                     +#
                     +# returns a list of covariance matrices carrying the attributes "iterations"
                     +# (attempts needed), "k" and "m". Searches that use up number.of.attempts
                     +# without success contribute nothing. If no search succeeds an error is
                     +# logged and NULL is returned.
                     +sample_zero_scor_cov <- function(m, number.of.solutions,
                     +                                 number.of.attempts = 1e3,
                     +                                 gene_gene_correlation = NULL,
                     +                                 log.level = "INFO"){
+                    +
                     +    solutions <- foreach(solution = 1:number.of.solutions,
                     +                         .packages = c("MASS", "gRbase", "testthat",
                     +                                       "ppcor", "expm", "logging",
                     +                                       "foreach"),
                     +                         .export = c("quadraticSolver",
                     +                                     "checkLambda",
                     +                                     "get.q",
                     +                                     "schur",
                     +                                     "posdef")) %dopar% {
                     +        total <- 0
                     +        basicConfig(level = log.level)
+                    +
                     +        loginfo(paste("Looking for zero sensitivity covariance matrix for case m =",
                     +                      m, "solution no.", solution))
                     +        #a lot of solutions are not within our constraints, so we repeat
                     +        #every `next` below discards the current candidate and starts a new
                     +        #attempt; if the loop ends without return(S) the task yields NULL
                     +        while(total < number.of.attempts){
                     +            total <- total + 1
                     +            if(is.null(gene_gene_correlation)) K = runif(1,-1,1) # R11
                     +            else K = gene_gene_correlation
                     +            v1 <- runif(m, -1, 1) #random v1
+                    +
                     +            if(m == 1){
                     +                #single variable in R22: v2 follows from one quadratic equation
                     +                R22 <- 1
                     +                Z <- v1
+                    +
                     +                a <- K^2 + Z^2 - K^2 * Z^2
                     +                b <- -2 * K * Z
                     +                c <- Z^2 * K^2
+                    +
                     +                v2.solutions <- quadraticSolver(a, b, c)
                     +                if(is.null(v2.solutions)){
                     +                    logdebug("no solution for v2 found")
                     +                    next
                     +                }
                     +                else{
                     +                    #NOTE(review): only the first root is used here, the
                     +                    #second root is never tried - confirm this is intended
                     +                    v2 <- v2.solutions[[1]]
                     +                }
                     +            }
                     +            else{
                     +                lambda = runif(1,-1,1) #cos theta
+                    +
                     +                R22 <- cov2cor(posdef(m)) #miRNA correlation
                     +                R22_invsqrt <- ginv(sqrtm(R22)) #R^-1/2
+                    +
                     +                u1 <- (R22_invsqrt %*% v1)[,1]
                     +                u1.norm = base::norm(u1, type = "2")
+                    +
                     +                #solving quadratic equation to obtain u2 solutions
                     +                a <- ((K^2 - lambda^2) * u1.norm^2 - K^2)
                     +                b <- 2 * lambda * K * u1.norm
                     +                c <- -K^2 * u1.norm^2
+                    +
                     +                u2.solutions <- quadraticSolver(a, b, c)
                     +                if(is.null(u2.solutions)){
                     +                    logdebug("no solution for u2")
                     +                    next
                     +                }
+                    +
                     +                u2.norm.1 <- u2.solutions[[1]]
                     +                u2.norm.2 <- u2.solutions[[2]]
+                    +
                     +                #try the first root; fall back to the second one if the result is NaN
                     +                r12.m <- (K - sqrt(u1.norm^2) * sqrt(u2.norm.1^2) * lambda) * (1 - u1.norm^2)^-(1/2) * (1 - u2.norm.1^2)^-(1/2)
+                    +
                     +                if(is.nan(r12.m)){
                     +                    r12.m <- (K - sqrt(u1.norm^2) * sqrt(u2.norm.2^2) * lambda) * (1 - u1.norm^2)^-(1/2) * (1 - u2.norm.2^2)^-(1/2)
                     +                }
+                    +
                     +                if(is.nan(r12.m)){
                     +                    logdebug("||u2|| is not a valid solution")
                     +                    next
                     +                }
+                    +
                     +                #NOTE(review): exact floating point comparison; candidates
                     +                #that differ from K only by rounding error are rejected
                     +                if(r12.m != K){
                     +                    logdebug("correlation is not equal to partial correlation for selected ||u2||")
                     +                    next
                     +                }
+                    +
                     +                #bounds used for drawing and checking the elements of u2
                     +                constraints <- (R22_invsqrt %*% rep(1,m))[,1]
+                    +
                     +                if(anyNA(constraints)){
                     +                    logdebug("R22^-(1/2) invalid. Can not compute constraints")
                     +                    next
                     +                }
+                    +
                     +                #draw the first m-1 elements of u2 at random; the m-th
                     +                #element is solved for below
                     +                u1.m <- u1[m]
                     +                u2.wo_m <- foreach(i = 1:(m-1), .combine = c) %do%{
                     +                    runif(1, -constraints[i], constraints[i])
                     +                }
+                    +
                     +                if(anyNA(u2.wo_m)){
                     +                    logdebug("R22^-(1/2) invalid. Can not compute constraints")
                     +                    next
                     +                }
+                    +
                     +                #dot product of the first m-1 elements of u1 and u2
                     +                beta <- foreach(i = 1:(m-1), .combine = sum) %do% {u1[i] * u2.wo_m[i]}
+                    +
                     +                A <- u1.m^2 - lambda^2 * u1.norm^2
                     +                B <- 2 * beta * u1.m
                     +                C <- beta^2 - lambda^2  * u1.norm^2 * sum(u2.wo_m^2)
+                    +
                     +                u2.solutions <- quadraticSolver(A, B, C)
+                    +
                     +                if(is.null(u2.solutions)){
                     +                    logdebug("no solution for the k-th element of u2")
                     +                    next
                     +                }
                     +                #solve quadratic equation
                     +                u2.k.1 <- u2.solutions[[1]]
                     +                u2.k.2 <- u2.solutions[[2]]
+                    +
                     +                #check if lambda constraint is really fulfilled
                     +                u2 <- c(u2.wo_m, u2.k.1)
+                    +
                     +                #if the sign is wrong we have to take the other solution
                     +                #NOTE(review): exact floating point comparison; rounding
                     +                #error alone can trigger the switch to the second root
                     +                if(checkLambda(u1, u2) != lambda)
                     +                    u2 <- c(u2.wo_m, u2.k.2)
+                    +
                     +                #normalize and then multiply by ||u2||
                     +                #NOTE(review): always scales by u2.norm.1, even if r12.m
                     +                #above was computed from u2.norm.2 - confirm
                     +                u2.scaled <- u2 / base::norm(u2, type = "2") * u2.norm.1
+                    +
                     +                #NOTE(review): the condition i < length(u2.scaled) leaves
                     +                #the last element unchecked - confirm this is intended
                     +                i <- 1
                     +                within_constraints <- TRUE
                     +                while(i < length(u2.scaled)){
                     +                    if(u2.scaled[i] > constraints[i] || u2.scaled[i] < -constraints[i]) {
                     +                        within_constraints <- FALSE
                     +                        break
                     +                    }
                     +                    i <- i+1
                     +                }
+                    +
                     +                if(!within_constraints){
                     +                    logdebug("solution violates constraints")
                     +                    next
                     +                }
                     +                else
                     +                {
                     +                    #compute v2
                     +                    v2 = (solve(R22_invsqrt) %*% u2.scaled)[,1]
                     +                }
                     +            }
+                    +
                     +            #construct correlation matrix R
                     +            R <- matrix(ncol = m+2, nrow = m+2)
                     +            diag(R) <- 1
                     +            R[1,2] <- K
                     +            R[2,1] <- K
                     +            R[3:(2+m),3:(2+m)] <- R22
                     +            R[1,3:(2+m)] <- v1
                     +            R[2,3:(2+m)] <- v2
                     +            R[3:(2+m), 1] <- v1
                     +            R[3:(2+m), 2] <- v2
+                    +
                     +            #generate covariance matrix S
                     +            L = diag(exp(rnorm(2+m))) 							#- variances
                     +            S = L %*% R %*% L 									#- covariance matrix
+                    +
                     +            #test for negative variance
                     +            if(any(diag(schur(S)) < 0)){
                     +                logdebug("negative variance in partial covariance matrix")
                     +                next
                     +            }
+                    +
                     +            #test sensitivity correlation is zero
                     +            scor <- get.q(S)[1,2]
                     +            if(is.nan(scor)){
                     +                logdebug("sensitivity correlation is NaN")
                     +                next
                     +            }
                     +            else if(abs(scor) > sqrt(.Machine$double.eps)){
                     +                logdebug("sensitivity correlation is not zero")
                     +                next
                     +            }
                     +            else{
                     +                loginfo(paste("viable solution found for m =", m,
                     +                              "and k =", K,
                     +                               "solution no.", solution))
                     +                #record how the matrix was obtained; "iterations" is summed up below
                     +                attr(S, "iterations") <- total
                     +                attr(S, "k") <- K
                     +                attr(S, "m") <- m
                     +                return(S)
                     +            }
                     +        }
                     +    }
                     +    #drop the searches that ran out of attempts
                     +    solutions <- Filter(Negate(is.null), solutions)
                     +    if(length(solutions) == 0){
                     +        logerror("No solutions found")
                     +        return(NULL)
                     +    }
                     +    else{
                     +        #total counts the attempts of the successful searches only
                     +        total <- sum(unlist(lapply(solutions, function(x){
                     +            attr(x, "iterations")})))
                     +        loginfo(paste("case k = ", gene_gene_correlation, "m = ", m, " - Found",
                     +                      length(solutions), "solutions in a total of",
                     +                      total, "iterations."))
                     +    }
+                    +
                     +    return(solutions)
                     +}
+                    +
+                    +
                     +#samples sensitivity correlations under zero sensitivity covariance matrices
                     +#
                     +# cov.matrices        list of covariance matrices; rows/columns 1 and 2 are
                     +#                     the gene pair
                     +# number.of.samples   number of observations drawn per data set
                     +# number.of.datasets  number of data sets drawn per covariance matrix
                     +#
                     +# returns a list with one numeric vector per covariance matrix; every
                     +# element is cor - pcor of variables 1 and 2 in one sampled data set.
                     +# Stops if a covariance matrix does not have zero sensitivity correlation.
                     +sample_zero_scor_data <- function(cov.matrices,
                     +                                  number.of.samples = 100,
                     +                                  number.of.datasets = 100){
                     +    foreach(sigma = cov.matrices,
                     +            .packages = c("gRbase", "MASS", "ppcor", "foreach", "logging")) %dopar% {
                     +        #the input has to have (numerically) zero sensitivity correlation
                     +        scor.deviation <- abs(cov2pcor(sigma)[1,2] - cov2cor(sigma)[1,2])
                     +        if(scor.deviation > sqrt(.Machine$double.eps)){
                     +            stop("sensitivity correlation of a given covariance matrix is not zero.")
                     +        }
+                    +
                     +        #draw centered multivariate normal data and record cor - pcor
                     +        foreach(dataset = 1:number.of.datasets, .combine = c) %do%{
                     +            simulated <- mvrnorm(n = number.of.samples,
                     +                                 mu = rep(0, ncol(sigma)),
                     +                                 Sigma = sigma,
                     +                                 empirical = FALSE)
                     +            cor(simulated)[1,2] - pcor(simulated)$estimate[1,2]
                     +        }
                     +    }
                     +}
+                    +

R/fn_sparc.R

History View file @ bffe309

@@ -41,12 +41,12 @@ fn_map_mimats_to_mir <- function(mimats){
                      #' Compute all pairwise interactions for a number of genes as indices
                      #'
                     -#' @param number.of.genes
                     +#' @param number.of.genes Number of genes for which all pairwise interactions
                     +#' are needed
                      #' @importFrom gRbase combnPrim
                      #'
                      #' @return data frame with one row per unique pairwise combination. To be used
                      #' as input for the sponge method.
                     -#' @export
                      #'
                      #' @examples genes_pairwise_combinations(ncol(gene_expr))
                      genes_pairwise_combinations <- function(number.of.genes){
@@ -71,6 +71,9 @@ genes_pairwise_combinations <- function(number.of.genes){
                      #' @param log.every.n write to the log after every n steps
                      #' @param selected.genes Operate only on a subset of genes, particularly
                      #' useful for bootstrapping
                     +#' @param gene.combinations A data frame of combinations of genes to be tested.
                     +#' Gene names are taken from the first two columns and have to match the names
                     +#' used for gene_expr
                      #' @param p.adj.method Multiple testing correction method. see ?p.adjust for
                      #' details
                      #' @param p.value.threshold Multiple testing p-value cutoff
@@ -128,14 +131,19 @@ sponge <- function(gene_expr, mir_expr, mir_interactions,
                              sel.genes <- available.selected.genes
+                         }
                     -    #consider only genes that have miRNA interactions
                     -    sel.genes <- Filter(Negate(is.null), sel.genes)
                     +    genes.as.indices <- FALSE
                          #all pairwise combinations of selected genes
                          if(is.null(gene.combinations)){
                              loginfo("Computing all pairwise combinations of genes")
+                    +
                     +        #consider only genes that have miRNA interactions
                     +        sel.genes <- Filter(Negate(is.null), sel.genes)
+                    +
                              gene.combinations <-
                                 genes_pairwise_combinations(length(sel.genes))
+                    +
                     +        genes.as.indices <- TRUE
+                         }
                          loginfo("Beginning SPONGE run...")
@@ -162,9 +170,14 @@ sponge <- function(gene_expr, mir_expr, mir_interactions,
                              result <- foreach(gene_combi = iter(gene_combis, by="row"),
                                                  .combine=rbind) %do% {
                     -            geneA <- sel.genes[gene_combi[1]]
                     -            geneB <- sel.genes[gene_combi[2]]
+                    -
                     +            if(genes.as.indices){
                     +                geneA <- sel.genes[gene_combi[1]]
                     +                geneB <- sel.genes[gene_combi[2]]
                     +            }
                     +            else {
                     +                geneA <- as.character(gene_combi[1,1])
                     +                geneB <- as.character(gene_combi[1,2])
                     +            }
                                  logdebug(paste("Processing source gene", geneA,
                                                 "and target gene", geneB ))
@@ -188,6 +201,13 @@ sponge <- function(gene_expr, mir_expr, mir_interactions,
                                                                            mir_interactions)
+                                 }
                     +            #check if shared miRNAs are in expression matrix
                     +            if(length(setdiff(mir_intersect, colnames(mir_expr))) > 0){
                     +                logwarn(paste("Source gene", geneA, "and target gene", geneB,
                     +                              "shared miRNAs not found in mir_expr are discarded"))
                     +                mir_intersect <- intersect(mir_intersect, colnames(mir_expr))
                     +            }
+                    +
                                  #check if there are actually any shared mirnas
                                  if(length(mir_intersect) == 0){
                                      logdebug(paste("Source gene", geneA, "and target gene", geneB,

R/fn_subsampling.R

History View file @ bffe309

                     new file mode 100644
@@ -0,0 +1,84 @@
                     +#' Sponge subsampling
                     +#'
                     +#' Runs \code{sponge} repeatedly on random subsets of the samples (rows of
                     +#' the expression matrices) and summarizes correlation and partial
                     +#' correlation of the tested gene pairs across all runs.
                     +#'
                     +#' @importFrom foreach foreach
                     +#' @import ggplot2
                     +#' @import dplyr
                     +#'
                     +#' @param subsample.n the number of samples to be drawn in each round. If
                     +#' several values are given, each of them is used in turn
                     +#' @param subsample.repeats how often should the subsampling be done?
                     +#' @param subsample.with.replacement logical, should we allow samples to be used
                     +#' repeatedly
                     +#' @param subsample.plot logical, should the results be plotted as box plots
                     +#' @param gene_expr gene expression matrix as defined in sponge
                     +#' @param mir_expr miRNA expression matrix as defined in sponge
                     +#' @param ... parameters passed on to the sponge function
                     +#'
                     +#' @seealso \code{\link{sponge}}
                     +#' @return a summary of the results with mean and standard deviation of the
                     +#' correlation (cor) and the partial correlation (pcor), grouped by geneA,
                     +#' geneB and df.
                     +#' @export
                     +#'
                     +#' @examples test <- sponge_subsampling(gene_expr = gene_expr,
                     +#' mir_expr = mir_expr, mir_interactions = mir_interactions)
                     +sponge_subsampling <- function(
                     +                    subsample.n = 100,
                     +                    subsample.repeats = 10,
                     +                    subsample.with.replacement = FALSE,
                     +                    subsample.plot = FALSE,
                     +                    gene_expr,
                     +                    mir_expr,
                     +                    ...){
+                    +
                     +    #one sponge run per subsample size and repeat, stacked row-wise
                     +    subsample_results <-
                     +    foreach(sub.n = subsample.n, .combine = rbind) %do%{
                     +        foreach(r = 1:subsample.repeats, .combine = rbind) %do% {
                     +            #the same rows are taken from both expression matrices
                     +            drawn.rows <- sample.int(nrow(gene_expr), sub.n,
                     +                                     replace = subsample.with.replacement)
                     +            genes.drawn <- gene_expr[drawn.rows,]
                     +            mirs.drawn <- mir_expr[drawn.rows,]
+                    +
                     +            #rows drawn more than once need distinct names
                     +            if(subsample.with.replacement){
                     +                rownames(genes.drawn) <-
                     +                    make.names(rownames(genes.drawn), unique = TRUE)
                     +                rownames(mirs.drawn) <-
                     +                    make.names(rownames(mirs.drawn), unique = TRUE)
                     +            }
+                    +
                     +            single.run <- sponge(gene_expr = genes.drawn,
                     +                                 mir_expr = mirs.drawn, ...)
                     +            single.run$sub.n <- sub.n
                     +            single.run
                     +        }
                     +    }
+                    +
                     +    if(subsample.plot){
                     +        #two box plots per gene pair: cor - pcor and cor
                     +        subsample_scor_plot <- ggplot(subsample_results,
                     +                                      aes(x = paste(geneA, geneB, sep = " - "),
                     +                                          y = cor - pcor))
                     +        subsample_scor_plot <- subsample_scor_plot +
                     +            geom_boxplot(aes(fill = "green")) +
                     +            geom_boxplot(aes(y = cor, fill = "red"))
                     +        subsample_scor_plot <- subsample_scor_plot +
                     +            scale_fill_discrete(name = "",
                     +                    labels = c("sensitivity correlation", "correlation")) +
                     +            theme_bw() +
                     +            theme(axis.text.x=element_text(angle=90, hjust=1, vjust = 0.5)) +
                     +            ylab("") +
                     +            xlab("ceRNA interaction")
+                    +
                     +        #one panel per subsample size
                     +        if(length(subsample.n) > 1){
                     +            subsample_scor_plot <- subsample_scor_plot +
                     +                facet_wrap(~sub.n, ncol = 1)
                     +        }
+                    +
                     +        print(subsample_scor_plot)
                     +    }
+                    +
                     +    grouped_results <- dplyr::group_by(subsample_results, geneA, geneB, df)
                     +    dplyr::summarize(grouped_results,
                     +                     cor_mean = mean(cor),
                     +                     cor_sd = sd(cor),
                     +                     pcor_mean = mean(pcor),
                     +                     pcor_sd = sd(pcor))
+                    +
                     +}
                     \ No newline at end of file

man/genes_pairwise_combinations.Rd

History View file @ bffe309

@@ -6,6 +6,10 @@
                      \usage{
                      genes_pairwise_combinations(number.of.genes)
+                     }
                     +\arguments{
                     +\item{number.of.genes}{Number of genes for which all pairwise interactions
                     +are needed}
                     +}
                      \value{
                      data frame with one row per unique pairwise combination. To be used
                      as input for the sponge method.

man/sponge.Rd

History View file @ bffe309

@@ -25,6 +25,10 @@ all miRNA interaction partners that should be considered.}
                      \item{selected.genes}{Operate only on a subset of genes, particularly
                      useful for bootstrapping}
                     +\item{gene.combinations}{A data frame of combinations of genes to be tested.
                     +Gene names are taken from the first two columns and have to match the names
                     +used for gene_expr}
+                    +
                      \item{p.adj.method}{Multiple testing correction method. see ?p.adjust for
                      details}