R/visualize.R
2ea4e3dd
 #' Line plot for expression traces
 #'
 #' Plot expression traces for genes across sections in a \code{SummarizedExperiment} object.
 #'
 #' @param object A \code{SummarizedExperiment} object.
 #' @param genes A character vector of gene names for plotting expression traces.
 #' @param matrix Character, must be one of \code{"count"}, \code{"normalized"}, or \code{"scaled"}.
 #' @param facet Logical. Plot the expression trace of each gene in a facet if it is \code{TRUE}.
 #' @param span Numeric, the amount of smoothing for the default loess smoother. Smaller numbers produce wigglier lines, larger numbers produce smoother lines. Set it to 0 for non-smoothing lines.
 #'
 #' @return A \code{ggplot} object.
 #'
 #' @seealso \code{geom_smooth} for plotting smooth lines, \code{facet_wrap} for faceting genes.
 #'
 #' @importFrom ggplot2 ggplot aes_string theme theme_bw geom_smooth geom_line labs facet_wrap
6484cb9d
 #' @importFrom SummarizedExperiment assay rowData colData
2ea4e3dd
 #' @export
 #'
 #' @examples
 #' data(zh.data)
6484cb9d
 #' zh <- createTomo(zh.data)
 #' linePlot(zh,
2ea4e3dd
 #'  c("ENSDARG00000002131", "ENSDARG00000003061", "ENSDARG00000076075", "ENSDARG00000076850"))
 #'
 #' # Do not smooth lines.
6484cb9d
 #' linePlot(zh,
2ea4e3dd
 #'  c("ENSDARG00000002131", "ENSDARG00000003061", "ENSDARG00000076075", "ENSDARG00000076850"), span=0)
 #'
 #' # Plot genes in different facets.
6484cb9d
 #' linePlot(zh,
2ea4e3dd
 #'  c("ENSDARG00000002131", "ENSDARG00000003061", "ENSDARG00000076075", "ENSDARG00000076850"),
 #'  facet=TRUE)
6484cb9d
 linePlot <- function(object, genes, matrix='normalized', facet=FALSE, span=0.3)
2ea4e3dd
 {
6484cb9d
     exp_matrix <- assay(object, matrix)
3ce60083
     exp_genes <- subset(exp_matrix, rowData(object)$gene %in% genes)
     exp_genes_df <- NULL
     sections <- factor(colData(object)$section, levels=colData(object)$section)
     for(i in seq_len(nrow(exp_genes)))
     {
         exp_gene_df <- data.frame(gene=genes[i], section=sections, value=exp_genes[i,])
         exp_genes_df <- rbind(exp_genes_df, exp_gene_df)
     }
     ylab <- c('Count', 'Normalized count', 'Scaled expression')[c('count', 'normalized', 'scaled') == matrix]
2ea4e3dd
 
3ce60083
     g <- ggplot(exp_genes_df, aes_string(x='section', y='value', group='gene', color='gene')) +
6484cb9d
         labs(y=ylab) +
3ce60083
         theme_bw() +
         theme(legend.position='top', axis.text.x=element_text(angle=90, hjust=1, vjust=0.5))
2ea4e3dd
 
3ce60083
     if(span > 0)
         g <- g + geom_smooth(method='loess', span=span, se=FALSE)
     else
         g <- g + geom_line(size=1)
2ea4e3dd
 
3ce60083
     if(facet)
         g <- g + facet_wrap(~gene, scales='free') + theme(legend.position='none')
2ea4e3dd
 
3ce60083
     return(g)
2ea4e3dd
 }
 
 #' Embedding plot for sections
 #'
 #' Scatter plot for sections with two-dimenstional embeddings in a \code{SummarizedExperiment} object. Each point stands for a section.
 #'
 #' @param object A \code{SummarizedExperiment} object.
 #' @param group Character, a variable in slot \code{meta} defining the groups of sections. Sections in the same group have same colors.
 #' @param method Character, the embeddings for scatter plot. Must be one of \code{"TSNE"}, \code{"UMAP"}, or \code{"PCA"}.
 #'
 #' @return A \code{ggplot} object.
 #'
 #' @importFrom SummarizedExperiment colData
 #' @importFrom ggplot2 ggplot aes_string geom_point theme theme_bw
 #' @importFrom ggrepel geom_text_repel
 #'
 #' @export
 #'
 #' @examples
 #' data(zh.data)
6484cb9d
 #' zh <- createTomo(zh.data)
 #' zh <- runTSNE(zh)
2ea4e3dd
 #' # Plot TSNE embeddings.
6484cb9d
 #' embedPlot(zh)
2ea4e3dd
 #'
 #' # Plot UMAP embeddings.
6484cb9d
 #' zh <- runUMAP(zh)
 #' embedPlot(zh, method="UMAP")
2ea4e3dd
 #'
 #' # Color sections by kmeans cluster labels.
6484cb9d
 #' zh <- kmeansClust(zh, 3)
 #' embedPlot(zh, group="kmeans_cluster")
 embedPlot <- function(object, group='section', method='TSNE')
2ea4e3dd
 {
3ce60083
     if(!group %in% names(colData(object)))
2ea4e3dd
     {
3ce60083
         group <- 'section'
6484cb9d
         message("Unknown parameter 'group' for plotting sections!")
2ea4e3dd
     }
3ce60083
     colData(object)[[group]] <- as.character(colData(object)[[group]])
 
     if(method == 'TSNE')
2ea4e3dd
     {
3ce60083
         if(all(c('TSNE1','TSNE2') %in% names(colData(object))))
         {
             g <- ggplot(data.frame(colData(object)), aes_string(x='TSNE1', y='TSNE2', color=group)) +
                 geom_point() +
                 geom_text_repel(aes_string(label='section')) +
                 theme_bw() +
                 theme(legend.position='none')
             return(g)
         }
         else
         {
6484cb9d
             message("Function 'runTSNE' must be run before plotting TSNE embeddings.\n")
3ce60083
         }
2ea4e3dd
     }
3ce60083
     else if(method == 'UMAP')
2ea4e3dd
     {
3ce60083
         if(all(c('UMAP1','UMAP2') %in% names(colData(object))))
         {
             g <- ggplot(data.frame(colData(object)), aes_string(x='UMAP1', y='UMAP2', color=group)) +
                 geom_point(aes_string()) +
                 geom_text_repel(aes_string(label='section')) +
                 theme_bw() +
                 theme(legend.position='none')
             return(g)
         }
         else
         {
6484cb9d
             message("Function 'runUMAP' must be run before plotting UMAP embeddings.\n")
3ce60083
         }
2ea4e3dd
     }
3ce60083
     else if(method == 'PCA')
2ea4e3dd
     {
3ce60083
         if(all(c('PC1','PC2') %in% names(colData(object))))
         {
             g <- ggplot(data.frame(colData(object)), aes_string(x='PC1', y='PC2', color=group)) +
                 geom_point(aes_string()) +
                 geom_text_repel(aes_string(label='section')) +
                 theme_bw() +
                 theme(legend.position='none')
             return(g)
         }
         else
         {
6484cb9d
             message("Function 'runPCA' must be run before plotting PC embeddings.\n")
3ce60083
         }
2ea4e3dd
     }
     else
     {
6484cb9d
         message("Unknown embeddings!\n")
2ea4e3dd
     }
 }
 
 #' Embedding plot for genes
 #'
 #' Scatter plot for genes with two-dimenstional embeddings in a \code{SummarizedExperiment} object. Each point stands for a gene.
 #'
 #' @param object A \code{SummarizedExperiment} object.
 #' @param gene.df Data.frame. The first column must be a vector of gene names, and has the name \code{"gene"}. Additional columns in \code{gene.df} can be used to set the colors of genes.
 #' @param group Character, a column name in \code{gene.df} defining the groups of genes. Genes in the same group have same colors.
 #' @param method Character, the embeddings for scatter plot. Must be one of \code{"TSNE"}, \code{"UMAP"}, or \code{"PCA"}.
 #'
 #' @return A \code{ggplot} object.
 #'
6484cb9d
 #' @importFrom SummarizedExperiment rowData
2ea4e3dd
 #' @importFrom ggplot2 ggplot aes_string geom_point theme theme_bw
 #'
 #' @export
 #'
 #' @examples
 #' data(zh.data)
6484cb9d
 #' zh <- createTomo(zh.data)
 #' peak_genes <- findPeakGene(zh)
 #' zh <- runTSNE(zh, peak_genes$gene)
2ea4e3dd
 #' # Color genes by peak centers.
6484cb9d
 #' geneEmbedPlot(zh, peak_genes)
2ea4e3dd
 #'
 #' # Color genes by peak starts.
6484cb9d
 #' geneEmbedPlot(zh, peak_genes, group="start")
2ea4e3dd
 #'
 #' # Do not color genes.
6484cb9d
 #' geneEmbedPlot(zh, peak_genes["gene"])
 geneEmbedPlot <- function(object, gene.df, group='center', method='TSNE')
2ea4e3dd
 {
3ce60083
     if(group %in% names(gene.df))
         gene.df[[group]] <- as.factor(gene.df[[group]])
     if(method == 'TSNE')
2ea4e3dd
     {
3ce60083
         if(all(c('TSNE1','TSNE2') %in% names(rowData(object))))
         {
             gene.df$TSNE1 <- rowData(object)[as.character(gene.df$gene), 'TSNE1']
             gene.df$TSNE2 <- rowData(object)[as.character(gene.df$gene), 'TSNE2']
             g <- ggplot(gene.df, aes_string(x='TSNE1', y='TSNE2')) +
                 theme_bw()
2ea4e3dd
 
3ce60083
             if(group %in% names(gene.df))
                 g <- g + geom_point(aes_string(color=group))
             else
                 g <- g + geom_point()
2ea4e3dd
 
3ce60083
             return(g)
         }
         else
         {
6484cb9d
             message("Function 'TSNE' must be run for input genes before plotting TSNE embeddings.\n")
3ce60083
         }
2ea4e3dd
     }
3ce60083
     else if(method == 'UMAP')
2ea4e3dd
     {
3ce60083
         if(all(c('UMAP1','UMAP2') %in% names(rowData(object))))
         {
             gene.df$UMAP1 <- rowData(object)[as.character(gene.df$gene), 'UMAP1']
             gene.df$UMAP2 <- rowData(object)[as.character(gene.df$gene), 'UMAP2']
             g <- ggplot(gene.df, aes_string(x='UMAP1', y='UMAP2')) +
                 theme_bw()
2ea4e3dd
 
3ce60083
             if(group %in% names(gene.df))
                 g <- g + geom_point(aes_string(color=group))
             else
                 g <- g + geom_point()
2ea4e3dd
 
3ce60083
             return(g)
         }
         else
         {
6484cb9d
             message("Function 'UMAP' must be run for input genes before plotting UMAP embeddings.\n")
3ce60083
         }
2ea4e3dd
     }
3ce60083
     else if(method == 'PCA')
2ea4e3dd
     {
3ce60083
         if(all(c('PC1','PC2') %in% names(rowData(object))))
         {
             gene.df$PC1 <- rowData(object)[as.character(gene.df$gene), 'PC1']
             gene.df$PC2 <- rowData(object)[as.character(gene.df$gene), 'PC2']
             g <- ggplot(gene.df, aes_string(x='PC1', y='PC2')) +
                 theme_bw()
2ea4e3dd
 
3ce60083
             if(group %in% names(gene.df))
                 g <- g + geom_point(aes_string(color=group))
             else
                 g <- g + geom_point()
2ea4e3dd
 
3ce60083
             return(g)
         }
         else
         {
6484cb9d
             message("Function 'runPCA' must be run for input genes before plotting PC embeddings.\n")
3ce60083
         }
2ea4e3dd
     }
     else
     {
6484cb9d
         message("Unknown embeddings!\n")
2ea4e3dd
     }
 }
 
 #' Expression heatmap
 #'
 #' Heatmap for gene expression across sections in a \code{SummarizedExperiment} object.
 #'
 #' @param object A \code{SummarizedExperiment} object.
 #' @param genes A vector of character, the name of genes to plot heatmap.
 #' @param matrix Character, must be one of \code{"count"}, \code{"normalized"}, or \code{"scaled"}.
 #' @param size Character, the size of gene names. Set it to 0 if you do not want to show gene names.
 #'
 #' @return A \code{ggplot} object.
 #'
6484cb9d
 #' @importFrom SummarizedExperiment assay
2ea4e3dd
 #' @importFrom reshape2 melt
6484cb9d
 #' @importFrom RColorBrewer brewer.pal
 #' @importFrom ggplot2 ggplot aes_string geom_raster scale_fill_distiller theme element_blank element_text
2ea4e3dd
 #'
 #' @export
 #'
 #' @examples
 #' data(zh.data)
6484cb9d
 #' zh <- createTomo(zh.data)
2ea4e3dd
 #'
 #' # Plot some genes.
6484cb9d
 #' expHeatmap(zh,
2ea4e3dd
 #'  c("ENSDARG00000002131", "ENSDARG00000003061", "ENSDARG00000076075", "ENSDARG00000076850"))
 #'
 #' # Plot peak genes.
6484cb9d
 #' peak_genes <- findPeakGene(zh)
 #' expHeatmap(zh, peak_genes$gene)
2ea4e3dd
 #'
 #' # Remove gene names if too many genes are in the heatmap.
6484cb9d
 #' expHeatmap(zh, peak_genes$gene, size=0)
 expHeatmap <- function(object, genes, matrix='scaled', size=5)
2ea4e3dd
 {
3ce60083
     genes <- rev(as.character(genes))
6484cb9d
     exp_matrix <- assay(object, matrix)[genes, ]
     if(matrix %in% c('count', 'normalized'))
3ce60083
         exp_matrix <- log10(exp_matrix + 1)
     genes_df <- melt(exp_matrix, varnames=c('gene','section'))
     exp_name <-  c('Log10(Count+1)', 'Log10(Normalized count+1)', 'Scaled expression\n(Z score)')[c('count', 'normalized', 'scaled') == matrix]
2ea4e3dd
 
3ce60083
     g <- ggplot(genes_df, aes_string(x='section', y='gene', fill='value')) +
         geom_raster() +
6484cb9d
         scale_fill_distiller(palette='RdYlBu', name=exp_name) +
3ce60083
         theme(axis.line=element_blank(),
               axis.text.x=element_text(angle=90, hjust=1, vjust=0.5),
               axis.ticks=element_blank(),
               panel.background=element_blank())
     if(size == 0)
         g <- g + theme(axis.text.y=element_blank())
     else
         g <- g + theme(axis.text.y=element_text(size=size))
2ea4e3dd
 
3ce60083
     return(g)
2ea4e3dd
 }
 
 #' Correlation heatmap of sections
 #'
 #' Heatmap pf correlation coefficients between any two sections in a \code{SummarizedExperiment} object.
 #'
 #' @param object A \code{SummarizedExperiment} object.
 #' @param matrix Character, must be one of \code{"count"}, \code{"normalized"}, or \code{"scaled"}.
 #' @param max.cor Numeric, correlation coefficients bigger than \code{max.cor} are set to \code{max.cor}. It is used to clearly show small correlation coefficients.
 #' @param cor.method Character, the method to calculate correlation coefficients. must be one of \code{"pearson"}, \code{"kendall"}, or \code{"spearman"}.
 #'
 #' @return A \code{ggplot} object.
 #'
6484cb9d
 #' @importFrom SummarizedExperiment assay
2ea4e3dd
 #' @importFrom stats cor
 #' @importFrom reshape2 melt
6484cb9d
 #' @importFrom RColorBrewer brewer.pal
 #' @importFrom ggplot2 ggplot aes_string geom_raster scale_fill_distiller labs theme element_blank element_text
2ea4e3dd
 #'
 #' @export
 #'
 #' @examples
 #' data(zh.data)
6484cb9d
 #' zh <- createTomo(zh.data)
 #' corHeatmap(zh)
2ea4e3dd
 #'
 #' # Use Spearman correlation coefficients.
6484cb9d
 #' corHeatmap(zh, cor.method='spearman')
2ea4e3dd
 #'
 #' # Set max correlation coefficients to 0.3.
6484cb9d
 #' corHeatmap(zh, max.cor=0.3)
 corHeatmap <- function(object, matrix='scaled', max.cor=0.5, cor.method='pearson')
2ea4e3dd
 {
6484cb9d
     exp_matrix <- assay(object, matrix)
3ce60083
     cor_matrix <- cor(exp_matrix, method=cor.method)
     cor_matrix[cor_matrix > max.cor] <- max.cor
     cor_df <- melt(cor_matrix, varnames=c('section1','section2'))
     # c('Pearson r', 'Kendall τ', 'Spearman ρ')
     cor_name <- expression("Pearson"~r,"Kendall"~tau,"Spearman"~rho)[c('pearson', 'kendall', 'spearman') == cor.method]
2ea4e3dd
 
3ce60083
     g <- ggplot(cor_df, aes_string(x='section1', y='section2', fill='value')) +
         geom_raster() +
6484cb9d
         scale_fill_distiller(palette='RdYlBu', name=cor_name) +
3ce60083
         labs(x='', y='') +
         theme(axis.line=element_blank(),
               axis.text.x=element_text(angle=90, hjust=1, vjust=0.5),
               axis.ticks=element_blank(),
               panel.background=element_blank())
2ea4e3dd
 
3ce60083
     return(g)
2ea4e3dd
 }
 
 #' Correlation heatmap of genes
 #'
 #' Heatmap of correlation coefficients between any two queried genes in a \code{SummarizedExperiment} object.
 #'
 #' @param object A \code{SummarizedExperiment} object.
 #' @param gene.df Data.frame. The first column must be a vector of gene names, and has the name \code{"gene"}. Additional columns in \code{gene.df} can be used to set the colors of genes.
 #' @param group Character, a column name in \code{gene.df} defining the groups of genes. Genes in the same group have same colors on the side bar.
 #' @param matrix Character, must be one of \code{"count"}, \code{"normalized"}, or \code{"scaled"}.
 #' @param size Numeric, the size of gene names. Set it to 0 if you do not want to show gene names.
 #' @param cor.method Character, the method to calculate correlation coefficients. must be one of \code{"pearson"}, \code{"kendall"}, or \code{"spearman"}.
 #'
 #' @details This method can create a pure heatmap or a heatmap with side bar. If you prefer a pure heatmap, input a \code{gene.df} with a single column of gene names.
 #' However, you may want to show additional information of genes with a side bar, and the grouping information should be saved as additional column(s) of \code{gene.df}, and declared as \code{group}.
6484cb9d
 #' By default, you can use the output by \code{findPeakGene} as input \code{gene.df}. Peak genes will be grouped by their centers on the side bar.
2ea4e3dd
 #'
 #' @return A \code{ggplot} object.
 #'
6484cb9d
 #' @importFrom SummarizedExperiment assay
2ea4e3dd
 #' @importFrom stats cor
 #' @importFrom grDevices rainbow
 #' @importFrom reshape2 melt
6484cb9d
 #' @importFrom RColorBrewer brewer.pal
 #' @importFrom ggplot2 ggplot aes_string geom_raster scale_fill_distiller labs theme element_blank element_text ggplot_build scale_color_manual guides annotation_raster guide_legend coord_cartesian
2ea4e3dd
 #'
 #' @export
 #'
 #' @examples
 #' data(zh.data)
6484cb9d
 #' zh <- createTomo(zh.data)
2ea4e3dd
 #'
 #' # Correlation heatmap for all peak genes.
6484cb9d
 #' peak_genes <- findPeakGene(zh)
 #' geneCorHeatmap(zh, peak_genes)
2ea4e3dd
 #'
 #' # Use Spearman correlation coefficients.
6484cb9d
 #' geneCorHeatmap(zh, peak_genes, cor.method="spearman")
2ea4e3dd
 #'
 #' # Group genes by peak start.
6484cb9d
 #' geneCorHeatmap(zh, peak_genes, group="start")
2ea4e3dd
 #'
 #' # Plot without side bar.
6484cb9d
 #' geneCorHeatmap(zh, data.frame(
2ea4e3dd
 #'  gene=c("ENSDARG00000002131", "ENSDARG00000003061", "ENSDARG00000076075", "ENSDARG00000076850")))
6484cb9d
 geneCorHeatmap <- function(object, gene.df, group='center', matrix='scaled', size=5, cor.method='pearson')
2ea4e3dd
 {
6484cb9d
     exp_matrix <- assay(object, matrix)
3ce60083
     genes <- as.character(gene.df$gene)
     cor_matrix <- cor(t(exp_matrix[genes, ]), method=cor.method)
     cor_df <- melt(cor_matrix, varnames=c('gene1', 'gene2'))
     cor_name <- expression("Pearson"~r,"Kendall"~tau,"Spearman"~rho)[c('pearson', 'kendall', 'spearman') == cor.method]
     # plot sidebar if genes are grouped
     plot_sidebar <- group %in% names(gene.df)
2ea4e3dd
 
3ce60083
     if(plot_sidebar)
     {
         group_genes <- gene.df[[group]]
         names(group_genes) <- gene.df$gene
         cor_df$group <- as.factor(group_genes[cor_df$gene1])
         # Colors for column side bar
         n_groups <- length(unique(group_genes))
         color_legend <- rainbow(n_groups)
         names(color_legend) <- unique(group_genes)
     }
2ea4e3dd
 
3ce60083
     g <- ggplot(cor_df, aes_string(x='gene1', y='gene2', fill='value')) +
         geom_raster() +
6484cb9d
         scale_fill_distiller(palette='RdYlBu', name=cor_name)  +
3ce60083
         labs(x='', y='') +
         theme(axis.line=element_blank(),
               axis.text.y=element_blank(),
               axis.ticks=element_blank(),
               panel.background=element_blank(),
               legend.key=element_blank(),
               legend.box='horizontal')
     if(size == 0)
         g <- g + theme(axis.text.x=element_blank())
     else
         g <- g + theme(axis.text.x=element_text(angle=90, hjust=1, size=size))
2ea4e3dd
 
3ce60083
     if(plot_sidebar)
     {
         gbuild <- ggplot_build(plot = g)
         y_range <- diff(x = gbuild$layout$panel_params[[1]]$y.range)
         y_min <- max(gbuild$layout$panel_params[[1]]$y.range) + 0.01 * y_range
         y_max <- y_min + 0.03 * y_range
2ea4e3dd
 
3ce60083
         g <- g + geom_point(aes_string(color='group'), alpha=0, shape=15, size=5) +
6484cb9d
             scale_color_manual(name=group, values=color_legend) +
3ce60083
             guides(color=guide_legend(override.aes=list(alpha=1))) +
6484cb9d
             annotation_raster(t(color_legend[as.character(group_genes)]),
                               -Inf, Inf, ymin=y_min, ymax=y_max) +
3ce60083
             coord_cartesian(ylim = c(0, y_max), clip='off')
     }
2ea4e3dd
 
3ce60083
     return(g)
2ea4e3dd
 }