Seurat中Dotplot的作图数据，其实来源于FindAllmarker得到的数据：pct，avg_log2C 。获取百分比，平均表达量

本文链接：https://blog.csdn.net/qq_52813185/article/details/136194641
博客涉及Python、数据库和人工智能相关内容。Python作为强大的编程语言，在数据处理等方面发挥重要作用；数据库用于存储和管理数据；人工智能则借助前两者实现智能应用。三者结合推动信息技术发展。
摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >


#########################设置我自己的r包加载路径，通常你不需要运行这段代码-----
.libPaths(c(
  '/home/rootyll/seurat_v5/',
  "/usr/local/lib/R/site-library", "/usr/lib/R/site-library", "/usr/lib/R/library"
))
###################################################################################333
source('~/OLP_combine/scRNA_scripts/lib.R')
library(Seurat)
print(getwd())
print(.libPaths())


dir.create('~/gzh/20240220_获取表达marker基因的百分比pct')
setwd('~/gzh/20240220_获取表达marker基因的百分比pct')

sce.all.int=readRDS("~/gzh/pbmc3k_final.rds")

sce.all.int@assays$RNA@data
DimPlot(sce.all.int)
head(sce.all.int@meta.data)

sce.all.int$labels=Idents(sce.all.int)
sce.all.int$group=ifelse( sce.all.int$nFeature_RNA>800,'Healthy','OLP')
All.merge=sce.all.int
table(All.merge$group)
DimPlot(All.merge,label = TRUE)

head(sce.all.int@meta.data)

markers_croase=FindAllMarkers(All.merge,only.pos = TRUE)
head(markers_croase);print(getwd())
#write.csv(markers_croase,file = 'markers_croase.csv')
markers_croase=read.csv('./markers_croase.csv')
head(markers_croase);dim(markers_croase)

head(sce.all.int@meta.data)

DotPlot(sce.all.int,features = "CD3E")

#计算每个基因的pct和avg.exp,以细胞类型为细胞类型分组信息,信息在meta下面的labels----

object=sce.all.int
features= markers_croase$gene
assay = NULL;cols = c("lightgrey",
                      "blue");
col.min = -2.5; col.max = 2.5; dot.min = 0; dot.scale = 6;
idents =   Idents(sce.all.int)       ; group.by = "labels";

split.by = NULL; cluster.idents = FALSE;
scale = TRUE;
scale.by = "radius"; scale.min = NA; scale.max = NA





assay <- assay %||% DefaultAssay(object = object) ;assay
DefaultAssay(object = object) <- assay
split.colors <- !is.null(x = split.by) && !any(cols %in%
                                                 rownames(x = brewer.pal.info));split.colors

##SCALE------
scale.func <- switch(EXPR = scale.by, size = scale_size,
                     radius = scale_radius, stop("'scale.by' must be either 'size' or 'radius'"))

feature.groups <- NULL


#features是否是list----
if (is.list(features) | any(!is.na(names(features)))) {
  feature.groups <- unlist(x = sapply(X = 1:length(features),
                                      FUN = function(x) {
                                        return(rep(x = names(x = features)[x], each = length(features[[x]])))
                                      }))
  
  if (any(is.na(x = feature.groups))) {
    warning("Some feature groups are unnamed.", call. = FALSE,
            immediate. = TRUE)
  }
  features <- unlist(x = features)
  names(x = feature.groups) <- features
}


cells <- unlist(x = CellsByIdentities(object = object, cells = colnames(object[[assay]]),
                                      idents =Idents(object)   ));length(cells)



#3 获取画图数据--------
data.features <- FetchData(object = object, vars = features,  #slot = 'data',
                           cells = cells);dim(data.features);
head(data.features[,1:10])
dim(data.features)
length(features)
unique(features) |>length()
# mydata=FetchData(sce.all.int,vars = features)
# head(mydata[,1:10])


data.features$id <- if ( is.null(x = group.by) ) {
  Idents(object = object)[cells, drop = TRUE]  #Drop unused levels
} else {
  object[[group.by, drop = TRUE]][cells, drop = TRUE]
}

head(data.features);dim(data.features)



if (!is.factor(x = data.features$id)) {
  data.features$id <- factor(x = data.features$id)
}

id.levels <- levels(x = data.features$id)
data.features$id <- as.vector(x = data.features$id)
head(data.features[,(ncol(data.features)-1):ncol(data.features)]);dim(data.features)

#4是否split------
if (!is.null(x = split.by)) {
  splits <- FetchData(object = object, vars = split.by)[cells,
                                                        split.by]
  if (split.colors) {
    if (length(x = unique(x = splits)) > length(x = cols)) {
      stop(paste0("Need to specify at least ", length(x = unique(x = splits)),
                  " colors using the cols parameter"))
    }
    cols <- cols[1:length(x = unique(x = splits))]
    names(x = cols) <- unique(x = splits)
  }
  data.features$id <- paste(data.features$id, splits,
                            sep = "_")
  unique.splits <- unique(x = splits)
  id.levels <- paste0(rep(x = id.levels, each = length(x = unique.splits)),
                      "_", rep(x = unique(x = splits), times = length(x = id.levels)))
}

head(data.features);dim(data.features)

data.plot <- lapply(X = unique(x = data.features$id), FUN = function(ident) {
  
  #ident="Myofibroblasts"
  data.use <- data.features[data.features$id == ident,
                            1:(ncol(x = data.features) - 1), drop = FALSE]
  head(data.use);dim(data.use)
  
  
  #5 计算avg.exp和pct.exp----
  avg.exp <- apply(X = data.use, MARGIN = 2, FUN = function(x) {
    return(mean(x = expm1(x = x)))        #expm1() function in R Language is used to compute exponential minus 1 i.e, exp()-1.
  });head(avg.exp);expm1(1)
  
  pct.exp <- apply(X = data.use, MARGIN = 2, FUN = PercentAbove, #Calculate the percentage of a vector above some threshold   #表示要应用的函数是PercentAbove，这个函数会计算大于指定阈值的数值所占比例。
                   threshold = 0);pct.exp
  return(list(avg.exp = avg.exp, pct.exp = pct.exp))
  
})
head(data.plot)



names(x = data.plot) <- unique(x = data.features$id) ;head(data.plot)

if (cluster.idents) {
  mat <- do.call(what = rbind, args = lapply(X = data.plot,
                                             FUN = unlist));mat
  mat <- scale(x = mat);mat
  id.levels <- id.levels[hclust(d = dist(x = mat))$order]
}


head(data.plot);head(data.use)

data.plot <- lapply(X = names(x = data.plot), FUN = function(x) {
  data.use <- as.data.frame(x = data.plot[[x]])
  data.use$features.plot <- rownames(x = data.use)
  data.use$id <- x
  return(data.use)
});head(data.plot)

data.plot <- do.call(what = "rbind", args = data.plot);data.plot

if ( !is.null(x = id.levels) ) {
  data.plot$id <- factor(x = data.plot$id, levels = id.levels)
}
ngroup <- length(x = levels(x = data.plot$id));ngroup

if (ngroup == 1) {
  scale <- FALSE
  warning("Only one identity present, the expression values will be not scaled",
          call. = FALSE, immediate. = TRUE)
}else if (ngroup < 5 & scale) {
  warning("Scaling data with a low number of groups may produce misleading results",
          call. = FALSE, immediate. = TRUE)
}

head(data.plot)

avg.exp.scaled <- sapply(X = unique(x = data.plot$features.plot),
                         FUN = function(x) {
                           data.use <- data.plot[data.plot$features.plot ==
                                                   x, "avg.exp"]
                           if (scale) {
                             data.use <- scale(x = log1p(data.use))
                             data.use <- MinMax(data = data.use, min = col.min,
                                                max = col.max)
                           }
                           else {
                             data.use <- log1p(x = data.use)
                           }
                           return(data.use)
                         });avg.exp.scaled

avg.exp.scaled <- as.vector(x = t(x = avg.exp.scaled))
if (split.colors) {
  avg.exp.scaled <- as.numeric(x = cut(x = avg.exp.scaled,
                                       breaks = 20))
}
data.plot$avg.exp.scaled <- avg.exp.scaled

head(data.plot);dim(data.plot)

# data.plot$features.plot <- factor(x = data.plot$features.plot,
#                                   levels = features);data.plot
# data.plot$pct.exp[data.plot$pct.exp < dot.min] <- NA
# data.plot$pct.exp <- data.plot$pct.exp * 100 ;data.plot
data.plot$gene=data.plot$features.plot

head(data.plot)

head(markers_croase)

merged_data <- merge( markers_croase,data.plot, by = "gene")
head(merged_data)

merged_data[merged_data$gene=='CD3E',]
dim(merged_data)

merged_data$'pct.exp*100'=merged_data$pct.exp*100


DotPlot(sce.all.int,features = "PTMA")



write.csv(merged_data,file = "markers_croase_with_pct_avg.exp.csv")

dim(merged_data)



#5 学习材料 dotplot源代码-----

??DotPlot
Seurat:: DotPlot(sce.all.int,features = 'ACTA2',assay = "RNA" )
debug(DotPlot)
debug(DotPlot(sce.all.int,features = 'OR4F5'))
debug(PercentAbove)
undebug(DotPlot)

{
  function (object, features, assay = NULL, cols = c("lightgrey",
                                                     "blue"), col.min = -2.5, col.max = 2.5, dot.min = 0, dot.scale = 6,
            idents = NULL, group.by = NULL, split.by = NULL, cluster.idents = FALSE,
            scale = TRUE, scale.by = "radius", scale.min = NA, scale.max = NA)
    
    
  {
    
    object=sce.all.int
    features="ACTA2"
    assay = NULL;cols = c("lightgrey",
                          "blue");
    col.min = -2.5; col.max = 2.5; dot.min = 0; dot.scale = 6;
    idents = 'Myofibroblasts'; group.by = "labels";
    split.by = NULL; cluster.idents = FALSE;
    scale = TRUE;
    scale.by = "radius"; scale.min = NA; scale.max = NA
    
    
    
    assay <- assay %||% DefaultAssay(object = object)
    DefaultAssay(object = object) <- assay
    split.colors <- !is.null(x = split.by) && !any(cols %in%
                                                     rownames(x = brewer.pal.info));split.colors
    
    ##SCALE------
    scale.func <- switch(EXPR = scale.by, size = scale_size,
                         radius = scale_radius, stop("'scale.by' must be either 'size' or 'radius'"))
    
    feature.groups <- NULL
    
    
    #features是否是list----
    if (is.list(features) | any(!is.na(names(features)))) {
      feature.groups <- unlist(x = sapply(X = 1:length(features),
                                          FUN = function(x) {
                                            return(rep(x = names(x = features)[x], each = length(features[[x]])))
                                          }))
      
      if (any(is.na(x = feature.groups))) {
        warning("Some feature groups are unnamed.", call. = FALSE,
                immediate. = TRUE)
      }
      features <- unlist(x = features)
      names(x = feature.groups) <- features
    }
    
    
    cells <- unlist(x = CellsByIdentities(object = object, cells = colnames(object[[assay]]),
                                          idents =Idents(object)   ));length(cells)
    #3 获取画图数据--------
    data.features <- FetchData(object = object, vars = features,
                               cells = cells);dim(data.features);head(data.features)
    
    data.features$id <- if ( is.null(x = group.by) ) {
      Idents(object = object)[cells, drop = TRUE]  #Drop unused levels
    } else {
      object[[group.by, drop = TRUE]][cells, drop = TRUE]
    }
    
    head(data.features);dim(data.features)
    
    
    
    if (!is.factor(x = data.features$id)) {
      data.features$id <- factor(x = data.features$id)
    }
    
    id.levels <- levels(x = data.features$id)
    data.features$id <- as.vector(x = data.features$id)
    head(data.features)
    
    #4是否split------
    if (!is.null(x = split.by)) {
      splits <- FetchData(object = object, vars = split.by)[cells,
                                                            split.by]
      if (split.colors) {
        if (length(x = unique(x = splits)) > length(x = cols)) {
          stop(paste0("Need to specify at least ", length(x = unique(x = splits)),
                      " colors using the cols parameter"))
        }
        cols <- cols[1:length(x = unique(x = splits))]
        names(x = cols) <- unique(x = splits)
      }
      data.features$id <- paste(data.features$id, splits,
                                sep = "_")
      unique.splits <- unique(x = splits)
      id.levels <- paste0(rep(x = id.levels, each = length(x = unique.splits)),
                          "_", rep(x = unique(x = splits), times = length(x = id.levels)))
    }
    
    head(data.features);dim(data.features)
    
    data.plot <- lapply(X = unique(x = data.features$id), FUN = function(ident) {
      
      #ident="Myofibroblasts"
      data.use <- data.features[data.features$id == ident,
                                1:(ncol(x = data.features) - 1), drop = FALSE]
      head(data.use);dim(data.use)
      
      
      #5 计算avg.exp和pct.exp----
      avg.exp <- apply(X = data.use, MARGIN = 2, FUN = function(x) {
        return(mean(x = expm1(x = x)))        #expm1() function in R Language is used to compute exponential minus 1 i.e, exp()-1.
      });head(avg.exp);expm1(1)
      
      pct.exp <- apply(X = data.use, MARGIN = 2, FUN = PercentAbove, #Calculate the percentage of a vector above some threshold   #表示要应用的函数是PercentAbove，这个函数会计算大于指定阈值的数值所占比例。
                       threshold = 0);pct.exp
      return(list(avg.exp = avg.exp, pct.exp = pct.exp))
      
    })
    data.plot
    
    
    
    names(x = data.plot) <- unique(x = data.features$id) ;head(data.plot)
    
    if (cluster.idents) {
      mat <- do.call(what = rbind, args = lapply(X = data.plot,
                                                 FUN = unlist));mat
      mat <- scale(x = mat);mat
      id.levels <- id.levels[hclust(d = dist(x = mat))$order]
    }
    
    
    head(data.plot);head(data.use)
    
    data.plot <- lapply(X = names(x = data.plot), FUN = function(x) {
      data.use <- as.data.frame(x = data.plot[[x]])
      data.use$features.plot <- rownames(x = data.use)
      data.use$id <- x
      return(data.use)
    });head(data.plot)
    
    data.plot <- do.call(what = "rbind", args = data.plot);data.plot
    
    if ( !is.null(x = id.levels) ) {
      data.plot$id <- factor(x = data.plot$id, levels = id.levels)
    }
    ngroup <- length(x = levels(x = data.plot$id));ngroup
    
    if (ngroup == 1) {
      scale <- FALSE
      warning("Only one identity present, the expression values will be not scaled",
              call. = FALSE, immediate. = TRUE)
    }else if (ngroup < 5 & scale) {
      warning("Scaling data with a low number of groups may produce misleading results",
              call. = FALSE, immediate. = TRUE)
    }
    
    head(data.plot)
    
    avg.exp.scaled <- sapply(X = unique(x = data.plot$features.plot),
                             FUN = function(x) {
                               data.use <- data.plot[data.plot$features.plot ==
                                                       x, "avg.exp"]
                               if (scale) {
                                 data.use <- scale(x = log1p(data.use))
                                 data.use <- MinMax(data = data.use, min = col.min,
                                                    max = col.max)
                               }
                               else {
                                 data.use <- log1p(x = data.use)
                               }
                               return(data.use)
                             });avg.exp.scaled
    
    avg.exp.scaled <- as.vector(x = t(x = avg.exp.scaled))
    if (split.colors) {
      avg.exp.scaled <- as.numeric(x = cut(x = avg.exp.scaled,
                                           breaks = 20))
    }
    data.plot$avg.exp.scaled <- avg.exp.scaled
    data.plot$features.plot <- factor(x = data.plot$features.plot,
                                      levels = features);data.plot
    data.plot$pct.exp[data.plot$pct.exp < dot.min] <- NA
    data.plot$pct.exp <- data.plot$pct.exp * 100 ;data.plot
    
    
    
    ############后面就是详细的画图步骤，暂时可以不看=======
    
    
    
    if (split.colors) {
      splits.use <- unlist(x = lapply(X = data.plot$id, FUN = function(x) sub(paste0(".*_(",
                                                                                     paste(sort(unique(x = splits), decreasing = TRUE),
                                                                                           collapse = "|"), ")$"), "\\1", x)))
      data.plot$colors <- mapply(FUN = function(color, value) {
        return(colorRampPalette(colors = c("grey", color))(20)[value])
      }, color = cols[splits.use], value = avg.exp.scaled)
    }
    
    
    color.by <- ifelse(test = split.colors, yes = "colors",
                       no = "avg.exp.scaled");color.by
    
    if (!is.na(x = scale.min)) {
      data.plot[data.plot$pct.exp < scale.min, "pct.exp"] <- scale.min
    }
    if (!is.na(x = scale.max)) {
      data.plot[data.plot$pct.exp > scale.max, "pct.exp"] <- scale.max
    }
    if (!is.null(x = feature.groups)) {
      data.plot$feature.groups <- factor(x = feature.groups[data.plot$features.plot],
                                         levels = unique(x = feature.groups))
    }
    
    plot <-ggplot2:: ggplot(data = data.plot, mapping = aes_string(x = "features.plot",
                                                                   y = "id")) + geom_point(mapping = aes_string(size = "pct.exp",
                                                                                                                color = color.by)) + scale.func(range = c(0, dot.scale),
                                                                                                                                                limits = c(scale.min, scale.max)) + theme(axis.title.x = element_blank(),
                                                                                                                                                                                          axis.title.y = element_blank()) + guides(size = guide_legend(title = "Percent Expressed")) +
      labs(x = "Features", y = ifelse(test = is.null(x = split.by),
                                      yes = "Identity", no = "Split Identity")) + cowplot::theme_cowplot()
    
    
    
    
    if ( !is.null(x = feature.groups)  ) {
      plot <- plot + facet_grid(facets = ~feature.groups,
                                scales = "free_x", space = "free_x", switch = "y") +
        theme(panel.spacing = unit(x = 1, units = "lines"),
              strip.background = element_blank())
    }
    if (split.colors) {
      plot <- plot + scale_color_identity()
    }
    else if (length(x = cols) == 1) {
      plot <- plot + scale_color_distiller(palette = cols)
    }
    else {
      plot <- plot + scale_color_gradient(low = cols[1], high = cols[2])
    }
    if (!split.colors) {
      plot <- plot + guides(color = guide_colorbar(title = "Average Expression"))
    }
    return(plot)
  }
  
  
}