#########################设置我自己的r包加载路径,通常你不需要运行这段代码-----
.libPaths(c(
'/home/rootyll/seurat_v5/',
"/usr/local/lib/R/site-library", "/usr/lib/R/site-library", "/usr/lib/R/library"
))
###################################################################################333
source('~/OLP_combine/scRNA_scripts/lib.R')
library(Seurat)
print(getwd())
print(.libPaths())
dir.create('~/gzh/20240220_获取表达marker基因的百分比pct')
setwd('~/gzh/20240220_获取表达marker基因的百分比pct')
sce.all.int=readRDS("~/gzh/pbmc3k_final.rds")
sce.all.int@assays$RNA@data
DimPlot(sce.all.int)
head(sce.all.int@meta.data)
sce.all.int$labels=Idents(sce.all.int)
sce.all.int$group=ifelse( sce.all.int$nFeature_RNA>800,'Healthy','OLP')
All.merge=sce.all.int
table(All.merge$group)
DimPlot(All.merge,label = TRUE)
head(sce.all.int@meta.data)
markers_croase=FindAllMarkers(All.merge,only.pos = TRUE)
head(markers_croase);print(getwd())
#write.csv(markers_croase,file = 'markers_croase.csv')
markers_croase=read.csv('./markers_croase.csv')
head(markers_croase);dim(markers_croase)
head(sce.all.int@meta.data)
DotPlot(sce.all.int,features = "CD3E")
#计算每个基因的pct和avg.exp,以细胞类型为细胞类型分组信息,信息在meta下面的labels----
object=sce.all.int
features= markers_croase$gene
assay = NULL;cols = c("lightgrey",
"blue");
col.min = -2.5; col.max = 2.5; dot.min = 0; dot.scale = 6;
idents = Idents(sce.all.int) ; group.by = "labels";
split.by = NULL; cluster.idents = FALSE;
scale = TRUE;
scale.by = "radius"; scale.min = NA; scale.max = NA
assay <- assay %||% DefaultAssay(object = object) ;assay
DefaultAssay(object = object) <- assay
split.colors <- !is.null(x = split.by) && !any(cols %in%
rownames(x = brewer.pal.info));split.colors
##SCALE------
scale.func <- switch(EXPR = scale.by, size = scale_size,
radius = scale_radius, stop("'scale.by' must be either 'size' or 'radius'"))
feature.groups <- NULL
#features是否是list----
if (is.list(features) | any(!is.na(names(features)))) {
feature.groups <- unlist(x = sapply(X = 1:length(features),
FUN = function(x) {
return(rep(x = names(x = features)[x], each = length(features[[x]])))
}))
if (any(is.na(x = feature.groups))) {
warning("Some feature groups are unnamed.", call. = FALSE,
immediate. = TRUE)
}
features <- unlist(x = features)
names(x = feature.groups) <- features
}
cells <- unlist(x = CellsByIdentities(object = object, cells = colnames(object[[assay]]),
idents =Idents(object) ));length(cells)
#3 获取画图数据--------
data.features <- FetchData(object = object, vars = features, #slot = 'data',
cells = cells);dim(data.features);
head(data.features[,1:10])
dim(data.features)
length(features)
unique(features) |>length()
# mydata=FetchData(sce.all.int,vars = features)
# head(mydata[,1:10])
data.features$id <- if ( is.null(x = group.by) ) {
Idents(object = object)[cells, drop = TRUE] #Drop unused levels
} else {
object[[group.by, drop = TRUE]][cells, drop = TRUE]
}
head(data.features);dim(data.features)
if (!is.factor(x = data.features$id)) {
data.features$id <- factor(x = data.features$id)
}
id.levels <- levels(x = data.features$id)
data.features$id <- as.vector(x = data.features$id)
head(data.features[,(ncol(data.features)-1):ncol(data.features)]);dim(data.features)
#4是否split------
if (!is.null(x = split.by)) {
splits <- FetchData(object = object, vars = split.by)[cells,
split.by]
if (split.colors) {
if (length(x = unique(x = splits)) > length(x = cols)) {
stop(paste0("Need to specify at least ", length(x = unique(x = splits)),
" colors using the cols parameter"))
}
cols <- cols[1:length(x = unique(x = splits))]
names(x = cols) <- unique(x = splits)
}
data.features$id <- paste(data.features$id, splits,
sep = "_")
unique.splits <- unique(x = splits)
id.levels <- paste0(rep(x = id.levels, each = length(x = unique.splits)),
"_", rep(x = unique(x = splits), times = length(x = id.levels)))
}
head(data.features);dim(data.features)
data.plot <- lapply(X = unique(x = data.features$id), FUN = function(ident) {
#ident="Myofibroblasts"
data.use <- data.features[data.features$id == ident,
1:(ncol(x = data.features) - 1), drop = FALSE]
head(data.use);dim(data.use)
#5 计算avg.exp和pct.exp----
avg.exp <- apply(X = data.use, MARGIN = 2, FUN = function(x) {
return(mean(x = expm1(x = x))) #expm1() function in R Language is used to compute exponential minus 1 i.e, exp()-1.
});head(avg.exp);expm1(1)
pct.exp <- apply(X = data.use, MARGIN = 2, FUN = PercentAbove, #Calculate the percentage of a vector above some threshold #表示要应用的函数是PercentAbove,这个函数会计算大于指定阈值的数值所占比例。
threshold = 0);pct.exp
return(list(avg.exp = avg.exp, pct.exp = pct.exp))
})
head(data.plot)
names(x = data.plot) <- unique(x = data.features$id) ;head(data.plot)
if (cluster.idents) {
mat <- do.call(what = rbind, args = lapply(X = data.plot,
FUN = unlist));mat
mat <- scale(x = mat);mat
id.levels <- id.levels[hclust(d = dist(x = mat))$order]
}
head(data.plot);head(data.use)
data.plot <- lapply(X = names(x = data.plot), FUN = function(x) {
data.use <- as.data.frame(x = data.plot[[x]])
data.use$features.plot <- rownames(x = data.use)
data.use$id <- x
return(data.use)
});head(data.plot)
data.plot <- do.call(what = "rbind", args = data.plot);data.plot
if ( !is.null(x = id.levels) ) {
data.plot$id <- factor(x = data.plot$id, levels = id.levels)
}
ngroup <- length(x = levels(x = data.plot$id));ngroup
if (ngroup == 1) {
scale <- FALSE
warning("Only one identity present, the expression values will be not scaled",
call. = FALSE, immediate. = TRUE)
}else if (ngroup < 5 & scale) {
warning("Scaling data with a low number of groups may produce misleading results",
call. = FALSE, immediate. = TRUE)
}
head(data.plot)
avg.exp.scaled <- sapply(X = unique(x = data.plot$features.plot),
FUN = function(x) {
data.use <- data.plot[data.plot$features.plot ==
x, "avg.exp"]
if (scale) {
data.use <- scale(x = log1p(data.use))
data.use <- MinMax(data = data.use, min = col.min,
max = col.max)
}
else {
data.use <- log1p(x = data.use)
}
return(data.use)
});avg.exp.scaled
avg.exp.scaled <- as.vector(x = t(x = avg.exp.scaled))
if (split.colors) {
avg.exp.scaled <- as.numeric(x = cut(x = avg.exp.scaled,
breaks = 20))
}
data.plot$avg.exp.scaled <- avg.exp.scaled
head(data.plot);dim(data.plot)
# data.plot$features.plot <- factor(x = data.plot$features.plot,
# levels = features);data.plot
# data.plot$pct.exp[data.plot$pct.exp < dot.min] <- NA
# data.plot$pct.exp <- data.plot$pct.exp * 100 ;data.plot
data.plot$gene=data.plot$features.plot
head(data.plot)
head(markers_croase)
merged_data <- merge( markers_croase,data.plot, by = "gene")
head(merged_data)
merged_data[merged_data$gene=='CD3E',]
dim(merged_data)
merged_data$'pct.exp*100'=merged_data$pct.exp*100
DotPlot(sce.all.int,features = "PTMA")
write.csv(merged_data,file = "markers_croase_with_pct_avg.exp.csv")
dim(merged_data)
#5 学习材料 dotplot源代码-----
??DotPlot
Seurat:: DotPlot(sce.all.int,features = 'ACTA2',assay = "RNA" )
debug(DotPlot)
debug(DotPlot(sce.all.int,features = 'OR4F5'))
debug(PercentAbove)
undebug(DotPlot)
{
function (object, features, assay = NULL, cols = c("lightgrey",
"blue"), col.min = -2.5, col.max = 2.5, dot.min = 0, dot.scale = 6,
idents = NULL, group.by = NULL, split.by = NULL, cluster.idents = FALSE,
scale = TRUE, scale.by = "radius", scale.min = NA, scale.max = NA)
{
object=sce.all.int
features="ACTA2"
assay = NULL;cols = c("lightgrey",
"blue");
col.min = -2.5; col.max = 2.5; dot.min = 0; dot.scale = 6;
idents = 'Myofibroblasts'; group.by = "labels";
split.by = NULL; cluster.idents = FALSE;
scale = TRUE;
scale.by = "radius"; scale.min = NA; scale.max = NA
assay <- assay %||% DefaultAssay(object = object)
DefaultAssay(object = object) <- assay
split.colors <- !is.null(x = split.by) && !any(cols %in%
rownames(x = brewer.pal.info));split.colors
##SCALE------
scale.func <- switch(EXPR = scale.by, size = scale_size,
radius = scale_radius, stop("'scale.by' must be either 'size' or 'radius'"))
feature.groups <- NULL
#features是否是list----
if (is.list(features) | any(!is.na(names(features)))) {
feature.groups <- unlist(x = sapply(X = 1:length(features),
FUN = function(x) {
return(rep(x = names(x = features)[x], each = length(features[[x]])))
}))
if (any(is.na(x = feature.groups))) {
warning("Some feature groups are unnamed.", call. = FALSE,
immediate. = TRUE)
}
features <- unlist(x = features)
names(x = feature.groups) <- features
}
cells <- unlist(x = CellsByIdentities(object = object, cells = colnames(object[[assay]]),
idents =Idents(object) ));length(cells)
#3 获取画图数据--------
data.features <- FetchData(object = object, vars = features,
cells = cells);dim(data.features);head(data.features)
data.features$id <- if ( is.null(x = group.by) ) {
Idents(object = object)[cells, drop = TRUE] #Drop unused levels
} else {
object[[group.by, drop = TRUE]][cells, drop = TRUE]
}
head(data.features);dim(data.features)
if (!is.factor(x = data.features$id)) {
data.features$id <- factor(x = data.features$id)
}
id.levels <- levels(x = data.features$id)
data.features$id <- as.vector(x = data.features$id)
head(data.features)
#4是否split------
if (!is.null(x = split.by)) {
splits <- FetchData(object = object, vars = split.by)[cells,
split.by]
if (split.colors) {
if (length(x = unique(x = splits)) > length(x = cols)) {
stop(paste0("Need to specify at least ", length(x = unique(x = splits)),
" colors using the cols parameter"))
}
cols <- cols[1:length(x = unique(x = splits))]
names(x = cols) <- unique(x = splits)
}
data.features$id <- paste(data.features$id, splits,
sep = "_")
unique.splits <- unique(x = splits)
id.levels <- paste0(rep(x = id.levels, each = length(x = unique.splits)),
"_", rep(x = unique(x = splits), times = length(x = id.levels)))
}
head(data.features);dim(data.features)
data.plot <- lapply(X = unique(x = data.features$id), FUN = function(ident) {
#ident="Myofibroblasts"
data.use <- data.features[data.features$id == ident,
1:(ncol(x = data.features) - 1), drop = FALSE]
head(data.use);dim(data.use)
#5 计算avg.exp和pct.exp----
avg.exp <- apply(X = data.use, MARGIN = 2, FUN = function(x) {
return(mean(x = expm1(x = x))) #expm1() function in R Language is used to compute exponential minus 1 i.e, exp()-1.
});head(avg.exp);expm1(1)
pct.exp <- apply(X = data.use, MARGIN = 2, FUN = PercentAbove, #Calculate the percentage of a vector above some threshold #表示要应用的函数是PercentAbove,这个函数会计算大于指定阈值的数值所占比例。
threshold = 0);pct.exp
return(list(avg.exp = avg.exp, pct.exp = pct.exp))
})
data.plot
names(x = data.plot) <- unique(x = data.features$id) ;head(data.plot)
if (cluster.idents) {
mat <- do.call(what = rbind, args = lapply(X = data.plot,
FUN = unlist));mat
mat <- scale(x = mat);mat
id.levels <- id.levels[hclust(d = dist(x = mat))$order]
}
head(data.plot);head(data.use)
data.plot <- lapply(X = names(x = data.plot), FUN = function(x) {
data.use <- as.data.frame(x = data.plot[[x]])
data.use$features.plot <- rownames(x = data.use)
data.use$id <- x
return(data.use)
});head(data.plot)
data.plot <- do.call(what = "rbind", args = data.plot);data.plot
if ( !is.null(x = id.levels) ) {
data.plot$id <- factor(x = data.plot$id, levels = id.levels)
}
ngroup <- length(x = levels(x = data.plot$id));ngroup
if (ngroup == 1) {
scale <- FALSE
warning("Only one identity present, the expression values will be not scaled",
call. = FALSE, immediate. = TRUE)
}else if (ngroup < 5 & scale) {
warning("Scaling data with a low number of groups may produce misleading results",
call. = FALSE, immediate. = TRUE)
}
head(data.plot)
avg.exp.scaled <- sapply(X = unique(x = data.plot$features.plot),
FUN = function(x) {
data.use <- data.plot[data.plot$features.plot ==
x, "avg.exp"]
if (scale) {
data.use <- scale(x = log1p(data.use))
data.use <- MinMax(data = data.use, min = col.min,
max = col.max)
}
else {
data.use <- log1p(x = data.use)
}
return(data.use)
});avg.exp.scaled
avg.exp.scaled <- as.vector(x = t(x = avg.exp.scaled))
if (split.colors) {
avg.exp.scaled <- as.numeric(x = cut(x = avg.exp.scaled,
breaks = 20))
}
data.plot$avg.exp.scaled <- avg.exp.scaled
data.plot$features.plot <- factor(x = data.plot$features.plot,
levels = features);data.plot
data.plot$pct.exp[data.plot$pct.exp < dot.min] <- NA
data.plot$pct.exp <- data.plot$pct.exp * 100 ;data.plot
############后面就是详细的画图步骤,暂时可以不看=======
if (split.colors) {
splits.use <- unlist(x = lapply(X = data.plot$id, FUN = function(x) sub(paste0(".*_(",
paste(sort(unique(x = splits), decreasing = TRUE),
collapse = "|"), ")$"), "\\1", x)))
data.plot$colors <- mapply(FUN = function(color, value) {
return(colorRampPalette(colors = c("grey", color))(20)[value])
}, color = cols[splits.use], value = avg.exp.scaled)
}
color.by <- ifelse(test = split.colors, yes = "colors",
no = "avg.exp.scaled");color.by
if (!is.na(x = scale.min)) {
data.plot[data.plot$pct.exp < scale.min, "pct.exp"] <- scale.min
}
if (!is.na(x = scale.max)) {
data.plot[data.plot$pct.exp > scale.max, "pct.exp"] <- scale.max
}
if (!is.null(x = feature.groups)) {
data.plot$feature.groups <- factor(x = feature.groups[data.plot$features.plot],
levels = unique(x = feature.groups))
}
plot <-ggplot2:: ggplot(data = data.plot, mapping = aes_string(x = "features.plot",
y = "id")) + geom_point(mapping = aes_string(size = "pct.exp",
color = color.by)) + scale.func(range = c(0, dot.scale),
limits = c(scale.min, scale.max)) + theme(axis.title.x = element_blank(),
axis.title.y = element_blank()) + guides(size = guide_legend(title = "Percent Expressed")) +
labs(x = "Features", y = ifelse(test = is.null(x = split.by),
yes = "Identity", no = "Split Identity")) + cowplot::theme_cowplot()
if ( !is.null(x = feature.groups) ) {
plot <- plot + facet_grid(facets = ~feature.groups,
scales = "free_x", space = "free_x", switch = "y") +
theme(panel.spacing = unit(x = 1, units = "lines"),
strip.background = element_blank())
}
if (split.colors) {
plot <- plot + scale_color_identity()
}
else if (length(x = cols) == 1) {
plot <- plot + scale_color_distiller(palette = cols)
}
else {
plot <- plot + scale_color_gradient(low = cols[1], high = cols[2])
}
if (!split.colors) {
plot <- plot + guides(color = guide_colorbar(title = "Average Expression"))
}
return(plot)
}
}