本质上是基于GENIE3/GRNBoost绘制的调控网络,也就是根据基因共表达判断的调控网络,这种调控就不一定是点对点的调控了,一个箭头中间可能有多种组分发挥作用,与利用cis-database进行筛选过的内容不一样,那个就是一对一调控,TF对motif的调控。
看到SCENIC的结果其实很想看看网络是怎么样的,就绘制了个调控网络,但是在阈值设计上非常不知所措,节点选择上也不知所措,其中有很大的调整空间,但是总归是能画一个,加了一些基因的注释,可以在网络图里呈现出来。
library(dplyr)
library(ggraph)
library(tidygraph)
library(igraph)
library(stringr)
library(ggtext)
# ========================= 参数设置 ========================= #
conserved_interest_gene <- read_lines("Conserved_genes_interested.txt")
cut_importance_values <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10) # 多个 importance 阈值
node_count_threshold <- 1 # 连通分量节点数过滤阈值
grn <- read.table("adj.sample.tsv", sep = '\t', header = TRUE, stringsAsFactors = FALSE)
top_TF <- readLines("noto_TF.txt")
# ========================= 函数定义 ========================= #
# 1. 找到调控 conserved_gene 最多的 TF
find_tf_with_most_unregulated_conserved_genes <- function(df, unregulated_genes, selected_tfs) {
tf_counts <- sapply(unique(df$TF), function(tf) {
tf_genes <- df$target[df$TF == tf]
regulated <- sum(unregulated_genes %in% tf_genes)
return(regulated)
})
tf_counts <- tf_counts[!names(tf_counts) %in% selected_tfs] # 排除已选择的 TF
if (length(tf_counts) == 0 || all(tf_counts == 0)) {
return(NULL) # 没有合适的 TF
}
tf_with_max <- names(which.max(tf_counts))
return(tf_with_max)
}
# ========================= 主程序循环 ========================= #
for (cut_importance in cut_importance_values) {
# --------------------- 数据准备 --------------------- #
# 筛选 importance 大于 5 且 target 属于 conserved_interest_gene
grn_filtered <- grn %>%
filter(importance > cut_importance) %>%
filter(target %in% c(markers_noto$...1[markers_noto$p_val_adj<0.0001],conserved_interest_gene, top_TF) | TF %in% c(markers_noto$...1[markers_noto$p_val_adj<0.0001],conserved_interest_gene, top_TF))
# # 确保每组至少保留 5 条记录
# grn_filtered <- grn_filtered %>%
# group_by(target) %>%
# mutate(group_size = n()) %>% # 计算每个组的大小
# arrange(desc(importance), .by_group = TRUE) %>%
# slice(1:max(5, group_size)) %>% # 至少保留 5 条记录
# ungroup() %>%
# select(-group_size) %>% # 移除临时列 group_size
# arrange(target, desc(importance)) # 按 target 和 importance 排序
# 初始化变量
selected_tfs <- c() # 用于记录选择的 TF
regulated_genes <- character() # 已被调控的基因
conserved_genes_set <- setdiff(conserved_interest_gene, regulated_genes) # 初始未被调控的基因
# --------------------- TF 迭代选择 --------------------- #
while (length(conserved_genes_set) > 0) {
tf_to_regulate <- find_tf_with_most_unregulated_conserved_genes(grn_filtered,
conserved_genes_set,
selected_tfs)
if (is.null(tf_to_regulate)) {
cat("No more TFs can regulate the remaining conserved genes.\n")
break
}
selected_tfs <- c(selected_tfs, tf_to_regulate)
regulated_genes <- c(regulated_genes, grn_filtered$target[grn_filtered$TF == tf_to_regulate])
regulated_genes <- unique(regulated_genes[regulated_genes %in% conserved_interest_gene])
conserved_genes_set <- setdiff(conserved_interest_gene, regulated_genes)
}
selected_tfs_total <- c(selected_tfs_total,selected_tfs)
# 输出选择的 TF 和调控结果
cat("\nResults for Cut Importance:", cut_importance, "\n")
cat("Selected TFs:", selected_tfs, "\n")
cat("Total conserved genes regulated:", length(regulated_genes), "\n")
# --------------------- 构建网络图 --------------------- #
graph <- as_tbl_graph(grn_filtered, directed = TRUE)
# 识别 top_TF 和 conserved_TF
top_TF <- readLines("noto_TF.txt")
conserved_TF <- conserved_interest_gene[conserved_interest_gene %in% grn$TF]
# 节点属性预处理
graph <- graph %>%
activate(nodes) %>%
mutate(
related_to_noto = ifelse(name %in% top_TF, "noto_TF", "others"),
TF_type = case_when(
name %in% conserved_TF ~ "conserved_TF",
name %in% conserved_interest_gene ~ "conserved_genes",
name %in% selected_tfs ~ "select_TF",
TRUE ~ "TF"
)
)
# 过滤小连通分量
graph <- graph %>%
activate(nodes) %>%
mutate(component = group_components()) %>%
group_by(component) %>%
filter(n() > node_count_threshold) %>% # 连通分量节点数过滤
ungroup() %>%
select(-component)
# --------------------- 绘制网络图 --------------------- #
# 统计节点总数
node_count <- graph %>% activate(nodes) %>% as_tibble() %>% nrow()
# 生成 PDF 文件名
pdf_filename <- paste0("network_markers_cut_", cut_importance, "_nodes_", node_count, ".pdf")
pdf(pdf_filename, width = 14, height = 6)
# 绘制网络图
ggraph(graph, layout = "fr") +
geom_edge_link(aes(width = importance, color = importance),
alpha = 0.8,
arrow = arrow(length = unit(2, 'mm'), type = "closed"),
end_cap = circle(2, 'mm')) +
scale_edge_color_gradient(low = "blue", high = "red") +
scale_edge_width(range = c(0.5, 2)) +
geom_node_point(aes(color = related_to_noto, fill = TF_type),
shape = 21, size = 5, stroke = 1.5) +
scale_color_manual(values = c("noto_TF" = "red", "others" = "black")) +
scale_fill_manual(values = c("conserved_TF" = "orange",
"conserved_genes" = "yellow",
"select_TF" = "#A9D179",
"TF" = "lightblue")) +
# 添加节点标签,selected_tfs 加粗并变红
geom_node_text(aes(label = name, fontface = ifelse(name %in% selected_tfs, "bold", "plain")),
color = ifelse(graph %>% activate(nodes) %>% pull(name) %in% selected_tfs, "red", "black"), # 手动设置颜色
size = 3,
repel = TRUE,
max.overlaps = 100) +
theme_void() +
ggtitle(paste0("SCENIC Regulatory Network (Cut Importance > ",
cut_importance,
", selected_tfs: ",
paste(selected_tfs, collapse = ", "),
" ;cut nodes > ",
node_count_threshold, ")"))
# 关闭 PDF 输出
dev.off()
cat("Network plot saved to:", pdf_filename, "\n")
}