使用SCENIC的adj.tsv结果在R中绘制基因调控网络

本质上是基于GENIE3/GRNBoost绘制的调控网络,也就是根据基因共表达判断的调控网络,这种调控就不一定是点对点的调控了,一个箭头中间可能有多种组分发挥作用,与利用cis-database进行筛选过的内容不一样,那个就是一对一调控,TF对motif的调控。
看到SCENIC的结果其实很想看看网络是怎么样的,就绘制了个调控网络,但是在阈值设计上非常不知所措,节点选择上也不知所措,其中有很大的调整空间,但是总归是能画一个,加了一些基因的注释,可以在网络图里呈现出来。

library(dplyr)
library(ggraph)
library(tidygraph)
library(igraph)
library(stringr)
library(ggtext) 

# ========================= 参数设置 ========================= #
conserved_interest_gene <- read_lines("Conserved_genes_interested.txt")
cut_importance_values <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)  # 多个 importance 阈值
node_count_threshold <- 1  # 连通分量节点数过滤阈值
grn <- read.table("adj.sample.tsv", sep = '\t', header = TRUE, stringsAsFactors = FALSE)
top_TF <- readLines("noto_TF.txt")
# ========================= 函数定义 ========================= #
# 1. 找到调控 conserved_gene 最多的 TF
find_tf_with_most_unregulated_conserved_genes <- function(df, unregulated_genes, selected_tfs) {
  tf_counts <- sapply(unique(df$TF), function(tf) {
    tf_genes <- df$target[df$TF == tf]
    regulated <- sum(unregulated_genes %in% tf_genes)
    return(regulated)
  })
  tf_counts <- tf_counts[!names(tf_counts) %in% selected_tfs]  # 排除已选择的 TF
  
  if (length(tf_counts) == 0 || all(tf_counts == 0)) {
    return(NULL)  # 没有合适的 TF
  }
  
  tf_with_max <- names(which.max(tf_counts))
  return(tf_with_max)
}

# ========================= 主程序循环 ========================= #
for (cut_importance in cut_importance_values) {
  # --------------------- 数据准备 --------------------- #

  # 筛选 importance 大于 5 且 target 属于 conserved_interest_gene
  grn_filtered <- grn %>%
    filter(importance > cut_importance) %>%
    filter(target %in% c(markers_noto$...1[markers_noto$p_val_adj<0.0001],conserved_interest_gene, top_TF) | TF %in% c(markers_noto$...1[markers_noto$p_val_adj<0.0001],conserved_interest_gene, top_TF))
  
  # # 确保每组至少保留 5 条记录
  # grn_filtered <- grn_filtered %>%
  #   group_by(target) %>%
  #   mutate(group_size = n()) %>%                               # 计算每个组的大小
  #   arrange(desc(importance), .by_group = TRUE) %>%
  #   slice(1:max(5, group_size)) %>%                            # 至少保留 5 条记录
  #   ungroup() %>%
  #   select(-group_size) %>%                                    # 移除临时列 group_size
  #   arrange(target, desc(importance))                         # 按 target 和 importance 排序
  
  # 初始化变量
  selected_tfs <- c()  # 用于记录选择的 TF
  regulated_genes <- character()  # 已被调控的基因
  conserved_genes_set <- setdiff(conserved_interest_gene, regulated_genes)  # 初始未被调控的基因
  
  # --------------------- TF 迭代选择 --------------------- #
  while (length(conserved_genes_set) > 0) {
    tf_to_regulate <- find_tf_with_most_unregulated_conserved_genes(grn_filtered, 
                                                                    conserved_genes_set, 
                                                                    selected_tfs)
    if (is.null(tf_to_regulate)) {
      cat("No more TFs can regulate the remaining conserved genes.\n")
      break
    }
    
    selected_tfs <- c(selected_tfs, tf_to_regulate)
    regulated_genes <- c(regulated_genes, grn_filtered$target[grn_filtered$TF == tf_to_regulate])
    regulated_genes <- unique(regulated_genes[regulated_genes %in% conserved_interest_gene])
    conserved_genes_set <- setdiff(conserved_interest_gene, regulated_genes)
  }
  selected_tfs_total <- c(selected_tfs_total,selected_tfs)
  # 输出选择的 TF 和调控结果
  cat("\nResults for Cut Importance:", cut_importance, "\n")
  cat("Selected TFs:", selected_tfs, "\n")
  cat("Total conserved genes regulated:", length(regulated_genes), "\n")
  
  # --------------------- 构建网络图 --------------------- #
  graph <- as_tbl_graph(grn_filtered, directed = TRUE)
  
  # 识别 top_TF 和 conserved_TF
  top_TF <- readLines("noto_TF.txt")
  conserved_TF <- conserved_interest_gene[conserved_interest_gene %in% grn$TF]
  
  # 节点属性预处理
  graph <- graph %>% 
    activate(nodes) %>% 
    mutate(
      related_to_noto = ifelse(name %in% top_TF, "noto_TF", "others"),
      TF_type = case_when(
        name %in% conserved_TF ~ "conserved_TF",
        name %in% conserved_interest_gene ~ "conserved_genes",
        name %in% selected_tfs ~ "select_TF",
        TRUE ~ "TF"
      )
    )
  
  # 过滤小连通分量
  graph <- graph %>% 
    activate(nodes) %>% 
    mutate(component = group_components()) %>%
    group_by(component) %>% 
    filter(n() > node_count_threshold) %>%  # 连通分量节点数过滤
    ungroup() %>% 
    select(-component)
  
  # --------------------- 绘制网络图 --------------------- #
  # 统计节点总数
  node_count <- graph %>% activate(nodes) %>% as_tibble() %>% nrow()
  
  # 生成 PDF 文件名
  pdf_filename <- paste0("network_markers_cut_", cut_importance, "_nodes_", node_count, ".pdf")
  pdf(pdf_filename, width = 14, height = 6)
  
  # 绘制网络图
  ggraph(graph, layout = "fr") +
    geom_edge_link(aes(width = importance, color = importance), 
                   alpha = 0.8, 
                   arrow = arrow(length = unit(2, 'mm'), type = "closed"), 
                   end_cap = circle(2, 'mm')) +
    scale_edge_color_gradient(low = "blue", high = "red") +
    scale_edge_width(range = c(0.5, 2)) +
    geom_node_point(aes(color = related_to_noto, fill = TF_type), 
                    shape = 21, size = 5, stroke = 1.5) +
    scale_color_manual(values = c("noto_TF" = "red", "others" = "black")) +
    scale_fill_manual(values = c("conserved_TF" = "orange", 
                                 "conserved_genes" = "yellow", 
                                 "select_TF" = "#A9D179", 
                                 "TF" = "lightblue")) +
    
    # 添加节点标签,selected_tfs 加粗并变红
    geom_node_text(aes(label = name, fontface = ifelse(name %in% selected_tfs, "bold", "plain")),
                   color = ifelse(graph %>% activate(nodes) %>% pull(name) %in% selected_tfs, "red", "black"), # 手动设置颜色
                   size = 3, 
                   repel = TRUE, 
                   max.overlaps = 100) +
    
    theme_void() +
    ggtitle(paste0("SCENIC Regulatory Network (Cut Importance > ", 
                   cut_importance, 
                   ", selected_tfs: ", 
                   paste(selected_tfs, collapse = ", "), 
                   " ;cut nodes > ", 
                   node_count_threshold, ")"))
  
   # 关闭 PDF 输出
  dev.off()
  cat("Network plot saved to:", pdf_filename, "\n")
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值