代码复现-甜甜圈富集分析
代码复现-甜甜圈富集分析
本文复现了Medina 等人在 Nature (2020) 文章中,一种类似“甜甜圈”形状的富集分析图。简单来说,这份 R 代码把复杂的统计结果转化成一个直观、信息量大的可视化图:中心告诉你“总体上调/下调基因情况”,外围告诉你“具体哪些通路参与,以及它们的上调/下调基因数”。这种图特别适合做功能富集分析的展示,因为它既有整体概览,也有分支细节,能帮助读者快速理解基因表达变化与生物学功能之间的对应关系。
这种图表的直观特色是:中间一个大甜甜圈展示总体上调/下调基因的分布,外围一圈一圈的小甜甜圈代表不同的功能通路或类别,每个小圈再细分成红色(上调基因数)和蓝色(下调基因数)。为了让视觉更清晰,代码里设计了两道“车道”,让小甜甜圈交错排列,同时根据每个类别包含基因数量的多少调整了小圈大小,避免看起来都一样大。代码还特别设置了数字上下分开显示(红字在上、蓝字在下),并且用连线把外围的小甜甜圈和中心大甜甜圈关联起来,让人一眼能看出这些类别都是从整体中分解出来的。
Medina CB, Mehrotra P, Arandjelovic S, Perry JSA, Guo Y, Morioka S, Barron B, Walk SF, Ghesquière B, Krupnick AS, Lorenz U, Ravichandran KS. Metabolites released from apoptotic cells act as tissue messengers. Nature. 2020 Apr;580(7801):130-135. doi: 10.1038/s41586-020-2121-3.
# =========================================================
# 甜甜圈富集分析(按总数缩放小圈大小,面积≈∝ up+down)
# 依赖:tidyverse, ggforce, scales
# =========================================================
library(tidyverse)
library(ggforce)
library(scales)# -------------------------
# 参数集中管理
# -------------------------
cfg <- list(R_pos = 8.5, # 外圈整体半径lane_offset = 1.5, # 两道车道错开r_outer_base = 1.50, # 小圈外半径基准(仅做缩放幅度基准)inner_ratio = 0.70, # 小圈厚度比例:r_in = inner_ratio * r_outnum_offset = 0.25, # 圆内数字上下偏移(乘 r_out)center_r_out = 4.0, # 中心大环外半径center_inner_ratio = 0.7, # 中心大环厚度比例start_base = 0, # 扇区起点:从正上方开始col_up = "#E41A1C",col_down = "#1F4ED8",r_out_range = c(0.8, 1.6) # 小圈外半径相对范围(越大差异越明显)
)# -------------------------
# 数据(按需修改)
# -------------------------
dat <- tribble(~category, ~up, ~down,"Pro-inflammatory", 31, 91,"Anti-inflammatory", 68, 30,"OXPHOS", 12, 8,"Carbohydrate\nmetabolism", 53, 37,"DNA damage", 53, 35,"Glycosylation", 15, 5,"UPR/ER stress", 22, 19,"Anti-apoptotic", 79, 39,"Pro-apoptotic", 41, 82,"Oxidative\nstress", 29, 44,"Migration", 97, 186,"Actin rearrangement/\ncell motility", 45, 70,"Adherence", 25, 33,"Lipid/cholesterol/\nfatty acid metab.", 27, 56,"Vesicle transport", 47, 90,"Autophagy", 11, 62,"Cell size/\nvolume", 9, 21
)# -------------------------
# 小函数:由 up/down 生成两段扇区
# -------------------------
mk_arcs <- function(up, down, x0, y0, r_in, r_out, start_base = -pi/2) {tot <- up + downfrac_up <- if (tot == 0) 0 else up / tottibble(part = factor(c("Upregulated", "Downregulated"),levels = c("Upregulated", "Downregulated")),start = c(start_base, start_base + 2*pi*frac_up),end = c(start_base + 2*pi*frac_up, start_base + 2*pi),x0 = x0, y0 = y0, r0 = r_in, r = r_out)
}# -------------------------
# 角度/车道与尺寸定位(半径∝√total → 面积≈∝total)
# -------------------------
n <- nrow(dat)
angles <- seq(0, 2*pi, length.out = n + 1)[-(n + 1)] # 12点起顺时针
lane <- rep(c(1, -1), length.out = n) # 车道交替dat2 <- dat %>%mutate(angle = angles,lane = lane,total = up + down,# 半径缩放:把 √total 线性映射到 r_out_range,再乘基准r_out = cfg$r_outer_base * rescale(sqrt(total), to = cfg$r_out_range),r_in = r_out * cfg$inner_ratio,# 仅外推(更大者略外推,减小重叠)R_i = cfg$R_pos + cfg$lane_offset * lane + rescale(r_out, to = c(0, 0.35)),x0 = R_i * cos(angle),y0 = R_i * sin(angle),hjust_lab = if_else(cos(angle) >= 0, 0, 1),# 与中心环的连线起止点(随半径联动,避免穿透)link_r0 = cfg$center_r_out + 0.25,link_r1 = pmax(R_i - r_out - 0.25, cfg$center_r_out + 0.35),x_start = link_r0 * cos(angle), y_start = link_r0 * sin(angle),x_end = link_r1 * cos(angle), y_end = link_r1 * sin(angle))# 外圈全部小圈扇区
arcs <- pmap_dfr(dat2[, c("up", "down", "x0", "y0", "r_in", "r_out")],~ mk_arcs(..1, ..2, ..3, ..4, ..5, ..6, start_base = cfg$start_base)
) %>%mutate(id = rep(seq_len(n), each = 2)) %>%left_join(dat2 %>% mutate(id = row_number()) %>%select(id, category, angle, R_i, r_out),by = "id")# -------------------------
# 中心大甜甜圈(总 Up/Down)
# -------------------------
center_up <- sum(dat$up)
center_down <- sum(dat$down)
center_tot <- center_up + center_down
center_frac_up <- if (center_tot == 0) 0 else center_up / center_totcenter_ring <- tibble(part = factor(c("Upregulated", "Downregulated"),levels = c("Upregulated", "Downregulated")),start = c(cfg$start_base, cfg$start_base + 2*pi*center_frac_up),end = c(cfg$start_base + 2*pi*center_frac_up, cfg$start_base + 2*pi),x0 = 0, y0 = 0,r0 = cfg$center_inner_ratio * cfg$center_r_out,r = cfg$center_r_out
)# -------------------------
# 绘图
# -------------------------
p <- ggplot() +# 1) 直线连线(底层)geom_segment(data = dat2,aes(x = x_start, y = y_start, xend = x_end, yend = y_end),linewidth = 0.6, color = "grey70", lineend = "round", alpha = 0.95) +# 2) 中心大环geom_arc_bar(data = center_ring,aes(x0 = x0, y0 = y0, r0 = r0, r = r, start = start, end = end, fill = part),color = "white", linewidth = 0.6) +# 3) 外圈小环geom_arc_bar(data = arcs,aes(x0 = x0, y0 = y0, r0 = r0, r = r, start = start, end = end, fill = part),color = "white", linewidth = 0.6) +# 4) 类别标签(随半径外扩)geom_text(data = dat2,aes(x = 1.05 * (R_i + r_out) * cos(angle),y = 1.05 * (R_i + r_out) * sin(angle),hjust = hjust_lab, label = category),size = 3.6, lineheight = 0.95) +# 5) 圆内红/蓝数字(上下调)geom_text(data = dat2,aes(x = x0, y = y0 + cfg$num_offset * r_out, label = up),size = 3.8, fontface = "bold", color = cfg$col_up) +geom_text(data = dat2,aes(x = x0, y = y0 - cfg$num_offset * r_out, label = down),size = 3.8, fontface = "bold", color = cfg$col_down) +# 6) 中心标题与总数annotate("text", x = 0, y = 0.8, label = "Significant genes",fontface = "bold", size = 4.3) +annotate("text", x = 0, y = -0.5,label = paste0("Up = ", center_up, "\nDown = ", center_down),size = 4, lineheight = 1.05, color = "gray20") +coord_fixed(xlim = c(-10, 10), ylim = c(-10, 10), clip = "off") +scale_fill_manual(values = c("Upregulated" = cfg$col_up,"Downregulated" = cfg$col_down)) +guides(fill = guide_legend(title = NULL, override.aes = list(color = NA))) +theme_void(base_size = 12) +theme(legend.position = c(0.99, 0.99),plot.margin = margin(40, 40, 40, 40))p