RNA-seq分析之TMB分析(TCGA版)
如题,TCGA版的TMB分析,非TCGA版的还在摸索
# 00 首次运行:装包 + 加载 -------------------------------------------------
if (!requireNamespace("BiocManager", quietly = TRUE))install.packages("BiocManager")
if (!"maftools" %in% rownames(installed.packages()))BiocManager::install("maftools")
library(maftools)
library(dplyr)
library(ggplot2)
library(scales) # 给 Y 轴加千位分隔符# 01 工作目录(自行改)------------------------------------------------------
work.dir <- "G:/ZNF503/RNA-seq/TMB分析/GDC_COAD"
dir.create(work.dir, recursive = TRUE, showWarnings = FALSE)
setwd(work.dir)# 02 读取 TCGA *.gz MAF -----------------------------------------------------
maf.files <- list.files(pattern = "\\.gz$", recursive = TRUE)
all.mut <- data.frame()
for (file in maf.files) {tmp <- read.delim(file, skip = 7, header = TRUE, fill = TRUE, sep = "\t",stringsAsFactors = FALSE)all.mut <- rbind(all.mut, tmp)
}# 03 生成 MAF 对象 ----------------------------------------------------------
maf <- read.maf(all.mut) # 后续函数都能直接吃它# 04 构建基因×样本 0/1 突变矩阵 -------------------------------------------
mut.dt <- maf@data %>%select(Hugo_Symbol, Variant_Classification, Tumor_Sample_Barcode) %>%mutate(Tumor_Sample_Barcode = substring(Tumor_Sample_Barcode, 1, 12))gene.vec <- unique(mut.dt$Hugo_Symbol)
sam.vec <- unique(mut.dt$Tumor_Sample_Barcode)
mat.01 <- matrix(0,nrow = length(gene.vec),ncol = length(sam.vec),dimnames = list(gene.vec, sam.vec))
for (i in seq_len(nrow(mut.dt))) {mat.01[mut.dt$Hugo_Symbol[i], mut.dt$Tumor_Sample_Barcode[i]] <- 1
}# 05 统计每个基因突变次数并保存 --------------------------------------------
gene.count <- data.frame(gene = rownames(mat.01),count = rowSums(mat.01)) %>%arrange(desc(count))
topN <- 20 # 想画前多少就改这里
gene.top <- gene.count$gene[1:topN]
save(mat.01, gene.count, file = "TMB.rda")
write.csv(mat.01, "all_mut_01_matrix.csv", quote = FALSE)# 06 瀑布图 ----------------------------------------------------------------
oncoplot(maf = maf,top = 30,fontSize = 0.6,showTumorSampleBarcodes = FALSE)# 07 计算 TMB(每 Mb 突变数)-----------------------------------------------
tmb.table <- tmb(maf = maf, logScale = FALSE) # 列名:total_perMB
write.csv(tmb.table, "tmb_results.csv", quote = FALSE)# 08 可选:突变互作 / VAF ---------------------------------------------------
somaticInteractions(maf = maf, top = 25, pvalue = c(0.05, 0.1))
maf.copy <- maf
maf.copy@data$TumorVAF <- with(maf.copy@data, t_alt_count / t_depth)
plotVaf(maf = maf.copy, vafCol = "TumorVAF")
tcgaCompare(maf = maf, cohortName = "COAD-maf",logscale = TRUE, capture_size = 50)# 09 读取 ZNF503 分组信息 --------------------------------------------------
group.df <- read.csv("G:/ZNF503/RNA-seq/data/group.csv",header = TRUE, row.names = 1)
group.df$sample <- rownames(group.df)# 统一 barcode:保留前 4 段,去掉结尾 -------------------------------
trimBarcode <- function(x) {sapply(strsplit(as.character(x), "-"),function(v) paste(v[1:4], collapse = "-"))
}
tmb.table$Tumor_Sample_Barcode <- trimBarcode(tmb.table$Tumor_Sample_Barcode)
group.df$sample <- trimBarcode(group.df$sample)# 合并分组
tmb.merge <- merge(tmb.table, group.df,by.x = "Tumor_Sample_Barcode",by.y = "sample", all.x = TRUE)# 10 按 ZNF503 高低比较 TMB -----------------------------------------------
tmb.clean <- tmb.merge %>%filter(!is.na(group),!is.na(total_perMB)) # 注意列名:total_perMB
wilcox.res <- wilcox.test(total_perMB ~ group, data = tmb.clean)
p.val <- wilcox.res$p.value
cat(">>> Wilcoxon P =", format.pval(p.val, digits = 3), "\n")# 11 箱线图 + 保存 ---------------------------------------------------------
ggplot(tmb.clean,aes(x = group, y = total_perMB, color = group)) +geom_boxplot(alpha = 0.9, width = 0.5, outlier.shape = NA) +geom_jitter(width = 0.2, alpha = 0.6) +scale_y_continuous(labels = comma) +labs(title = "Tumor Mutational Burden by ZNF503 Status",subtitle = paste("Wilcoxon P =", format.pval(p.val, digits = 3)),x = NULL, y = "TMB (mutations / Mb)") +theme_classic(base_size = 14) +theme(legend.position = "none",plot.title = element_text(hjust = 0.5, face = "bold"),plot.subtitle = element_text(hjust = 0.5))ggsave("TMB_ZNF503_boxplot.pdf", width = 6, height = 5, device = cairo_pdf)
ggsave("TMB_ZNF503_boxplot.png", width = 6, height = 5, dpi = 600)message(">>> 全流程结束!结果已保存至:", getwd())