R语言:对 Excel 文件中的多个工作表(sheet)批量进行地理探测器计算
## ================= Basic settings =================
## 1) Working directory: replace the placeholder with your own path.
##    The workbook is read from here (xlsx_file is a relative name).
setwd("D:/*****/*****/******")

## 2) Workbook file name (must match the actual .xlsx name exactly)
xlsx_file <- "驱动因素(中低收入).xlsx"

## ================= Dependencies =================
need_pkgs <- c("readxl", "GD")
# requireNamespace() probes one package at a time, which is cheaper than
# scanning the whole library with installed.packages().
to_install <- need_pkgs[
  !vapply(need_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(to_install) > 0) install.packages(to_install)
library(readxl)
library(GD)

## ================= Parameters =================
# Candidate discretization methods
discmethod <- c("equal", "natural", "quantile", "geometric", "sd")
# Candidate numbers of intervals: 3 to 6
discitv <- 3:6
# Explanatory variable columns X1..X18
x_vars <- paste0("X", 1:18)
# Response variable column
y_var <- "Y"

## The 7 worksheet names (must match the sheet names in the workbook)
target_sheets <- c("1", "2", "3", "4", "5", "6", "7")

## ================= Helper functions =================
#' Turn a worksheet name into a string that is safe to use in a file name.
#'
#' Each of the characters \ / : * ? " < > | is replaced by "_", every run of
#' whitespace is collapsed to a single "_", and the result is converted to
#' UTF-8.
#'
#' @param x Character vector of worksheet names.
#' @return Character vector of the same length as `x`.
sanitize_filename <- function(x) {
  x <- gsub("[\\\\/:*?\"<>|]", "_", x)
  x <- gsub("\\s+", "_", x)
  enc2utf8(x)
}

#' Force the given columns of a data frame to numeric.
#'
#' Values that cannot be parsed as numbers become NA (conversion warnings are
#' suppressed). Columns named in `cols` that do not exist in `df` are ignored.
#'
#' @param df A data frame.
#' @param cols Character vector of column names to convert.
#' @return `df` with the selected columns converted to numeric.
coerce_numeric <- function(df, cols) {
  for (cc in intersect(cols, names(df))) {
    col <- df[[cc]]
    # as.numeric() on a factor yields the internal level codes, not the
    # labels, so go through character first.
    if (is.factor(col)) {
      col <- as.character(col)
    }
    df[[cc]] <- suppressWarnings(as.numeric(col))
  }
  df
}

## ================= Main flow: batch run, one worksheet at a time =================
# List every worksheet in the workbook and warn about any requested sheet
# that is not present.
all_sheets <- readxl::excel_sheets(xlsx_file)
missing_sheets <- setdiff(target_sheets, all_sheets)
if (length(missing_sheets) != 0L) {
  warning(
    sprintf(
      "以下工作表在 %s 中未找到:%s",
      xlsx_file,
      toString(missing_sheets)
    )
  )
}
# Only the requested sheets that actually exist in the workbook are processed.
sheets_to_run <- intersect(target_sheets, all_sheets)
if (length(sheets_to_run) == 0) {
  stop("未找到可运行的工作表,请检查 xlsx 文件及表名。")
}

# Loop-invariant pieces: required columns and the model formula.
need_cols <- c(y_var, x_vars)
# Keep the formula text as one string: deparse() splits long formulas into
# several pieces, which would print garbled through cat(sep = "").
fm_text <- paste0(y_var, " ~ ", paste(x_vars, collapse = " + "))
fm <- as.formula(fm_text)

for (sh in sheets_to_run) {
  cat(sprintf(">>> 正在处理工作表:%s ...\n", sh))

  # Read the current worksheet as a plain data frame.
  df <- readxl::read_excel(path = xlsx_file, sheet = sh)
  df <- as.data.frame(df, stringsAsFactors = FALSE)

  # Skip the sheet when any required column is missing.
  lost_cols <- setdiff(need_cols, names(df))
  if (length(lost_cols) > 0) {
    warning(sprintf(
      "工作表 %s 缺少列:%s,跳过该表。",
      sh, paste(lost_cols, collapse = ", ")
    ))
    next
  }

  # Make sure Y and X1..X18 are numeric.
  df <- coerce_numeric(df, need_cols)

  # Drop rows with a missing value in Y or in any X.
  keep <- complete.cases(df[, need_cols])
  na_drop_n <- sum(!keep)
  if (na_drop_n > 0) {
    message(sprintf("工作表 %s:剔除含缺失的行 %d 行。", sh, na_drop_n))
  }
  df_clean <- df[keep, , drop = FALSE]

  # Nothing left to analyse: skip this sheet instead of failing inside gdm()
  # and aborting the remaining sheets.
  if (nrow(df_clean) == 0) {
    warning(sprintf("工作表 %s 没有可用于计算的完整数据行,跳过该表。", sh))
    next
  }

  # Run the geographical detector model.
  datagdm <- GD::gdm(
    formula = fm,
    continuous_variable = x_vars, # every X is treated as continuous
    data = df_clean,
    discmethod = discmethod,
    discitv = discitv
  )

  # Save a run header plus the printed result to a txt file in the working
  # directory.
  out_name <- paste0("GDM结果_", sanitize_filename(sh), ".txt")
  out_path <- file.path(getwd(), out_name)
  res_txt <- capture.output({
    cat("文件:", xlsx_file, "\n", sep = "")
    cat("工作表:", sh, "\n", sep = "")
    cat("公式:", fm_text, "\n", sep = "")
    cat("离散化方法:", paste(discmethod, collapse = ", "), "\n", sep = "")
    cat("分组数:", paste(discitv, collapse = ", "), "\n\n", sep = "")
    print(datagdm)
  })
  writeLines(res_txt, out_path, useBytes = TRUE)
  cat(sprintf("保存结果:%s\n", out_path))

  ## Optional: uncomment the lines below to also save a plot for each sheet.
  # png_name <- paste0("GDM图_", sanitize_filename(sh), ".png")
  # png(file.path(getwd(), png_name), width = 1000, height = 800, res = 120)
  # plot(datagdm)
  # dev.off()
  # cat(sprintf("保存图形:%s\n", file.path(getwd(), png_name)))
}

cat("=== 全部完成 ===\n")
假设 Excel 文件中有 7 个子表,每个子表都要进行一遍最优参数地理探测器运算。之前的做法是把各个子表分别另存为新文件,然后逐个运行;此代码实现了一次性批量计算所有子表的相关指标,并将每个子表的结果分别保存为 txt 文件。此代码假设所有变量都是连续变量;如果有类别变量,需要相应调整(例如将其从 `continuous_variable` 参数中移除),可结合 AI 进一步修改。