Win文件批量格式转换为UTF8 chardet
Visual Studio的文件编码真的是依托。
准备环境
请先自行下载并安装 Python;如果以后会经常使用,建议安装 Python 3.11.9。
安装完成后,执行以下命令升级 pip 并安装 chardet:
# 1. 确保 pip 是最新版本
python.exe -m pip install --upgrade pip -i https://pypi.tuna.tsinghua.edu.cn/simple
# 2. 安装 chardet 库
python.exe -m pip install chardet -i https://pypi.tuna.tsinghua.edu.cn/simple
脚本
让AI润色了一下,目前自己用还可以。
需要注意:如果你的文件使用的是 Unix 换行符(\n)而不是 Windows 的 \r\n,可能会有问题,请留意两者的区别。
import os
import chardet
from typing import Dict, List# 确认项目常用的文件扩展名
# File extensions (lowercase) of the project files that should be converted.
SUPPORTED_EXTENSIONS = ('.cpp', '.h', '.cs', '.shader', '.rc')

# chardet can report GB2312 for text that also contains characters found only
# in its supersets (GBK / GB18030). Decoding with the superset codec keeps
# such files convertible without resorting to lossy error handling.
_DECODE_SUPERSETS = {'gb2312': 'gb18030', 'gbk': 'gb18030'}


def convert_file_to_utf8(file_path: str) -> str:
    """Convert one file to UTF-8 in place if its detected encoding differs.

    The file is read as raw bytes, its encoding is guessed with
    ``chardet.detect``, and the decoded text is written back as UTF-8 bytes.
    Because both the read and the write are binary, the original newline
    characters are preserved exactly.

    Args:
        file_path: The path to the file.

    Returns:
        ``'converted'`` if the file was rewritten; ``'skipped'`` if it was
        empty, detected as UTF-8/ASCII, or detected with a confidence below
        0.7; ``'error'`` if any exception occurred (it is printed, not
        raised, and the file is left as it was when decoding fails).
    """
    try:
        with open(file_path, 'rb') as f:
            raw_data = f.read()

        if not raw_data:
            print(f"⚪ 文件为空,跳过: {file_path}")
            return 'skipped'

        result = chardet.detect(raw_data)
        encoding = result['encoding']
        # Treat a missing confidence value as "no confidence".
        confidence = result['confidence'] or 0.0

        if not encoding or confidence < 0.7:
            print(f"🟡 无法准确识别编码 (置信度: {confidence:.2f}),跳过: {file_path}")
            return 'skipped'

        if encoding.lower() in ('utf-8', 'ascii'):
            print(f"🟢 编码兼容 (UTF-8/ASCII),无需转换: {file_path}")
            return 'skipped'

        # Decode strictly. With errors='replace' a wrong guess would silently
        # turn undecodable bytes into U+FFFD and then overwrite the original
        # file, losing data for good. A strict failure is reported as
        # 'error' by the handler below and leaves the file untouched.
        codec = _DECODE_SUPERSETS.get(encoding.lower(), encoding)
        text = raw_data.decode(codec)

        # Encode before opening the file for writing, so that an encoding
        # failure cannot leave behind a truncated file. Writing bytes also
        # guarantees that no newline translation takes place.
        utf8_data = text.encode('utf-8')
        with open(file_path, 'wb') as f_utf8:
            f_utf8.write(utf8_data)

        print(f"✅ 转换成功: {file_path} (原编码: {encoding})")
        return 'converted'

    except Exception as e:
        # Per-file boundary: report the problem and let the caller carry on
        # with the remaining files.
        print(f"❌ 处理文件时发生错误 {file_path}: {e}")
        return 'error'


def convert_folder(folder_path: str) -> Dict[str, int]:
    """Walk a folder tree and convert every supported file to UTF-8.

    Args:
        folder_path: The root directory to start the conversion from.

    Returns:
        A dictionary with the counters ``"processed"``, ``"converted"``,
        ``"skipped"`` and ``"error"``.
    """
    stats = {
        "processed": 0,
        "converted": 0,
        "skipped": 0,
        "error": 0,
    }

    print(f"\n🚀 开始扫描文件夹: {folder_path}\n")

    for root, _dirs, files in os.walk(folder_path):
        for filename in files:
            # Compare in lowercase so that names such as "MAIN.CPP" or
            # "Resource.H" are not silently ignored.
            if not filename.lower().endswith(SUPPORTED_EXTENSIONS):
                continue

            stats["processed"] += 1
            result = convert_file_to_utf8(os.path.join(root, filename))
            if result in stats:
                stats[result] += 1

    return stats


def main() -> None:
    """Prompt for a folder, convert it, and print a summary report."""
    # Strip the quotes that Windows adds when a path is pasted or dragged in.
    folder = input("请输入要转换的文件夹路径:").strip('" ')

    if not os.path.isdir(folder):
        print("❌ 提供的路径无效,请检查后再试。")
        return

    summary = convert_folder(folder)

    print("\n" + "="*20 + " 转换报告 " + "="*20)
    if summary["processed"] == 0:
        print(f"在路径 '{folder}' 下未找到任何支持的文件 ({', '.join(SUPPORTED_EXTENSIONS)})。")
    else:
        print(f"共扫描文件: {summary['processed']} 个")
        print(f"成功转换: {summary['converted']} 个")
        print(f"跳过(兼容/空文件): {summary['skipped']} 个")
        print(f"转换失败: {summary['error']} 个")
    print("="*52)
    print("\n🎉 所有转换完成!")


if __name__ == "__main__":
    main()