Python 之——递归选择文件并生成新文件,同时保持目录结构
使用 Python 递归处理文件:遍历被处理文件夹时保留其内部的目录结构,生成处理结果时也按相同的目录结构输出。
import json
import os

# Filler markers stripped from accepted text.  Order matters: ' ...' must be
# removed before ' .', otherwise the shorter marker would eat the first dot of
# every ellipsis and leave '..' behind.
_MARKERS = (' ...', ' .', ' _', ' |')


def _noise_score(text):
    """Return the weighted number of filler-marker occurrences in *text*."""
    ellipses = text.count(' ...')
    # Every ' ...' also matches ' .', so subtract to count standalone ' .' only.
    lone_dots = text.count(' .') - ellipses
    underscores = text.count(' _')
    pipes = text.count(' |')
    return lone_dots * 2 + ellipses * 4 + underscores * 2 + pipes * 3


def _strip_markers(text):
    """Return *text* with every filler marker removed."""
    for marker in _MARKERS:
        text = text.replace(marker, '')
    return text


def _partition_records(records, max_noise_ratio, min_words, verbose):
    """Split *records* into ``(kept, rejected)`` lists, mutating each record.

    Every record loses its 'content' and 'translate_time' keys.  Kept records
    get their 'translate_content' stripped of filler markers; rejected records
    keep the text unchanged.
    """
    kept = []
    rejected = []
    for record in records:
        # These fields are discarded anyway, so a record that never had them
        # must not abort processing of the whole file.
        record.pop('content', None)
        record.pop('translate_time', None)
        # A record without text is treated like one with empty text.
        text = record.pop('translate_content', '')

        if not text:
            record['translate_content'] = text
            rejected.append(record)
            continue

        score = _noise_score(text)
        ratio = score / len(text)
        if verbose:
            print(score)
            print(len(text))
            print(ratio)

        if ratio > max_noise_ratio or len(text.split()) < min_words:
            record['translate_content'] = text
            rejected.append(record)
            continue

        record['translate_content'] = _strip_markers(text)
        kept.append(record)
    return kept, rejected


def run(data_dir, res_dir, clean_dir, *, max_noise_ratio=0.6, min_words=500,
        verbose=True):
    """Filter every JSON file under *data_dir*, mirroring its directory tree.

    Each ``*.json`` file (extension matched case-insensitively) is expected to
    hold a list of dict records.  For every record the 'content' and
    'translate_time' keys are dropped and 'translate_content' is inspected:

    * empty text, text whose filler-marker score divided by its length exceeds
      *max_noise_ratio*, or text with fewer than *min_words*
      whitespace-separated words is rejected and written unchanged under
      *clean_dir*;
    * all other text has the markers ' ...', ' .', ' _' and ' |' removed and
      is written under *res_dir*.

    Both output trees reproduce the sub-directory layout of *data_dir*,
    including directories that contain no JSON files.  The result file is
    always written (possibly as an empty list); the rejected file is written
    only when at least one record was rejected.

    A file that cannot be read, parsed or processed is reported on stdout and
    skipped; no output is written for it.

    Args:
        data_dir: Root directory of the source data.
        res_dir: Root directory receiving accepted, cleaned records.
        clean_dir: Root directory receiving rejected records.
        max_noise_ratio: Highest marker-score / text-length ratio still
            accepted.
        min_words: Minimum number of words a text needs to be accepted.
        verbose: When true, print score, length and ratio for every
            non-empty text.
    """
    for root, _dirs, files in os.walk(data_dir):
        # Path of the current directory relative to the source root, reused to
        # build the matching directory in both output trees.
        rel_path = os.path.relpath(root, data_dir)
        res_subdir = os.path.join(res_dir, rel_path)
        clean_subdir = os.path.join(clean_dir, rel_path)
        os.makedirs(res_subdir, exist_ok=True)
        os.makedirs(clean_subdir, exist_ok=True)

        for name in files:
            if not name.lower().endswith('.json'):
                continue

            src_path = os.path.join(root, name)
            try:
                with open(src_path, 'r', encoding='utf-8') as f:
                    records = json.load(f)

                kept, rejected = _partition_records(
                    records, max_noise_ratio, min_words, verbose
                )

                with open(os.path.join(res_subdir, name), 'w',
                          encoding='utf-8') as f:
                    json.dump(kept, f, ensure_ascii=False, indent=4)
                if rejected:
                    with open(os.path.join(clean_subdir, name), 'w',
                              encoding='utf-8') as f:
                        json.dump(rejected, f, ensure_ascii=False, indent=4)
            except Exception as e:
                # Per-file boundary of a best-effort batch job: report the
                # failure and carry on with the remaining files.
                print(f"处理文件失败 {src_path}: {e}")


if __name__ == '__main__':
    # Source data
    data_path = 'data'
    # Accepted, cleaned data
    res_path = 'res'
    # Rejected data
    clean_path = 'clean'
    run(data_path, res_path, clean_path)