当前位置：首页 > news >正文

Python实例题：文件内容搜索工具

news 来源：原创 2025/6/28 18:36:16

Python实例题

题目

文件内容搜索工具

要求：

实现一个命令行工具，用于在指定目录下搜索包含特定文本的文件。
支持以下功能：
- 递归搜索子目录
- 区分大小写 / 不区分大小写搜索
- 搜索特定文件类型（如 .txt、.py）
- 显示匹配行号和上下文
- 统计匹配文件数和匹配行数
添加进度显示和结果高亮功能。

解题思路：

使用 os.walk 递归遍历目录。
通过字符串包含判断（in）实现文本匹配，并用正则表达式定位需要高亮的匹配片段。
利用 termcolor 输出 ANSI 转义码实现终端文本高亮，并用 progressbar 显示搜索进度。

代码实现：

import os
import re
import argparse
from pathlib import Path
from termcolor import colored
import progressbar


def _normalize_extensions(file_extensions):
    """Return the extension filter as a tuple of lowercase, dot-prefixed suffixes.

    Returns None when no filter was requested, so callers can test truthiness.
    """
    if not file_extensions:
        return None
    # Lowercase in both branches: file names are lowercased before comparison,
    # so an extension such as "TXT" must become ".txt" to ever match.
    return tuple(
        (ext if ext.startswith('.') else f'.{ext}').lower()
        for ext in file_extensions
    )


def _iter_candidate_files(root_dir, extensions, recursive):
    """Yield the path of every file under root_dir that passes the extension filter."""
    for root, _, files in os.walk(root_dir):
        for filename in files:
            if extensions and not filename.lower().endswith(extensions):
                continue
            yield os.path.join(root, filename)
        if not recursive:
            # os.walk yields root_dir itself first; stopping after the first
            # iteration skips every subdirectory.
            break


def search_file(file_path, search_text, case_sensitive=True, context_lines=0):
    """Search a single UTF-8 text file for search_text.

    Args:
        file_path: Path of the file to read.
        search_text: Substring to look for in each line.
        case_sensitive: When False, both sides are lowercased before comparing.
        context_lines: Number of neighbouring lines to include before and
            after each matching line; values <= 0 mean no context.

    Returns:
        A list with one entry per matching line. Each entry is a list of
        (line_number, stripped_text, is_match) tuples covering the matching
        line and its context; is_match is True only for the line the entry
        was created for. The shape is the same with and without context.
        An empty list is returned (after printing a message) when the file
        cannot be opened or decoded.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except (UnicodeDecodeError, OSError) as e:
        # Best effort: an unreadable file must not abort the whole search.
        print(f"无法读取文件 {file_path}: {e}")
        return []

    needle = search_text if case_sensitive else search_text.lower()
    matched_numbers = [
        line_num
        for line_num, line in enumerate(lines, 1)
        if needle in (line if case_sensitive else line.lower())
    ]

    radius = max(context_lines, 0)
    results = []
    for line_num in matched_numbers:
        first = max(1, line_num - radius)
        last = min(len(lines), line_num + radius)
        results.append([
            (i, lines[i - 1].strip(), i == line_num)
            for i in range(first, last + 1)
        ])
    return results


def search_directory(root_dir, search_text, file_extensions=None, case_sensitive=True,
                     recursive=True, context_lines=0, progress_callback=None):
    """Search the files under root_dir for search_text.

    Args:
        root_dir: Directory to search.
        search_text: Substring to look for.
        file_extensions: Optional iterable of extensions, with or without a
            leading dot and in any letter case; None or empty disables the
            filter.
        case_sensitive: Passed through to search_file.
        recursive: When False, only files directly inside root_dir are read.
        context_lines: Passed through to search_file.
        progress_callback: Optional zero-argument callable invoked once after
            each candidate file has been processed.

    Returns:
        A tuple (total_matches, file_count, match_count) where total_matches
        is a list of (file_path, matches) pairs for files with at least one
        match, file_count is the number of such files and match_count is the
        total number of matching lines.
    """
    extensions = _normalize_extensions(file_extensions)
    total_matches = []
    match_count = 0

    for file_path in _iter_candidate_files(root_dir, extensions, recursive):
        matches = search_file(file_path, search_text, case_sensitive, context_lines)
        if matches:
            total_matches.append((file_path, matches))
            match_count += len(matches)
        if progress_callback:
            progress_callback()

    return total_matches, len(total_matches), match_count


def highlight_text(text, search_text, case_sensitive=True):
    """Return text with every occurrence of search_text wrapped in bold red.

    search_text is matched literally (it is regex-escaped). An empty
    search_text returns text unchanged, because an empty pattern matches at
    every position.
    """
    if not search_text:
        return text
    flags = 0 if case_sensitive else re.IGNORECASE
    pattern = re.compile(re.escape(search_text), flags)
    # A replacement function avoids feeding the coloured string through the
    # regex replacement-template parser.
    return pattern.sub(lambda m: colored(m.group(0), 'red', attrs=['bold']), text)


def main():
    """Parse the command line, run the search and print the results."""
    parser = argparse.ArgumentParser(description='文件内容搜索工具')
    parser.add_argument('directory', help='搜索目录')
    parser.add_argument('search_text', help='要搜索的文本')
    parser.add_argument('-e', '--extensions', nargs='+',
                        help='限制搜索的文件扩展名，例如: txt py')
    parser.add_argument('-i', '--ignore-case', action='store_true', help='忽略大小写')
    parser.add_argument('-r', '--recursive', action='store_true', help='递归搜索子目录')
    parser.add_argument('-c', '--context', type=int, default=0,
                        help='显示匹配行的上下文行数')
    args = parser.parse_args()

    # Validate the directory before doing any work.
    if not os.path.isdir(args.directory):
        print(f"错误: 目录 '{args.directory}' 不存在")
        return

    case_sensitive = not args.ignore_case

    # Count candidates with the same filter the search uses, so the progress
    # bar total agrees with the number of progress updates.
    extensions = _normalize_extensions(args.extensions)
    total_files = sum(
        1 for _ in _iter_candidate_files(args.directory, extensions, args.recursive)
    )

    print(f"在目录 '{args.directory}' 中搜索 '{args.search_text}'...")
    print(f"总文件数: {total_files}")

    widgets = [
        '搜索进度: ', progressbar.Percentage(),
        ' ', progressbar.Bar(marker='█', fill='-'),
        ' ', progressbar.ETA(),
    ]
    bar = progressbar.ProgressBar(max_value=total_files, widgets=widgets).start()
    processed = 0

    def update_progress():
        nonlocal processed
        processed += 1
        # Clamp in case files appeared between counting and searching;
        # NOTE(review): progressbar is expected to reject values above
        # max_value - confirm against the installed version.
        bar.update(min(processed, total_files))

    matches, file_count, match_count = search_directory(
        args.directory, args.search_text, args.extensions,
        case_sensitive, args.recursive, args.context,
        update_progress,
    )
    bar.finish()

    print("\n搜索完成！")
    print(f"匹配文件数: {file_count}")
    print(f"匹配行数: {match_count}")

    if not matches:
        print("\n没有找到匹配的内容。")
        return

    print("\n匹配结果:")
    for file_path, file_matches in matches:
        print(f"\n{colored(file_path, 'blue', attrs=['underline'])}")
        for context in file_matches:
            for line_num, line_text, is_match in context:
                prefix = colored(f"{line_num:>5} | ", 'cyan')
                if is_match:
                    highlighted = highlight_text(line_text, args.search_text, case_sensitive)
                    print(prefix + colored(highlighted, 'yellow'))
                else:
                    print(prefix + line_text)


if __name__ == "__main__":
    main()