相似图像处理程序
工作中经常会遇到大量相似的照片,而每一组相似的照片我们只想保留一份,所以写了下面这个 Python 程序来实现这个功能。
1 导入必要的包
import os
import shutil
from datetime import datetime
from collections import defaultdict
from pathlib import Path
from PIL import Image
import imagehash
from tqdm import tqdm
2 核心代码
# Folder that is scanned (recursively) for images.
path = r"E:\...\图片获取\xxx"

# Two images are treated as similar when the Hamming distance between
# their hashes is less than or equal to this value.
THRESHOLD = 12

# True: duplicates are moved into a folder; False: they are deleted.
MOVE_FOLDER = True

# Lower-case file suffixes (including the dot) that count as images.
IMG_EXTS = {
    '.bmp',
    '.jpeg',
    '.jpg',
    '.png',
    '.tif',
    '.tiff',
    '.webp',
}
def all_images(root):
    """Yield the path of every image file found recursively under *root*.

    A file counts as an image when its lower-cased suffix is in
    ``IMG_EXTS``.  Directories are skipped even when their name ends in
    an image extension.

    Args:
        root: Directory to scan.

    Yields:
        str: Path of each matching file.
    """
    for p in Path(root).rglob('*'):
        if p.suffix.lower() in IMG_EXTS and p.is_file():
            yield str(p)


def hash_image(path):
    """Return the perceptual hash (``imagehash.phash``) of one image.

    Args:
        path: Path of the image file to open.

    Returns:
        The hash object, or ``None`` when the file could not be opened
        or hashed (a warning is printed in that case).
    """
    try:
        with Image.open(path) as img:
            return imagehash.phash(img)
    except Exception as e:
        # Deliberately broad: one unreadable file must not abort the scan.
        print(f"[WARN] 无法读取 {path}: {e}")
        return None


def group_by_similarity(paths, threshold):
    """Group image paths whose hashes are within *threshold* of each other.

    Grouping is greedy: after sorting by hash value, each image that is
    not yet assigned starts a new group and collects every later,
    unassigned image whose Hamming distance to it is <= *threshold*.
    Distances are measured against the first image of the group only.

    Args:
        paths: Iterable of image paths.
        threshold: Maximum Hamming distance for two images to be grouped.

    Returns:
        list[list[str]]: One list of paths per group; images that could
        not be hashed are left out.  A group may contain a single path.
    """
    hashes = {}
    for p in tqdm(paths, desc="计算哈希"):
        h = hash_image(p)
        if h is not None:
            hashes[p] = h

    # Convert each hash array into a plain Python int.  After the uint8
    # cast every element occupies one byte; assuming the array holds
    # only 0/1 bits (as imagehash hashes do), the number of set bits in
    # the XOR of two such ints is exactly the Hamming distance.
    items = [
        (int.from_bytes(h.hash.astype('uint8').tobytes(), 'big'), p)
        for p, h in hashes.items()
    ]
    items.sort(key=lambda t: t[0])

    groups = []
    used = set()
    n = len(items)
    for i in range(n):
        if i in used:
            continue
        seed_key, seed_path = items[i]
        grp = [seed_path]
        for j in range(i + 1, n):
            if j in used:
                continue
            ham = bin(seed_key ^ items[j][0]).count('1')
            if ham <= threshold:
                grp.append(items[j][1])
                used.add(j)
        groups.append(grp)
    return groups


def pick_best(group):
    """Split *group* into the file to keep and the files to discard.

    The file with the largest size on disk is kept.

    Args:
        group: Non-empty list of file paths.

    Returns:
        tuple: ``(best, others)`` where ``best`` is the largest file and
        ``others`` is every path in *group* that differs from it.
    """
    best = max(group, key=os.path.getsize)
    return best, [p for p in group if p != best]


def _unique_target(folder, name):
    """Return a path for *name* inside *folder* that does not exist yet."""
    target = folder / name
    stem, suffix = target.stem, target.suffix
    counter = 1
    while target.exists():
        target = folder / f"{stem}_{counter}{suffix}"
        counter += 1
    return target


def move_duplicates(dup_list, dest_root=None):
    """Move the given files into a fresh, timestamped ``duplicates_*`` folder.

    Files that share a name (for example because they come from
    different sub-folders) get a numeric suffix instead of overwriting
    each other.  A failure to move one file is reported and does not
    stop the remaining moves.

    Args:
        dup_list: Paths of the files to move.  Nothing happens when empty.
        dest_root: Directory in which the ``duplicates_*`` folder is
            created.  Defaults to the ``Desktop`` folder in the user's
            home directory.
    """
    if not dup_list:
        return
    if dest_root is None:
        base = Path.home() / 'Desktop'
    else:
        base = Path(dest_root)
    recycle = base / f"duplicates_{datetime.now():%Y%m%d_%H%M%S}"
    # parents=True: the base folder itself may not exist yet.
    recycle.mkdir(parents=True, exist_ok=True)
    for p in dup_list:
        try:
            target = _unique_target(recycle, Path(p).name)
            shutil.move(str(p), str(target))
        except Exception as e:
            print(f"[ERROR] 移动失败 {p}: {e}")


def main():
    """Scan ``path``, report groups of similar images and remove the extras.

    For every group the largest file is kept.  The others are moved to a
    ``duplicates_*`` folder when ``MOVE_FOLDER`` is true and deleted
    otherwise.
    """
    paths = list(all_images(path))
    if not paths:
        print("未找到任何图片。")
        return

    groups = group_by_similarity(paths, THRESHOLD)
    to_move = []
    for g in groups:
        keep, dups = pick_best(g)
        to_move.extend(dups)
        if dups:
            print(f"保留: {keep}")
            for d in dups:
                print(f" 移动: {d}")

    if not to_move:
        print("未发现相似图片。")
        return

    print(f"\n共 {len(to_move)} 张相似图片将被移动。")
    if MOVE_FOLDER:
        move_duplicates(to_move)
        print("已移动到桌面的 duplicates_* 文件夹。")
    else:
        for d in to_move:
            try:
                os.remove(d)
            except OSError as e:
                # Keep deleting the remaining files, as the move path does.
                print(f"[ERROR] 删除失败 {d}: {e}")
        print("已直接删除。")
# Run the scan only when this file is executed as a script.
if __name__ == "__main__":
    main()