# Python chardet 文本编码识别代码
import chardet


def guess_text_from_bytes(data: bytes) -> None:
    """Print plausible text decodings of a byte string.

    The function first tries a fixed list of common encodings and prints
    every decoding that looks like text. It then asks chardet for its best
    guess and prints that decoding when the reported confidence exceeds 0.7.

    Args:
        data: The raw bytes to interpret.

    Returns:
        None. All results are written to standard output.
    """
    # Try common encodings.
    encodings = ['utf-8', 'ascii', 'gbk', 'utf-16le', 'utf-16be']
    for enc in encodings:
        try:
            text = data.decode(enc)
        except UnicodeDecodeError:
            # The bytes are not valid in this encoding; try the next one.
            continue
        # Heuristic for "looks like text": fully printable, contains a
        # newline, or has more than five non-whitespace-trimmed characters.
        if text.isprintable() or '\n' in text or len(text.strip()) > 5:
            print(f"[可能] {enc.upper()}: {repr(text)}")

    # Use chardet for automatic detection.
    detected = chardet.detect(data)
    encoding = detected['encoding']
    confidence = detected['confidence']
    # chardet reports an encoding of None when it cannot make a guess.
    if encoding is not None and confidence > 0.7:
        try:
            text = data.decode(encoding)
        except (UnicodeDecodeError, LookupError):
            # LookupError covers codec names chardet may report that this
            # Python installation does not provide. Detection is best-effort,
            # so a failed decode simply produces no output.
            return
        print(f"[检测] {encoding} (置信度: {confidence:.2f}): {repr(text)}")