Ubuntu 切换 SOCKS5代理 和 HTTP 代理并下载 Hugging Face 模型
0. 平常运行SOCKS5代理下载
import os
from sentence_transformers import SentenceTransformer
# --------------------------------------------------
# 0. Configure the proxy (local SOCKS5 listener on 127.0.0.1:1080)
# --------------------------------------------------
os.environ['http_proxy'] = 'socks5h://127.0.0.1:1080'
os.environ['https_proxy'] = 'socks5h://127.0.0.1:1080'
# --------------------------------------------------
# 3. Encode with BAAI/bge-base-zh-v1.5 (768-dim embeddings)
# --------------------------------------------------
# NOTE(review): `df` is not defined in this snippet; it is assumed to be a
# DataFrame with a 'text' column created in an earlier step - confirm.
model = SentenceTransformer('BAAI/bge-base-zh-v1.5', device='cuda') # use 'cpu' if GPU memory is limited
embeddings = model.encode(df['text'], show_progress_bar=True, normalize_embeddings=True)
1. 使用SOCKS5代理下载报错
import os

from huggingface_hub import snapshot_download

# Point the process at the local Trojan client's SOCKS5 port.
# This is the configuration that hangs; it is kept unchanged so the failure
# described in this section can be reproduced.
os.environ["HTTP_PROXY"] = "socks5://127.0.0.1:1080"
os.environ["HTTPS_PROXY"] = "socks5://127.0.0.1:1080"

# Hugging Face model ID
model_id = "unsloth/DeepSeek-R1-Distill-Qwen-14B-unsloth-bnb-4bit"
# Local directory the files are written to
local_dir = "./models/deepseek-14b-bnb-4bit"

print(f"开始下载模型: {model_id} ...")

# Download all safetensors weights plus the config/tokenizer files.
# Each keyword argument sits on its own line so that the trailing comments
# cannot swallow the arguments that follow them.
snapshot_download(
    repo_id=model_id,
    local_dir=local_dir,
    resume_download=True,  # resume interrupted downloads
    local_dir_use_symlinks=False,  # store real files instead of symlinks
    allow_patterns=["*.safetensors", "*.json", "*.py", "*.txt"],
)

print(f"✅ 模型已下载完成,保存在: {local_dir}")
启动 Trojan 代理,
$ curl ifconfig.info
111.55.204.176
运行代码报错:
开始下载模型: unsloth/DeepSeek-R1-Distill-Qwen-14B-unsloth-bnb-4bit ...
^CTraceback (most recent call last):File "/home/xjg/workspace/AI全栈开发实战营/deepseek-quickstart/distill/python download_model.py", line 17, in <module>snapshot_download(~~~~~~~~~~~~~~~~~^repo_id=model_id,^^^^^^^^^^^^^^^^^...<3 lines>...allow_patterns=["*.safetensors", "*.json", "*.py", "*.txt"]^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^)^File "/home/xjg/.conda/envs/deepseek/lib/python3.13/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fnreturn fn(*args, **kwargs)File "/home/xjg/.conda/envs/deepseek/lib/python3.13/site-packages/huggingface_hub/_snapshot_download.py", line 165, in snapshot_downloadrepo_info = api.repo_info(repo_id=repo_id, repo_type=repo_type, revision=revision)File "/home/xjg/.conda/envs/deepseek/lib/python3.13/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fnreturn fn(*args, **kwargs)File "/home/xjg/.conda/envs/deepseek/lib/python3.13/site-packages/huggingface_hub/hf_api.py", line 2853, in repo_inforeturn method(repo_id,...<4 lines>...files_metadata=files_metadata,)File "/home/xjg/.conda/envs/deepseek/lib/python3.13/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fnreturn fn(*args, **kwargs)File "/home/xjg/.conda/envs/deepseek/lib/python3.13/site-packages/huggingface_hub/hf_api.py", line 2637, in model_infor = get_session().get(path, headers=headers, timeout=timeout, params=params)File "/home/xjg/.conda/envs/deepseek/lib/python3.13/site-packages/requests/sessions.py", line 602, in getreturn self.request("GET", url, **kwargs)~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^File "/home/xjg/.conda/envs/deepseek/lib/python3.13/site-packages/requests/sessions.py", line 589, in requestresp = self.send(prep, **send_kwargs)File "/home/xjg/.conda/envs/deepseek/lib/python3.13/site-packages/requests/sessions.py", line 703, in sendr = adapter.send(request, **kwargs)File "/home/xjg/.conda/envs/deepseek/lib/python3.13/site-packages/huggingface_hub/utils/_http.py", line 96, in sendreturn 
super().send(request, *args, **kwargs)~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^File "/home/xjg/.conda/envs/deepseek/lib/python3.13/site-packages/requests/adapters.py", line 644, in sendresp = conn.urlopen(method=request.method,...<9 lines>...chunked=chunked,)File "/home/xjg/.conda/envs/deepseek/lib/python3.13/site-packages/urllib3/connectionpool.py", line 787, in urlopenresponse = self._make_request(conn,...<10 lines>...**response_kw,)File "/home/xjg/.conda/envs/deepseek/lib/python3.13/site-packages/urllib3/connectionpool.py", line 464, in _make_requestself._validate_conn(conn)~~~~~~~~~~~~~~~~~~~^^^^^^File "/home/xjg/.conda/envs/deepseek/lib/python3.13/site-packages/urllib3/connectionpool.py", line 1093, in _validate_connconn.connect()~~~~~~~~~~~~^^File "/home/xjg/.conda/envs/deepseek/lib/python3.13/site-packages/urllib3/connection.py", line 741, in connectsock_and_verified = _ssl_wrap_socket_and_match_hostname(sock=sock,...<14 lines>...assert_fingerprint=self.assert_fingerprint,)File "/home/xjg/.conda/envs/deepseek/lib/python3.13/site-packages/urllib3/connection.py", line 920, in _ssl_wrap_socket_and_match_hostnamessl_sock = ssl_wrap_socket(sock=sock,...<8 lines>...tls_in_tls=tls_in_tls,)File "/home/xjg/.conda/envs/deepseek/lib/python3.13/site-packages/urllib3/util/ssl_.py", line 480, in ssl_wrap_socketssl_sock = _ssl_wrap_socket_impl(sock, context, tls_in_tls, server_hostname)File "/home/xjg/.conda/envs/deepseek/lib/python3.13/site-packages/urllib3/util/ssl_.py", line 524, in _ssl_wrap_socket_implreturn ssl_context.wrap_socket(sock, server_hostname=server_hostname)~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^File "/home/xjg/.conda/envs/deepseek/lib/python3.13/ssl.py", line 455, in wrap_socketreturn self.sslsocket_class._create(~~~~~~~~~~~~~~~~~~~~~~~~~~~~^sock=sock,^^^^^^^^^^...<5 lines>...session=session^^^^^^^^^^^^^^^)^File "/home/xjg/.conda/envs/deepseek/lib/python3.13/ssl.py", line 1076, in _createself.do_handshake()~~~~~~~~~~~~~~~~~^^File 
"/home/xjg/.conda/envs/deepseek/lib/python3.13/ssl.py", line 1372, in do_handshakeself._sslobj.do_handshake()~~~~~~~~~~~~~~~~~~~~~~~~~^^
KeyboardInterrupt
Hugging Face 内部调用的 XetHub (CAS 服务) 这一层用 Rust 的 reqwest 库,它并不认 socks5h://,所以直接挂掉
本地跑的是 SOCKS5 代理(比如 127.0.0.1:1080),但是这些工具(比如 requests、huggingface_hub)只认 HTTP(S) 代理,这时候需要一个“转换器”把 SOCKS5 → HTTP
2. HTTP 代理
privoxy 可以把 SOCKS 转换成 HTTP 代理
2.1 安装
sudo apt install privoxy
2.2 修改配置
配置文件 /etc/privoxy/config 里加:
forward-socks5t / 127.0.0.1:1080 .
listen-address 127.0.0.1:8118
listen-address [::1]:8118
2.3 重启服务
sudo systemctl restart privoxy
2.4 查看端口
$ ss -tuln | grep 8118
tcp LISTEN 0 128 127.0.0.1:8118 0.0.0.0:*
tcp LISTEN 0 128 [::1]:8118 [::]:*
2.5 运行代码
import os

from huggingface_hub import snapshot_download

# Use the HTTP proxy exposed by Privoxy on 127.0.0.1:8118, which forwards
# the traffic to the local SOCKS5 (Trojan) proxy.
os.environ["HTTP_PROXY"] = "http://127.0.0.1:8118"
os.environ["HTTPS_PROXY"] = "http://127.0.0.1:8118"

# Hugging Face model ID
model_id = "unsloth/DeepSeek-R1-Distill-Qwen-14B-unsloth-bnb-4bit"
# Local directory the files are written to
local_dir = "./models/deepseek-14b-bnb-4bit"

print(f"开始下载模型: {model_id} ...")

# Download all safetensors weights plus the config/tokenizer files.
# Each keyword argument sits on its own line so that the trailing comments
# cannot swallow the arguments that follow them.
# NOTE(review): the warning shown in the output below appears to come from
# these two legacy flags being deprecated in recent huggingface_hub
# releases - confirm against the installed version before removing them.
snapshot_download(
    repo_id=model_id,
    local_dir=local_dir,
    resume_download=True,  # resume interrupted downloads
    local_dir_use_symlinks=False,  # store real files instead of symlinks
    allow_patterns=["*.safetensors", "*.json", "*.py", "*.txt"],
)

print(f"✅ 模型已下载完成,保存在: {local_dir}")
运行结果:
For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder.warnings.warn(
tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████| 11.4M/11.4M [00:05<00:00, 1.92MB/s]
model-00001-of-00003.safetensors: 0%| | 0.00/4.94G [00:00<?, ?B/s]
model-00001-of-00003.safetensors: 20%|██████████████▋ | 980M/4.94G [28:54<1:41:50, 648kB/s]
model-00001-of-00003.safetensors: 55%|███████████████████████████████████████▏ | 2.72G/4.94G [1:01:10<3:20:16, 184kB/s]
model-00002-of-00003.safetensors: 31%|██████████████████████▊ | 1.53G/4.97G [29:01<44:59, 1.27MB/s]
model-00002-of-00003.safetensors: 47%|█████████████████████████████████▎ | 2.33G/4.97G [1:01:09<2:56:25, 249kB/s]
model-00003-of-00003.safetensors: 90%|█████████████████████████████████████████████████████████████████▊ | 3.92G/4.35G [1:00:58<14:55, 476kB/s]
3. 整合结果
3.1 检测哪个代理可以下载,并测速
import os
import time

import requests


def _export_proxy(proxy):
    """Publish *proxy* through both spellings of the proxy variables.

    The lowercase names win when both spellings are present, so writing only
    ``HTTP_PROXY``/``HTTPS_PROXY`` would not override an ``http_proxy`` that
    is already exported in the shell.
    """
    for name in ("HTTP_PROXY", "HTTPS_PROXY"):
        os.environ[name] = proxy
        os.environ[name.lower()] = proxy


def set_socks5_proxy(host="127.0.0.1", port=1080):
    """Route this process through the SOCKS5 proxy at ``host:port``.

    Note: ``requests`` only speaks SOCKS when its optional SOCKS extra
    (``pip install "requests[socks]"``) is installed.
    """
    proxy = f"socks5://{host}:{port}"
    _export_proxy(proxy)
    print(f"[+] 已设置 SOCKS5 代理: {proxy}")


def set_http_proxy(host="127.0.0.1", port=8118):
    """Route this process through the HTTP proxy at ``host:port``."""
    proxy = f"http://{host}:{port}"
    _export_proxy(proxy)
    print(f"[+] 已切换到 HTTP 代理: {proxy}")


def test_download_speed(url="https://huggingface.co", timeout=10):
    """Fetch *url* through the current proxy and report the throughput.

    Args:
        url: Resource to download; its whole body is streamed and counted.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        Throughput in MB/s, or 0 when the request fails for any reason.
    """
    print(f"[*] 正在测试下载速度: {url}")
    try:
        start = time.perf_counter()
        total_bytes = 0
        # The context manager releases the streamed connection even when
        # reading the body fails half-way.
        with requests.get(url, timeout=timeout, stream=True) as r:
            # An error page must not be counted as a successful download.
            r.raise_for_status()
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                total_bytes += len(chunk)
        # Guard against a zero interval on very small, very fast responses.
        elapsed = max(time.perf_counter() - start, 1e-9)
        size_mb = total_bytes / 1024 / 1024
        speed_mbps = size_mb / elapsed
        print(f"[✓] 下载完成: {size_mb:.2f} MB,用时 {elapsed:.2f} 秒,速度 {speed_mbps:.2f} MB/s")
        return speed_mbps
    except Exception as e:  # best-effort probe: any failure means "unusable"
        print(f"[-] 下载失败: {e}")
        return 0


if __name__ == "__main__":
    # 1) Measure through the SOCKS5 proxy first
    set_socks5_proxy("127.0.0.1", 1080)
    speed_socks5 = test_download_speed()

    # 2) Then through the HTTP proxy
    set_http_proxy("127.0.0.1", 8118)
    speed_http = test_download_speed()

    # 3) Report both results
    print("\n=== 测试结果 ===")
    print(f"SOCKS5 代理速度: {speed_socks5:.2f} MB/s")
    print(f"HTTP 代理速度 : {speed_http:.2f} MB/s")
3.2 一键下载
import os
import time
from huggingface_hub import snapshot_download
import requests# ---------------- 代理设置 ---------------- #
def set_socks5_proxy(host="127.0.0.1", port=1080):
    """Route this process through the SOCKS5 proxy at ``host:port``.

    Both the upper- and lowercase variables are written: the lowercase names
    take precedence when both exist, so setting only ``HTTP_PROXY`` would not
    override an ``http_proxy`` already exported in the shell.
    """
    proxy = f"socks5://{host}:{port}"
    for name in ("HTTP_PROXY", "HTTPS_PROXY"):
        os.environ[name] = proxy
        os.environ[name.lower()] = proxy
    print(f"[+] 已设置 SOCKS5 代理: {proxy}")


def set_http_proxy(host="127.0.0.1", port=8118):
    """Route this process through the HTTP proxy at ``host:port``.

    Writes both spellings of the variables for the same reason as
    ``set_socks5_proxy``.
    """
    proxy = f"http://{host}:{port}"
    for name in ("HTTP_PROXY", "HTTPS_PROXY"):
        os.environ[name] = proxy
        os.environ[name.lower()] = proxy
    print(f"[+] 已切换到 HTTP 代理: {proxy}")


# ---------------- Speed test ---------------- #
def test_download_speed(url="https://huggingface.co", timeout=10):
    """Fetch *url* through the current proxy and report the throughput.

    Args:
        url: Resource to download; its whole body is streamed and counted.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        Throughput in MB/s, or 0 when the request fails for any reason.
    """
    print(f"[*] 测试下载速度: {url}")
    try:
        start = time.perf_counter()
        total_bytes = 0
        # The context manager releases the streamed connection even when
        # reading the body fails half-way.
        with requests.get(url, timeout=timeout, stream=True) as r:
            # An error page must not be counted as a successful download.
            r.raise_for_status()
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                total_bytes += len(chunk)
        # Guard against a zero interval on very small, very fast responses.
        elapsed = max(time.perf_counter() - start, 1e-9)
        size_mb = total_bytes / 1024 / 1024
        speed_mbps = size_mb / elapsed
        print(f"[✓] 下载完成: {size_mb:.2f} MB, 用时 {elapsed:.2f} s, 速度 {speed_mbps:.2f} MB/s")
        return speed_mbps
    except Exception as e:  # best-effort probe: any failure means "unusable"
        print(f"[-] 下载失败: {e}")
        return 0


# ---------------- Model download ---------------- #
def download_model(model_id, local_dir):
    """Download the weights and config files of *model_id* into *local_dir*.

    Args:
        model_id: Hugging Face repository ID.
        local_dir: Directory the matching files are written to.
    """
    print(f"[*] 开始下载模型: {model_id}")
    # One keyword argument per line, so the trailing comment cannot swallow
    # the arguments that follow it.
    snapshot_download(
        repo_id=model_id,
        local_dir=local_dir,
        resume_download=True,  # resume interrupted downloads
        local_dir_use_symlinks=False,
        allow_patterns=["*.safetensors", "*.json", "*.py", "*.txt"],
    )
    print(f"[✓] 模型已下载完成,保存在: {local_dir}")


# ---------------- Main flow ---------------- #
if __name__ == "__main__":
    model_id = "unsloth/DeepSeek-R1-Distill-Qwen-14B-unsloth-bnb-4bit"
    local_dir = "./models/deepseek-14b-bnb-4bit"

    # 1) Probe the SOCKS5 proxy
    set_socks5_proxy("127.0.0.1", 1080)
    speed_socks5 = test_download_speed()

    # 2) Probe the HTTP proxy
    set_http_proxy("127.0.0.1", 8118)
    speed_http = test_download_speed()

    # A speed of 0 means the probe failed. If both failed there is nothing to
    # choose from, so stop instead of starting a multi-GB download through a
    # proxy that is known not to work.
    if speed_socks5 <= 0 and speed_http <= 0:
        raise SystemExit("[-] 两个代理均不可用,请检查代理是否已启动")

    # 3) Pick the faster proxy (SOCKS5 wins ties)
    if speed_socks5 >= speed_http:
        set_socks5_proxy("127.0.0.1", 1080)
        print("[*] 使用 SOCKS5 代理下载模型")
    else:
        set_http_proxy("127.0.0.1", 8118)
        print("[*] 使用 HTTP 代理下载模型")

    # 4) Download the model
    download_model(model_id, local_dir)
3.3 优化后的下载代码
import os
import time
import requests
from huggingface_hub import snapshot_download
from tqdm import tqdm# ---------------- 代理设置 ---------------- #
def set_proxy(proxy_type="socks5", host="127.0.0.1", port=1080):
    """Export a proxy URL for this process and return it.

    Args:
        proxy_type: ``"socks5"`` or ``"socks5h"`` (case-insensitive) select
            the matching SOCKS scheme; any other value is treated as an HTTP
            proxy.
        host: Proxy host.
        port: Proxy port.

    Returns:
        The proxy URL that was exported.
    """
    kind = proxy_type.lower()
    # "socks5h" used to fall through to the HTTP branch and yield an
    # "http://" URL pointing at a SOCKS port; keep its own scheme instead.
    scheme = kind if kind in ("socks5", "socks5h") else "http"
    proxy = f"{scheme}://{host}:{port}"
    # The lowercase names take precedence when both spellings exist, so both
    # are written to override an http_proxy already exported in the shell.
    for name in ("HTTP_PROXY", "HTTPS_PROXY"):
        os.environ[name] = proxy
        os.environ[name.lower()] = proxy
    print(f"[+] 设置代理: {proxy_type.upper()} -> {proxy}")
    return proxy


# ---------------- Proxy availability check ---------------- #
def check_proxy(proxy_type, host, port, test_url="https://huggingface.co", timeout=5):
    """Report whether *test_url* is reachable through the given proxy.

    The proxy is also exported to the environment via ``set_proxy`` as a
    side effect.

    Args:
        proxy_type: Proxy kind understood by ``set_proxy``.
        host: Proxy host.
        port: Proxy port.
        test_url: URL requested through the proxy.
        timeout: Request timeout in seconds.

    Returns:
        True when the request answers with HTTP 200, otherwise False.
    """
    proxy = set_proxy(proxy_type, host, port)
    try:
        r = requests.get(test_url, timeout=timeout, proxies={"http": proxy, "https": proxy})
    except Exception as e:  # best-effort probe: any failure means "unusable"
        print(f"[-] {proxy_type.upper()} 代理不可用: {e}")
        return False

    if r.status_code == 200:
        print(f"[✓] {proxy_type.upper()} 代理可用")
        return True

    # A non-200 answer used to return False without any explanation.
    print(f"[-] {proxy_type.upper()} 代理不可用: HTTP {r.status_code}")
    return False


# ---------------- Download speed test ---------------- #
def test_speed(test_url="https://huggingface.co", timeout=10):
    """Measure download throughput for *test_url* through the current proxy.

    Args:
        test_url: Resource to download; its whole body is streamed.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        Throughput in MB/s, or 0 when the request fails.
    """
    try:
        start = time.perf_counter()
        total_bytes = 0
        # The context manager releases the streamed connection even when
        # reading the body fails half-way.
        with requests.get(test_url, timeout=timeout, stream=True) as r:
            # An error page must not be counted as a successful download.
            r.raise_for_status()
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                total_bytes += len(chunk)
        # Guard against a zero interval on very small, very fast responses.
        elapsed = max(time.perf_counter() - start, 1e-9)
        return (total_bytes / 1024 / 1024) / elapsed
    except Exception:
        # Not a bare ``except``: Ctrl-C (KeyboardInterrupt) and SystemExit
        # must still be able to stop the script during a slow probe.
        return 0


# ---------------- Model download (with progress bars) ---------------- #
def download_model(model_id, local_dir, retries=3):
    """Download *model_id* into *local_dir*, retrying on failure.

    Args:
        model_id: Hugging Face repository ID.
        local_dir: Directory the matching files are written to.
        retries: Maximum number of attempts.

    Returns:
        None. A message is printed when every attempt has failed.
    """
    for attempt in range(1, retries + 1):
        try:
            print(f"[*] 开始下载模型: {model_id}")
            # snapshot_download() has no ``progress_callback`` parameter:
            # passing one raised TypeError on every attempt, which the retry
            # loop then swallowed. The library renders its own tqdm progress
            # bars, so no callback is needed.
            snapshot_download(
                repo_id=model_id,
                local_dir=local_dir,
                resume_download=True,
                local_dir_use_symlinks=False,
                allow_patterns=["*.safetensors", "*.json", "*.py", "*.txt"],
                max_workers=4,
            )
        except Exception as e:
            print(f"[!] 下载失败 (尝试 {attempt}/{retries}): {e}")
            # Only wait when another attempt will actually follow.
            if attempt < retries:
                time.sleep(3)
        else:
            print(f"[✓] 模型下载完成: {local_dir}")
            return
    print("[-] 下载失败,请检查网络或代理设置")


# ---------------- tqdm progress-bar callback ---------------- #
# Progress bar currently being drawn; None while no transfer is active.
pbar = None


def progress_bar(current, total, filename=None):
    """Render download progress with a module-level tqdm bar.

    A new bar is started whenever *total* differs from the active bar's
    total; the bar is closed once it reaches *total*.

    NOTE(review): the (current, total, filename) signature is this file's own
    convention - confirm that the API it is registered with really invokes
    the callback this way.

    Args:
        current: Bytes transferred so far (absolute, not a delta).
        total: Expected size in bytes, or None when unknown.
        filename: Optional label shown next to the bar.
    """
    global pbar
    if pbar is None or pbar.total != total:
        if pbar is not None:
            pbar.close()
        pbar = tqdm(total=total, unit="B", unit_scale=True, desc=filename or "Downloading")
    # tqdm.update() takes an increment, so convert the absolute position.
    pbar.update(current - pbar.n)
    # With an unknown total there is no completion point to compare against.
    if total is not None and pbar.n >= total:
        pbar.close()
        pbar = None


# ---------------- Main flow ---------------- #
if __name__ == "__main__":
    model_id = "unsloth/DeepSeek-R1-Distill-Qwen-14B-unsloth-bnb-4bit"
    local_dir = "./models/deepseek-14b-bnb-4bit"

    proxies = [
        {"type": "socks5", "host": "127.0.0.1", "port": 1080},
        {"type": "http", "host": "127.0.0.1", "port": 8118},
    ]

    print("[*] 检测可用代理...")
    available_proxies = [
        p for p in proxies if check_proxy(p["type"], p["host"], p["port"])
    ]

    if not available_proxies:
        print("[-] 没有可用代理,请先启动 SOCKS5 或 HTTP 代理")
        # ``exit`` is injected by the ``site`` module and is not guaranteed
        # to exist; SystemExit always is.
        raise SystemExit(1)

    # Measure every usable proxy and keep the fastest one
    measured = []
    for p in available_proxies:
        set_proxy(p["type"], p["host"], p["port"])
        speed = test_speed()
        print(f"[i] {p['type'].upper()} 代理测速: {speed:.2f} MB/s")
        measured.append((speed, p))

    # max() keeps the first entry on ties, i.e. the earlier proxy in the list.
    _, fastest = max(measured, key=lambda item: item[0])
    print(f"[*] 使用最快代理: {fastest['type'].upper()} -> {fastest['host']}:{fastest['port']}")
    set_proxy(fastest['type'], fastest['host'], fastest['port'])

    # Download the model
    download_model(model_id, local_dir)