当前位置: 首页 > news >正文

利用python脚本从dockerhub上下载镜像,可以选择arm架构还是x86架构

利用python脚本从dockerhub上下载镜像,可以选择arm架构还是x86架构

  • python脚本使用方法
    • install.py文件内容

python脚本使用方法

python install.py gpustack/gpustack:v0.7.1-npu arm64

支持平台选择 arm64或者amd64
支持断点续传
从官方docker hub下载,需要梯子
更新日志:
2025.8.27 增加了下载速度显示,增加了需要下载的总大小和每层大小
2025.8.27 17:36 增加了每层的检测,如果层级下载的大小等有问题,会重新下载

install.py文件内容

import os
import sys
import gzip
import json
import hashlib
import shutil
import requests
import tarfile
import urllib3
import time

# Registry certificates are not verified below (verify=False), so silence the warnings.
urllib3.disable_warnings()

# --- Command-line parsing ---------------------------------------------------
# Usage: docker_pull.py [registry/][repository/]image[:tag|@digest] [arch]
if len(sys.argv) < 2:
    print('Usage:\n\tdocker_pull.py [registry/][repository/]image[:tag|@digest] [arch]\n')
    print('\tarch default: amd64, other options: arm64, arm/v7')
    # sys.exit instead of the bare exit() builtin: exit() is a `site` module
    # convenience and is not guaranteed to exist (e.g. under `python -S`).
    sys.exit(1)

image_input = sys.argv[1]
target_arch = sys.argv[2] if len(sys.argv) > 2 else "amd64"
target_os = "linux"

# --- Image reference parsing ------------------------------------------------
# Defaults match `docker pull`: repository "library", tag "latest".
repo = 'library'
tag = 'latest'
imgparts = image_input.split('/')

# The last path component may carry "@digest" or ":tag"; try digest first.
try:
    img, tag = imgparts[-1].split('@')
except ValueError:
    try:
        img, tag = imgparts[-1].split(':')
    except ValueError:
        img = imgparts[-1]

# A first component containing '.' or ':' is a registry host (e.g. ghcr.io,
# localhost:5000); otherwise everything targets the official Docker Hub.
if len(imgparts) > 1 and ('.' in imgparts[0] or ':' in imgparts[0]):
    registry = imgparts[0]
    repo = '/'.join(imgparts[1:-1])
else:
    registry = 'registry-1.docker.io'
    if len(imgparts[:-1]) != 0:
        repo = '/'.join(imgparts[:-1])
    else:
        repo = 'library'
repository = f'{repo}/{img}'

# --- Authentication defaults (overridden from WWW-Authenticate below) -------
auth_url = 'https://auth.docker.io/token'
reg_service = 'registry.docker.io'
# Probe the registry root; a 401 response advertises the token endpoint and
# service name in its WWW-Authenticate header (quoted values at odd indices).
resp = requests.get(f'https://{registry}/v2/', verify=False)
if resp.status_code == 401:
    auth_header = resp.headers['WWW-Authenticate']
    auth_url = auth_header.split('"')[1]
    try:
        reg_service = auth_header.split('"')[3]
    except IndexError:
        reg_service = ""


def get_auth_head(mtype):
    """Fetch a fresh pull-scoped bearer token and build request headers.

    `mtype` is the media type placed in the Accept header (manifest v2,
    manifest list, ...). A new token is requested on every call so it
    cannot expire mid-download.
    """
    token_resp = requests.get(
        f'{auth_url}?service={reg_service}&scope=repository:{repository}:pull',
        verify=False)
    token = token_resp.json()['token']
    return {'Authorization': f'Bearer {token}', 'Accept': mtype}


def format_size(bytes_size):
    """Render a byte count as a human-readable string (B/KB/MB/GB/TB)."""
    for unit in ('B', 'KB', 'MB', 'GB'):
        if bytes_size < 1024.0:
            return f"{bytes_size:.2f}{unit}"
        bytes_size /= 1024.0
    return f"{bytes_size:.2f}TB"


def progress_bar(ublob, nb_traits, speed=None):
    """Redraw a 50-column progress bar for one layer on the current line.

    `ublob` is the layer digest (chars 7:19 label the bar), `nb_traits` the
    number of filled columns, `speed` an optional KB/s figure to append.
    """
    sys.stdout.write('\r' + ublob[7:19] + ': Downloading [')
    for col in range(0, nb_traits):
        # The leading edge of the bar is drawn as '>', the body as '='.
        sys.stdout.write('>' if col == nb_traits - 1 else '=')
    for col in range(0, 50 - nb_traits):
        sys.stdout.write(' ')
    if speed is not None:
        sys.stdout.write(f'] {speed:.2f} KB/s')
    else:
        sys.stdout.write(']')
    sys.stdout.flush()


def sha256sum(filename):
    """Return the file's SHA-256 in registry digest form ("sha256:<hex>")."""
    digest = hashlib.sha256()
    with open(filename, "rb") as fh:
        for chunk in iter(lambda: fh.read(8192), b""):
            digest.update(chunk)
    return "sha256:" + digest.hexdigest()
# Step 1: try to fetch the image manifest for the tag directly.
auth_head = get_auth_head('application/vnd.docker.distribution.manifest.v2+json')
resp = requests.get(f'https://{registry}/v2/{repository}/manifests/{tag}',
                    headers=auth_head, verify=False)
# Parse each response body once instead of re-calling resp.json() repeatedly.
manifest_doc = resp.json() if resp.status_code == 200 else {}

# Step 2: multi-arch images return a manifest *list*; pick the entry that
# matches the requested architecture/OS and re-fetch its manifest by digest.
if resp.status_code != 200 or 'layers' not in manifest_doc:
    print('[*] Falling back to manifest list...')
    auth_head = get_auth_head('application/vnd.docker.distribution.manifest.list.v2+json')
    resp = requests.get(f'https://{registry}/v2/{repository}/manifests/{tag}',
                        headers=auth_head, verify=False)
    if resp.status_code != 200:
        print(f'[-] Cannot fetch manifest list for {repository} [HTTP {resp.status_code}]')
        sys.exit(1)
    manifest_list = resp.json()
    matched = False
    for manifest in manifest_list.get('manifests', []):
        platform = manifest.get('platform', {})
        if platform.get('architecture') == target_arch and platform.get('os') == target_os:
            digest = manifest['digest']
            print(f"[+] Found {target_arch}/{target_os} manifest: {digest}")
            auth_head = get_auth_head('application/vnd.docker.distribution.manifest.v2+json')
            resp = requests.get(f'https://{registry}/v2/{repository}/manifests/{digest}',
                                headers=auth_head, verify=False)
            manifest_doc = resp.json() if resp.status_code == 200 else {}
            matched = True
            break
    if not matched or 'layers' not in manifest_doc:
        print('[-] Could not find matching architecture or manifest is invalid')
        sys.exit(1)

layers = manifest_doc['layers']
# Working directory for the extracted image; ':' is illegal in some paths,
# so digest-style tags get it replaced with '@'.
imgdir = f'tmp_{img}_{tag.replace(":", "@")}'
os.makedirs(imgdir, exist_ok=True)
print(f'Creating image structure in: {imgdir}')

# Download the image config blob; its file name is the digest without the
# "sha256:" prefix (config[7:]).
config = resp.json()['config']['digest']
confresp = requests.get(f'https://{registry}/v2/{repository}/blobs/{config}',
                        headers=auth_head, verify=False)
with open(f'{imgdir}/{config[7:]}.json', 'wb') as file:
    file.write(confresp.content)

# Skeleton of manifest.json (docker-save format); layer paths are appended
# as each layer is extracted.
content = [{'Config': config[7:] + '.json', 'RepoTags': [f'{img}:{tag}'], 'Layers': []}]

# Placeholder per-layer config used for every layer except the last one.
empty_json = '{"created":"1970-01-01T00:00:00Z","container_config":{"Hostname":"","Domainname":"","User":"","AttachStdin":false,"AttachStdout":false,"AttachStderr":false,"Tty":false,"OpenStdin":false,"StdinOnce":false,"Env":null,"Cmd":null,"Image":"","Volumes":null,"WorkingDir":"","Entrypoint":null,"OnBuild":null,"Labels":null}}'

total_size = sum(layer['size'] for layer in layers)
print(f"Total Image Size To Download [{format_size(total_size)}]")

# Chained parent id used to derive deterministic per-layer ids below.
parentid = ''
# Re-download a failing layer at most this many times before giving up.
MAX_LAYER_ATTEMPTS = 3

for layer in layers:
    ublob = layer['digest']
    size_expected = layer['size']
    print(f"{ublob[7:19]}: This Layer Need Download [{format_size(size_expected)}]")

    # Deterministic synthetic layer id: hash of parent id + blob digest,
    # so re-runs map the same layer to the same directory (enables resume).
    fake_layerid = hashlib.sha256((parentid + '\n' + ublob + '\n').encode('utf-8')).hexdigest()
    layerdir = f'{imgdir}/{fake_layerid}'
    os.makedirs(layerdir, exist_ok=True)
    gzfile_path = f'{layerdir}/layer_gzip.tar'
    layerfile_path = f'{layerdir}/layer.tar'

    # Skip layers already fully extracted by a previous (interrupted) run.
    if os.path.exists(layerfile_path) and os.path.getsize(layerfile_path) > 0:
        print(f"{ublob[7:19]}: already extracted, skipping")
        parentid = fake_layerid
        content[0]['Layers'].append(fake_layerid + '/layer.tar')
        continue

    for attempt in range(1, MAX_LAYER_ATTEMPTS + 1):
        # --- Download, resuming a partial file via an HTTP Range request ---
        headers = auth_head.copy()
        downloaded = 0
        if os.path.exists(gzfile_path):
            downloaded = os.path.getsize(gzfile_path)
            if downloaded >= size_expected:
                # A valid partial file must be smaller than the full blob.
                print(f"{ublob[7:19]}: corrupted gzip detected, re-downloading")
                os.remove(gzfile_path)
                downloaded = 0
            else:
                headers['Range'] = f'bytes={downloaded}-'

        sys.stdout.write(f'{ublob[7:19]}: Downloading...\n')
        sys.stdout.flush()
        bresp = requests.get(f'https://{registry}/v2/{repository}/blobs/{ublob}',
                             headers=headers, stream=True, verify=False)
        bresp.raise_for_status()

        mode = 'ab' if downloaded else 'wb'
        # Content-Length covers only the remaining bytes when resuming.
        total = int(bresp.headers.get('Content-Length', 0)) + downloaded
        acc = downloaded
        nb_traits = int(acc / max(1, total // 50))
        last_time = time.time()
        last_acc = acc
        with open(gzfile_path, mode) as file:
            last_update_time = time.time()
            for chunk in bresp.iter_content(chunk_size=8192):
                if not chunk:
                    continue
                file.write(chunk)
                acc += len(chunk)
                now = time.time()
                # Throttle progress/speed redraws to once per second.
                if now - last_update_time >= 1.0:
                    speed = (acc - last_acc) / 1024 / max(now - last_time, 0.001)
                    last_time = now
                    last_acc = acc
                    nb_traits = int(acc / max(1, total // 50))
                    progress_bar(ublob, nb_traits, speed)
                    last_update_time = now
        sys.stdout.write("\n")

        # --- Verify the blob digest before trusting the download ---
        calc_digest = sha256sum(gzfile_path)
        if calc_digest != ublob:
            print(f"[-] Digest mismatch for {ublob[7:19]}: expected {ublob}, got {calc_digest}")
            os.remove(gzfile_path)
            if os.path.exists(layerfile_path):
                os.remove(layerfile_path)
            continue  # retry this layer from scratch
        print(f"[+] Digest verified for {ublob[7:19]}")

        # --- Extract (gunzip) the layer into docker-save layout ---
        try:
            sys.stdout.write(f'{ublob[7:19]}: Extracting...\n')
            with open(layerfile_path, "wb") as file, gzip.open(gzfile_path, 'rb') as unzLayer:
                shutil.copyfileobj(unzLayer, file)
            os.remove(gzfile_path)
        except Exception as e:
            print(f"[-] Extraction failed for {ublob[7:19]}: {e}, retrying...")
            if os.path.exists(gzfile_path):
                os.remove(gzfile_path)
            if os.path.exists(layerfile_path):
                os.remove(layerfile_path)
            continue  # retry this layer from scratch
        break  # layer downloaded, verified and extracted
    else:
        # BUGFIX: the original code `continue`d to the next layer here,
        # silently producing a corrupt image with a missing layer.
        print(f"[-] Giving up on layer {ublob[7:19]} after {MAX_LAYER_ATTEMPTS} attempts")
        sys.exit(1)

    content[0]['Layers'].append(fake_layerid + '/layer.tar')

    # Per-layer json: the last layer embeds the (trimmed) image config,
    # every other layer gets the generic empty_json placeholder.
    with open(f'{layerdir}/json', 'w') as file:
        if layers[-1]['digest'] == layer['digest']:
            json_obj = json.loads(confresp.content)
            json_obj.pop('history', None)
            json_obj.pop('rootfs', None)
        else:
            json_obj = json.loads(empty_json)
        json_obj['id'] = fake_layerid
        if parentid:
            json_obj['parent'] = parentid
        parentid = json_obj['id']
        file.write(json.dumps(json_obj))

with open(f'{imgdir}/manifest.json', 'w') as file:
    file.write(json.dumps(content, indent=2))
# `repositories` maps image:tag to the id of the topmost (last) layer.
with open(f'{imgdir}/repositories', 'w') as file:
    file.write(json.dumps({img: {tag: fake_layerid}}, indent=2))

docker_tar = f'{repo.replace("/", "_")}_{img}.tar'
# Pack the assembled directory into a `docker load`-able tarball, then
# remove the temporary working directory.
print("Creating archive...")
with tarfile.open(docker_tar, "w") as tar:
    tar.add(imgdir, arcname=os.path.sep)
shutil.rmtree(imgdir)
print(f'Docker image pulled: {docker_tar}')
http://www.dtcms.com/a/352959.html

相关文章:

  • 福建地区通信安全员考试题库及答案
  • 基于FPGA的情绪感知系统设计方案:心理健康监测应用(四)
  • FPGA入门学习路径
  • Go变量作用域全解析
  • Zynq介绍和命名方式
  • FPGA学习笔记——Verilog中可综合和不可综合语句
  • 德克西尔氢气探测器:工业安全守护核心
  • 【Linux】用户与用户组管理
  • 6.8 学习ui组件方法和Element Plus介绍
  • 嵌入式C语言进阶:高效数学运算的艺术与实战
  • Java全栈开发面试实战:从基础到微服务架构的深度解析
  • 革新固态电池失效分析技术:AFM-SEM联用技术助力突破瓶颈
  • Java 大视界 -- Java 大数据机器学习模型在电商推荐系统冷启动问题解决与推荐效果提升中的应用(403)
  • Unity Shader unity文档学习笔记(二十一):几种草体的实现方式(透明度剔除,GPU Instaning, 曲面细分+几何着色器实现)
  • Axios 整理常用形式及涉及的参数
  • Vue3 + Vue Router 实现动态面包屑导航(支持点击跳转)
  • Techub News 与 TOKENPOST 达成战略合作以推动中韩 Web3 资讯互通
  • 有鹿机器人如何用科技与创新模式破解行业难题
  • 「LangChain 学习笔记」LangChain大模型应用开发:模型链(Chains)
  • 外汇中高频 CTA 风控策略回测案例
  • 宝塔面板零基础搭建 WordPress 个人博客与外贸网站 | 新手10分钟上手指南
  • 国内股指期货合约的最小变动价位是多少?
  • 大语言模型的“引擎室”:深入剖析现代计算与算法优化
  • 企业落地版 AutoGen 工程示例:自动化市场分析报告生成系统
  • 代码随想录刷题Day42
  • 【芯片低功耗设计中的UPF:从理论到实践详解】
  • windows 子系统 wsl 命令的用法
  • lvgl(一)
  • Java全栈工程师面试实录:从基础到实战的深度技术探索
  • 集成电路学习:什么是YOLO一次性检测器