当前位置: 首页 > news >正文

利用python脚本从dockerhub上下载镜像,可以选择arm架构还是x86架构

利用python脚本从dockerhub上下载镜像,可以选择arm架构还是x86架构

  • python脚本使用方法
    • install.py文件内容

python脚本使用方法

python install.py gpustack/gpustack:v0.7.1-npu arm64

支持平台选择 arm64或者amd64
支持断点续传
从官方docker hub下载,需要梯子
更新日志:
2025.8.27 增加了下载速度显示,增加了需要下载的总大小和每层大小
2025.8.27 17:36 增加了每层的检测,如果层级下载的大小等有问题,会重新下载

install.py文件内容

import os
import sys
import gzip
import json
import hashlib
import shutil
import requests
import tarfile
import urllib3
import time

# Registry certificates are not verified below (verify=False), so silence the warnings.
urllib3.disable_warnings()

# --- Command-line parsing ---------------------------------------------------
# Usage: docker_pull.py [registry/][repository/]image[:tag|@digest] [arch]
if len(sys.argv) < 2:
    print('Usage:\n\tdocker_pull.py [registry/][repository/]image[:tag|@digest] [arch]\n')
    print('\tarch default: amd64, other options: arm64, arm/v7')
    # sys.exit instead of the bare exit() builtin: exit() is a `site` module
    # convenience and is not guaranteed to exist (e.g. under `python -S`).
    sys.exit(1)

image_input = sys.argv[1]
target_arch = sys.argv[2] if len(sys.argv) > 2 else "amd64"
target_os = "linux"

# --- Image reference parsing ------------------------------------------------
# Defaults match `docker pull`: repository "library", tag "latest".
repo = 'library'
tag = 'latest'
imgparts = image_input.split('/')

# The last path component may carry "@digest" or ":tag"; try digest first.
try:
    img, tag = imgparts[-1].split('@')
except ValueError:
    try:
        img, tag = imgparts[-1].split(':')
    except ValueError:
        img = imgparts[-1]

# A first component containing '.' or ':' is a registry host (e.g. ghcr.io,
# localhost:5000); otherwise everything targets the official Docker Hub.
if len(imgparts) > 1 and ('.' in imgparts[0] or ':' in imgparts[0]):
    registry = imgparts[0]
    repo = '/'.join(imgparts[1:-1])
else:
    registry = 'registry-1.docker.io'
    if len(imgparts[:-1]) != 0:
        repo = '/'.join(imgparts[:-1])
    else:
        repo = 'library'
repository = f'{repo}/{img}'

# --- Authentication defaults (overridden from WWW-Authenticate below) -------
auth_url = 'https://auth.docker.io/token'
reg_service = 'registry.docker.io'
# Probe the registry root; a 401 response advertises the token endpoint and
# service name in its WWW-Authenticate header (quoted values at odd indices).
resp = requests.get(f'https://{registry}/v2/', verify=False)
if resp.status_code == 401:
    auth_header = resp.headers['WWW-Authenticate']
    auth_url = auth_header.split('"')[1]
    try:
        reg_service = auth_header.split('"')[3]
    except IndexError:
        reg_service = ""


def get_auth_head(mtype):
    """Fetch a fresh pull-scoped bearer token and build request headers.

    `mtype` is the media type placed in the Accept header (manifest v2,
    manifest list, ...). A new token is requested on every call so it
    cannot expire mid-download.
    """
    token_resp = requests.get(
        f'{auth_url}?service={reg_service}&scope=repository:{repository}:pull',
        verify=False)
    token = token_resp.json()['token']
    return {'Authorization': f'Bearer {token}', 'Accept': mtype}


def format_size(bytes_size):
    """Render a byte count as a human-readable string (B/KB/MB/GB/TB)."""
    for unit in ('B', 'KB', 'MB', 'GB'):
        if bytes_size < 1024.0:
            return f"{bytes_size:.2f}{unit}"
        bytes_size /= 1024.0
    return f"{bytes_size:.2f}TB"


def progress_bar(ublob, nb_traits, speed=None):
    """Redraw a 50-column progress bar for one layer on the current line.

    `ublob` is the layer digest (chars 7:19 label the bar), `nb_traits` the
    number of filled columns, `speed` an optional KB/s figure to append.
    """
    sys.stdout.write('\r' + ublob[7:19] + ': Downloading [')
    for col in range(0, nb_traits):
        # The leading edge of the bar is drawn as '>', the body as '='.
        sys.stdout.write('>' if col == nb_traits - 1 else '=')
    for col in range(0, 50 - nb_traits):
        sys.stdout.write(' ')
    if speed is not None:
        sys.stdout.write(f'] {speed:.2f} KB/s')
    else:
        sys.stdout.write(']')
    sys.stdout.flush()


def sha256sum(filename):
    """Return the file's SHA-256 in registry digest form ("sha256:<hex>")."""
    digest = hashlib.sha256()
    with open(filename, "rb") as fh:
        for chunk in iter(lambda: fh.read(8192), b""):
            digest.update(chunk)
    return "sha256:" + digest.hexdigest()
# Step 1: try to fetch the image manifest for the tag directly.
auth_head = get_auth_head('application/vnd.docker.distribution.manifest.v2+json')
resp = requests.get(f'https://{registry}/v2/{repository}/manifests/{tag}',
                    headers=auth_head, verify=False)
# Parse each response body once instead of re-calling resp.json() repeatedly.
manifest_doc = resp.json() if resp.status_code == 200 else {}

# Step 2: multi-arch images return a manifest *list*; pick the entry that
# matches the requested architecture/OS and re-fetch its manifest by digest.
if resp.status_code != 200 or 'layers' not in manifest_doc:
    print('[*] Falling back to manifest list...')
    auth_head = get_auth_head('application/vnd.docker.distribution.manifest.list.v2+json')
    resp = requests.get(f'https://{registry}/v2/{repository}/manifests/{tag}',
                        headers=auth_head, verify=False)
    if resp.status_code != 200:
        print(f'[-] Cannot fetch manifest list for {repository} [HTTP {resp.status_code}]')
        sys.exit(1)
    manifest_list = resp.json()
    matched = False
    for manifest in manifest_list.get('manifests', []):
        platform = manifest.get('platform', {})
        if platform.get('architecture') == target_arch and platform.get('os') == target_os:
            digest = manifest['digest']
            print(f"[+] Found {target_arch}/{target_os} manifest: {digest}")
            auth_head = get_auth_head('application/vnd.docker.distribution.manifest.v2+json')
            resp = requests.get(f'https://{registry}/v2/{repository}/manifests/{digest}',
                                headers=auth_head, verify=False)
            manifest_doc = resp.json() if resp.status_code == 200 else {}
            matched = True
            break
    if not matched or 'layers' not in manifest_doc:
        print('[-] Could not find matching architecture or manifest is invalid')
        sys.exit(1)

layers = manifest_doc['layers']
# Working directory for the extracted image; ':' is illegal in some paths,
# so digest-style tags get it replaced with '@'.
imgdir = f'tmp_{img}_{tag.replace(":", "@")}'
os.makedirs(imgdir, exist_ok=True)
print(f'Creating image structure in: {imgdir}')

# Download the image config blob; its file name is the digest without the
# "sha256:" prefix (config[7:]).
config = resp.json()['config']['digest']
confresp = requests.get(f'https://{registry}/v2/{repository}/blobs/{config}',
                        headers=auth_head, verify=False)
with open(f'{imgdir}/{config[7:]}.json', 'wb') as file:
    file.write(confresp.content)

# Skeleton of manifest.json (docker-save format); layer paths are appended
# as each layer is extracted.
content = [{'Config': config[7:] + '.json', 'RepoTags': [f'{img}:{tag}'], 'Layers': []}]

# Placeholder per-layer config used for every layer except the last one.
empty_json = '{"created":"1970-01-01T00:00:00Z","container_config":{"Hostname":"","Domainname":"","User":"","AttachStdin":false,"AttachStdout":false,"AttachStderr":false,"Tty":false,"OpenStdin":false,"StdinOnce":false,"Env":null,"Cmd":null,"Image":"","Volumes":null,"WorkingDir":"","Entrypoint":null,"OnBuild":null,"Labels":null}}'

total_size = sum(layer['size'] for layer in layers)
print(f"Total Image Size To Download [{format_size(total_size)}]")

# Chained parent id used to derive deterministic per-layer ids below.
parentid = ''
# Re-download a failing layer at most this many times before giving up.
MAX_LAYER_ATTEMPTS = 3

for layer in layers:
    ublob = layer['digest']
    size_expected = layer['size']
    print(f"{ublob[7:19]}: This Layer Need Download [{format_size(size_expected)}]")

    # Deterministic synthetic layer id: hash of parent id + blob digest,
    # so re-runs map the same layer to the same directory (enables resume).
    fake_layerid = hashlib.sha256((parentid + '\n' + ublob + '\n').encode('utf-8')).hexdigest()
    layerdir = f'{imgdir}/{fake_layerid}'
    os.makedirs(layerdir, exist_ok=True)
    gzfile_path = f'{layerdir}/layer_gzip.tar'
    layerfile_path = f'{layerdir}/layer.tar'

    # Skip layers already fully extracted by a previous (interrupted) run.
    if os.path.exists(layerfile_path) and os.path.getsize(layerfile_path) > 0:
        print(f"{ublob[7:19]}: already extracted, skipping")
        parentid = fake_layerid
        content[0]['Layers'].append(fake_layerid + '/layer.tar')
        continue

    for attempt in range(1, MAX_LAYER_ATTEMPTS + 1):
        # --- Download, resuming a partial file via an HTTP Range request ---
        headers = auth_head.copy()
        downloaded = 0
        if os.path.exists(gzfile_path):
            downloaded = os.path.getsize(gzfile_path)
            if downloaded >= size_expected:
                # A valid partial file must be smaller than the full blob.
                print(f"{ublob[7:19]}: corrupted gzip detected, re-downloading")
                os.remove(gzfile_path)
                downloaded = 0
            else:
                headers['Range'] = f'bytes={downloaded}-'

        sys.stdout.write(f'{ublob[7:19]}: Downloading...\n')
        sys.stdout.flush()
        bresp = requests.get(f'https://{registry}/v2/{repository}/blobs/{ublob}',
                             headers=headers, stream=True, verify=False)
        bresp.raise_for_status()

        mode = 'ab' if downloaded else 'wb'
        # Content-Length covers only the remaining bytes when resuming.
        total = int(bresp.headers.get('Content-Length', 0)) + downloaded
        acc = downloaded
        nb_traits = int(acc / max(1, total // 50))
        last_time = time.time()
        last_acc = acc
        with open(gzfile_path, mode) as file:
            last_update_time = time.time()
            for chunk in bresp.iter_content(chunk_size=8192):
                if not chunk:
                    continue
                file.write(chunk)
                acc += len(chunk)
                now = time.time()
                # Throttle progress/speed redraws to once per second.
                if now - last_update_time >= 1.0:
                    speed = (acc - last_acc) / 1024 / max(now - last_time, 0.001)
                    last_time = now
                    last_acc = acc
                    nb_traits = int(acc / max(1, total // 50))
                    progress_bar(ublob, nb_traits, speed)
                    last_update_time = now
        sys.stdout.write("\n")

        # --- Verify the blob digest before trusting the download ---
        calc_digest = sha256sum(gzfile_path)
        if calc_digest != ublob:
            print(f"[-] Digest mismatch for {ublob[7:19]}: expected {ublob}, got {calc_digest}")
            os.remove(gzfile_path)
            if os.path.exists(layerfile_path):
                os.remove(layerfile_path)
            continue  # retry this layer from scratch
        print(f"[+] Digest verified for {ublob[7:19]}")

        # --- Extract (gunzip) the layer into docker-save layout ---
        try:
            sys.stdout.write(f'{ublob[7:19]}: Extracting...\n')
            with open(layerfile_path, "wb") as file, gzip.open(gzfile_path, 'rb') as unzLayer:
                shutil.copyfileobj(unzLayer, file)
            os.remove(gzfile_path)
        except Exception as e:
            print(f"[-] Extraction failed for {ublob[7:19]}: {e}, retrying...")
            if os.path.exists(gzfile_path):
                os.remove(gzfile_path)
            if os.path.exists(layerfile_path):
                os.remove(layerfile_path)
            continue  # retry this layer from scratch
        break  # layer downloaded, verified and extracted
    else:
        # BUGFIX: the original code `continue`d to the next layer here,
        # silently producing a corrupt image with a missing layer.
        print(f"[-] Giving up on layer {ublob[7:19]} after {MAX_LAYER_ATTEMPTS} attempts")
        sys.exit(1)

    content[0]['Layers'].append(fake_layerid + '/layer.tar')

    # Per-layer json: the last layer embeds the (trimmed) image config,
    # every other layer gets the generic empty_json placeholder.
    with open(f'{layerdir}/json', 'w') as file:
        if layers[-1]['digest'] == layer['digest']:
            json_obj = json.loads(confresp.content)
            json_obj.pop('history', None)
            json_obj.pop('rootfs', None)
        else:
            json_obj = json.loads(empty_json)
        json_obj['id'] = fake_layerid
        if parentid:
            json_obj['parent'] = parentid
        parentid = json_obj['id']
        file.write(json.dumps(json_obj))

with open(f'{imgdir}/manifest.json', 'w') as file:
    file.write(json.dumps(content, indent=2))
# `repositories` maps image:tag to the id of the topmost (last) layer.
with open(f'{imgdir}/repositories', 'w') as file:
    file.write(json.dumps({img: {tag: fake_layerid}}, indent=2))

docker_tar = f'{repo.replace("/", "_")}_{img}.tar'
# Pack the assembled directory into a `docker load`-able tarball, then
# remove the temporary working directory.
print("Creating archive...")
with tarfile.open(docker_tar, "w") as tar:
    tar.add(imgdir, arcname=os.path.sep)
shutil.rmtree(imgdir)
print(f'Docker image pulled: {docker_tar}')
http://www.dtcms.com/a/352959.html

相关文章:

  • 福建地区通信安全员考试题库及答案
  • 基于FPGA的情绪感知系统设计方案:心理健康监测应用(四)
  • FPGA入门学习路径
  • Go变量作用域全解析
  • Zynq介绍和命名方式
  • FPGA学习笔记——Verilog中可综合和不可综合语句
  • 德克西尔氢气探测器:工业安全守护核心
  • 【Linux】用户与用户组管理
  • 6.8 学习ui组件方法和Element Plus介绍
  • 嵌入式C语言进阶:高效数学运算的艺术与实战
  • Java全栈开发面试实战:从基础到微服务架构的深度解析
  • 革新固态电池失效分析技术:AFM-SEM联用技术助力突破瓶颈
  • Java 大视界 -- Java 大数据机器学习模型在电商推荐系统冷启动问题解决与推荐效果提升中的应用(403)
  • Unity Shader unity文档学习笔记(二十一):几种草体的实现方式(透明度剔除,GPU Instaning, 曲面细分+几何着色器实现)
  • Axios 整理常用形式及涉及的参数
  • Vue3 + Vue Router 实现动态面包屑导航(支持点击跳转)
  • Techub News 与 TOKENPOST 达成战略合作以推动中韩 Web3 资讯互通
  • 有鹿机器人如何用科技与创新模式破解行业难题
  • 「LangChain 学习笔记」LangChain大模型应用开发:模型链(Chains)
  • 外汇中高频 CTA 风控策略回测案例
  • 宝塔面板零基础搭建 WordPress 个人博客与外贸网站 | 新手10分钟上手指南
  • 国内股指期货合约的最小变动价位是多少?
  • 大语言模型的“引擎室”:深入剖析现代计算与算法优化
  • 企业落地版 AutoGen 工程示例:自动化市场分析报告生成系统
  • 代码随想录刷题Day42
  • 【芯片低功耗设计中的UPF:从理论到实践详解】
  • windows 子系统 wsl 命令的用法
  • lvgl(一)
  • Java全栈工程师面试实录:从基础到实战的深度技术探索
  • 集成电路学习:什么是YOLO一次性检测器