harbor拉取全部镜像
本脚本通过harbor v2.0 API发起请求,采用三层for循环(project、repository、tag)将全部镜像拉取到本地,并把拉取成功的镜像名称(含tag)列表输出到imagelist文件中。另外还有两三个优化改编版本。
文章目录
- 拉取harbor仓库全部镜像
- 拉取全部repository中最新的几个镜像
- 拉取某个project下所有镜像
- 用法
- 脚本
拉取harbor仓库全部镜像
- pull-whole-reg.sh
#!/bin/bash
# pull-whole-reg.sh - pull every tagged image stored in a Harbor registry.
#
# Walks the Harbor v2.0 REST API level by level (project -> repository ->
# artifact -> tag), runs `docker pull` for each tag and appends every
# successfully pulled image reference to $IMAGELIST. All output of the script
# is mirrored into $LOGFILE.
#
# Uses: bash 4+ (mapfile), docker, curl, jq, grep, awk, tr, tee, stdbuf.

# Harbor configuration
HARBOR_URL="dockerhub.local:41104"
USERNAME="admin"
PASSWORD='<your_pass_word_here>'

# Log file and list of pulled images
LOGFILE="harbor_pull.log"
IMAGELIST="imagelist"

# Page size used for every paginated API request
PAGE_SIZE=10

# GET a Harbor API resource and print the response body.
# $1 - path below /api/v2.0, including any query string
harbor_get() {
  curl -s -u "$USERNAME:$PASSWORD" "$HARBOR_URL/api/v2.0/$1"
}

# Print the X-Total-Count response header of a list endpoint.
# Prints 0 when the header is missing or is not a plain number, so the page
# arithmetic of the callers never receives an empty or multi-line value.
# $1 - path below /api/v2.0
harbor_total_count() {
  local count
  count=$(curl -s -X GET -I -u "$USERNAME:$PASSWORD" "$HARBOR_URL/api/v2.0/$1" \
    | grep -i "X-Total-Count" | awk '{print $2}' | tr -d '\r')
  [[ "$count" =~ ^[0-9]+$ ]] || count=0
  # 10# forces base 10 so a value with leading zeros is not read as octal.
  printf '%s\n' "$((10#$count))"
}

# Print the repository part of "<project>/<repo path>" in the form used inside
# an API URL path: the leading project component is removed and every
# remaining "/" is double URL-encoded (%252F), whatever the nesting depth.
# $1 - full repository name as returned by the API
encode_repo_name() {
  local name=${1#*/}
  printf '%s\n' "${name//\//%252F}"
}

# Start with an empty log file and image list
> "$LOGFILE"
> "$IMAGELIST"

# Mirror stdout and stderr into the log file, line-buffered
exec > >(stdbuf -oL tee -a "$LOGFILE") 2>&1

# Log in to Harbor; the password is passed on stdin so that it does not show
# up in the docker command line.
echo "Logging in to Harbor..."
if ! printf '%s' "$PASSWORD" \
  | docker login "$HARBOR_URL" -u "$USERNAME" --password-stdin; then
  echo "[ERROR] Failed to login to Harbor. Check $LOGFILE for details."
  exit 1
fi

# Number of projects and resulting page count (rounded up)
TOTAL_PROJECTS=$(harbor_total_count "projects")
echo "Total projects: $TOTAL_PROJECTS"
TOTAL_PAGES=$(( (TOTAL_PROJECTS + PAGE_SIZE - 1) / PAGE_SIZE ))

# Collect the names of all projects, page by page
PROJECTS=()
for ((PAGE = 1; PAGE <= TOTAL_PAGES; PAGE++)); do
  echo "Fetching projects (page $PAGE of $TOTAL_PAGES)..."
  mapfile -t PAGE_PROJECTS < <(
    harbor_get "projects?page=$PAGE&page_size=$PAGE_SIZE" | jq -r '.[].name'
  )
  PROJECTS+=("${PAGE_PROJECTS[@]}")
done

# List the detected projects with a 1-based index
for ((index = 0; index < ${#PROJECTS[@]}; index++)); do
  echo "[INFO] detect project $((index + 1)): ${PROJECTS[index]}"
done

# Walk every project
for PROJECT in "${PROJECTS[@]}"; do
  echo "Processing project: $PROJECT"

  # Collect all repositories of the project, page by page
  TOTAL_REPOS=$(harbor_total_count "projects/$PROJECT/repositories")
  REPO_PAGES=$(( (TOTAL_REPOS + PAGE_SIZE - 1) / PAGE_SIZE ))
  REPOS=()
  for ((REPO_PAGE = 1; REPO_PAGE <= REPO_PAGES; REPO_PAGE++)); do
    echo "Fetching repositories for project $PROJECT (page $REPO_PAGE of $REPO_PAGES)..."
    mapfile -t PAGE_REPOS < <(
      harbor_get "projects/$PROJECT/repositories?page=$REPO_PAGE&page_size=$PAGE_SIZE" \
        | jq -r '.[].name'
    )
    REPOS+=("${PAGE_REPOS[@]}")
  done

  # Walk every repository
  for REPO in "${REPOS[@]}"; do
    echo "Processing repository: $REPO"

    # $REPO is "<project>/<repo path>"; the API path needs only the encoded
    # repo path.
    REPO_NAME=$(encode_repo_name "$REPO")
    ARTIFACTS_PATH="projects/$PROJECT/repositories/$REPO_NAME/artifacts"

    # Collect the digests of all artifacts of the repository, page by page
    TOTAL_ARTIFACTS=$(harbor_total_count "$ARTIFACTS_PATH")
    ARTIFACT_PAGES=$(( (TOTAL_ARTIFACTS + PAGE_SIZE - 1) / PAGE_SIZE ))
    ARTIFACTS=()
    for ((ARTIFACT_PAGE = 1; ARTIFACT_PAGE <= ARTIFACT_PAGES; ARTIFACT_PAGE++)); do
      echo "Fetching artifacts for repository $REPO (page $ARTIFACT_PAGE of $ARTIFACT_PAGES)..."
      mapfile -t PAGE_ARTIFACTS < <(
        harbor_get "$ARTIFACTS_PATH?page=$ARTIFACT_PAGE&page_size=$PAGE_SIZE" \
          | jq -r '.[].digest'
      )
      ARTIFACTS+=("${PAGE_ARTIFACTS[@]}")
    done

    # Pull every tag of every artifact
    for ARTIFACT in "${ARTIFACTS[@]}"; do
      mapfile -t TAGS < <(harbor_get "$ARTIFACTS_PATH/$ARTIFACT/tags" | jq -r '.[].name')
      for TAG in "${TAGS[@]}"; do
        IMAGE_NAME="$HARBOR_URL/$REPO:$TAG"
        echo "Pulling image: $IMAGE_NAME"
        if docker pull "$IMAGE_NAME"; then
          echo "Dumping tags to $IMAGELIST: $IMAGE_NAME"
          echo "$IMAGE_NAME" >> "$IMAGELIST"
          sync # force the image list onto disk
        else
          echo "[ERROR] Failed to pull image: $IMAGE_NAME"
        fi
        echo "---------------------------------------------------------------"
      done
    done
  done
done

echo "All images have been pulled. Check $LOGFILE for details."
拉取全部repository中最新的几个镜像
有的时候大量旧的镜像版本并没有用处,这时可以按时间排序(最新的排在最前面),每个repository只拉取最新的几个。
比如
library/loki:v4
library/loki:v3
library/loki:v2
library/loki:v1
只要前三个,也就是v4、v3、v2,此时使用如下脚本:
#!/bin/bash
# Pull the tags of the first N artifacts of every repository in a Harbor
# registry (or of all artifacts when no limit is given).
#
# Options:
#   -n, --topN N   request only the first N artifacts per repository;
#                  N is a positive integer or "all" (default: all)
#   --dry-run      only list the images that would be pulled
#
# Output files: $LOGFILE (everything printed), $ERR_LOGFILE (failed pulls),
# $IMAGELIST (pulled or, with --dry-run, found images), $STATISTIC_LOG
# (per-project / per-repository summary).
#
# Uses: bash 4+ (mapfile), docker, curl, jq, grep, awk, tr, tee, stdbuf.

# By default all artifacts are fetched
TOP_N="all"
DRY_RUN=false

# Parse command line arguments
while [[ $# -gt 0 ]]; do
  case "$1" in
    -n|--topN)
      # Without this check a trailing "-n" makes `shift 2` fail without
      # shifting anything and the loop never terminates.
      if [[ $# -lt 2 ]]; then
        echo "[ERROR] Option $1 requires a value." >&2
        exit 1
      fi
      TOP_N="$2"
      shift 2
      ;;
    --dry-run)
      DRY_RUN=true
      shift
      ;;
    *)
      echo "Unknown option: $1"
      exit 1
      ;;
  esac
done

# TOP_N ends up in a query string, so accept only "all" or a positive integer
if [[ "$TOP_N" != "all" && ! "$TOP_N" =~ ^[1-9][0-9]*$ ]]; then
  echo "[ERROR] Invalid value for -n/--topN: $TOP_N (expected a positive integer or 'all')." >&2
  exit 1
fi

# Harbor configuration
HARBOR_URL="dockerhub.local:41104"
USERNAME="admin"
PASSWORD='<your_pass_word_here>'

# Log files
LOGFILE="harbor_pull.log"
ERR_LOGFILE="harbor_pull_err.log"
IMAGELIST="repo-TopN-imagelist"
STATISTIC_LOG="statistic.log"

# Page size used for paginated API requests
PAGE_SIZE=10

# Separator line written between projects in the statistics file
SEPARATOR="================================================================================="

# GET a Harbor API resource and print the response body.
# $1 - path below /api/v2.0, including any query string
harbor_get() {
  curl -s -u "$USERNAME:$PASSWORD" "$HARBOR_URL/api/v2.0/$1"
}

# Print the X-Total-Count response header of a list endpoint.
# Prints 0 when the header is missing or is not a plain number, so the page
# arithmetic of the callers never receives an empty or multi-line value.
# $1 - path below /api/v2.0
harbor_total_count() {
  local count
  count=$(curl -s -X GET -I -u "$USERNAME:$PASSWORD" "$HARBOR_URL/api/v2.0/$1" \
    | grep -i "X-Total-Count" | awk '{print $2}' | tr -d '\r')
  [[ "$count" =~ ^[0-9]+$ ]] || count=0
  # 10# forces base 10 so a value with leading zeros is not read as octal.
  printf '%s\n' "$((10#$count))"
}

# Print the repository part of "<project>/<repo path>" in the form used inside
# an API URL path: the leading project component is removed and every
# remaining "/" is double URL-encoded (%252F), whatever the nesting depth.
# $1 - full repository name as returned by the API
encode_repo_name() {
  local name=${1#*/}
  printf '%s\n' "${name//\//%252F}"
}

# Print the digests of a repository's artifacts, one per line.
# $1 - artifacts path below /api/v2.0 (no query string)
# $2 - "all" to walk every page, or a positive integer N to request only the
#      first page with page_size=N
# NOTE(review): the first page is assumed to hold the newest artifacts, and
# Harbor may cap page_size (believed to be 100) - confirm both against the
# deployed Harbor version.
list_artifact_digests() {
  local path=$1 limit=$2 total pages page
  if [[ "$limit" != "all" ]]; then
    harbor_get "$path?page=1&page_size=$limit" | jq -r '.[].digest'
    return
  fi
  total=$(harbor_total_count "$path")
  pages=$(( (total + PAGE_SIZE - 1) / PAGE_SIZE ))
  for ((page = 1; page <= pages; page++)); do
    harbor_get "$path?page=$page&page_size=$PAGE_SIZE" | jq -r '.[].digest'
  done
}

# Start with empty log files and image list
> "$LOGFILE"
> "$IMAGELIST"
> "$ERR_LOGFILE"
> "$STATISTIC_LOG"

# Mirror stdout and stderr into the log file, line-buffered
exec > >(stdbuf -oL tee -a "$LOGFILE") 2>&1

# Log in to Harbor; the password is passed on stdin so that it does not show
# up in the docker command line.
echo "Logging in to Harbor..."
if ! printf '%s' "$PASSWORD" \
  | docker login "$HARBOR_URL" -u "$USERNAME" --password-stdin; then
  echo "[ERROR] Failed to login to Harbor. Check $LOGFILE for details."
  exit 1
fi

# Number of projects and resulting page count (rounded up)
TOTAL_PROJECTS=$(harbor_total_count "projects")
echo "Total projects: $TOTAL_PROJECTS"
echo "Total projects: $TOTAL_PROJECTS" >> "$STATISTIC_LOG"
TOTAL_PAGES=$(( (TOTAL_PROJECTS + PAGE_SIZE - 1) / PAGE_SIZE ))

# Collect the names of all projects, page by page
PROJECTS=()
for ((PAGE = 1; PAGE <= TOTAL_PAGES; PAGE++)); do
  echo "Fetching projects (page $PAGE of $TOTAL_PAGES)..."
  mapfile -t PAGE_PROJECTS < <(
    harbor_get "projects?page=$PAGE&page_size=$PAGE_SIZE" | jq -r '.[].name'
  )
  PROJECTS+=("${PAGE_PROJECTS[@]}")
done

# List the detected projects with a 1-based index
for ((index = 0; index < ${#PROJECTS[@]}; index++)); do
  echo "[INFO] detect project $((index + 1)): ${PROJECTS[index]}"
done

# Walk every project
for PROJECT in "${PROJECTS[@]}"; do
  echo "$SEPARATOR" >> "$STATISTIC_LOG"
  echo "Processing project: $PROJECT"
  echo "Processing project: $PROJECT" >> "$STATISTIC_LOG"

  # Collect all repositories of the project, page by page
  TOTAL_REPOS=$(harbor_total_count "projects/$PROJECT/repositories")
  REPO_PAGES=$(( (TOTAL_REPOS + PAGE_SIZE - 1) / PAGE_SIZE ))
  REPOS=()
  for ((REPO_PAGE = 1; REPO_PAGE <= REPO_PAGES; REPO_PAGE++)); do
    echo "Fetching repositories for project $PROJECT (page $REPO_PAGE of $REPO_PAGES)..."
    mapfile -t PAGE_REPOS < <(
      harbor_get "projects/$PROJECT/repositories?page=$REPO_PAGE&page_size=$PAGE_SIZE" \
        | jq -r '.[].name'
    )
    REPOS+=("${PAGE_REPOS[@]}")
  done

  # Record how many repositories the project has
  echo "Total repositories in project $PROJECT: ${#REPOS[@]}" >> "$STATISTIC_LOG"

  # Walk every repository
  for REPO in "${REPOS[@]}"; do
    echo "Processing repository: $REPO"
    echo " Repository: $REPO" >> "$STATISTIC_LOG"

    # $REPO is "<project>/<repo path>"; the API path needs only the encoded
    # repo path.
    REPO_NAME=$(encode_repo_name "$REPO")
    ARTIFACTS_PATH="projects/$PROJECT/repositories/$REPO_NAME/artifacts"

    # TOP_N is only read here, never overwritten, so "all" keeps meaning
    # "every artifact" for each repository.
    echo "[INFO] fetching [$PROJECT/$REPO_NAME] $TOP_N tags..."
    mapfile -t ARTIFACTS < <(list_artifact_digests "$ARTIFACTS_PATH" "$TOP_N")

    # Number of tags handled in this repository
    TAG_COUNT=0

    # Handle every tag of every selected artifact
    for ARTIFACT in "${ARTIFACTS[@]}"; do
      mapfile -t TAGS < <(harbor_get "$ARTIFACTS_PATH/$ARTIFACT/tags" | jq -r '.[].name')
      for TAG in "${TAGS[@]}"; do
        IMAGE_NAME="$HARBOR_URL/$REPO:$TAG"
        echo " Tag: $TAG" >> "$STATISTIC_LOG"
        if [[ "$DRY_RUN" == true ]]; then
          echo "Dry-run: Found image: $IMAGE_NAME"
          echo "$IMAGE_NAME" >> "$IMAGELIST"
        else
          echo "Pulling image: $IMAGE_NAME"
          if docker pull "$IMAGE_NAME"; then
            echo "Dumping tags to $IMAGELIST: $IMAGE_NAME"
            echo "$IMAGE_NAME" >> "$IMAGELIST"
            sync # force the image list onto disk
          else
            echo "[ERROR] Failed to pull image: $IMAGE_NAME" >> "$ERR_LOGFILE"
          fi
        fi
        echo "---------------------------------------------------------------"
        TAG_COUNT=$((TAG_COUNT + 1))
      done
    done
    echo " Total tags in repository $REPO: $TAG_COUNT" >> "$STATISTIC_LOG"
  done
done

echo "All images have been pulled. Check $LOGFILE for details."
拉取某个project下所有镜像
一般一个系统的所有镜像都会放到同一个project中,因此有时只需要拉取某个project下的所有镜像。
比如dockerhub.local:31104/test-sys/minio:v1中的test-sys就是一个project。
用法
bash pull-project-topN.sh -p test-sys -n 3
脚本
#!/bin/bash
# pull-project-topN.sh - pull the tags of the first N artifacts of every
# repository of one Harbor project (or of all projects when none is given).
#
# Options:
#   -p, --project NAME   restrict the run to this project (default: all
#                        projects); a whitespace-separated list is accepted
#   -n, --topN N         request only the first N artifacts per repository;
#                        N is a positive integer or "all" (default: all)
#   --dry-run            only list the images that would be pulled
#
# Output files: $LOGFILE (everything printed), $ERR_LOGFILE (failed pulls),
# $IMAGELIST (pulled or, with --dry-run, found images), $STATISTIC_LOG
# (per-project / per-repository summary).
#
# Uses: bash 4+ (mapfile), docker, curl, jq, grep, awk, tr, tee, stdbuf.

# By default all artifacts of all projects are fetched
TOP_N="all"
DRY_RUN=false
SPECIFIC_PROJECT=""

# Parse command line arguments
while [[ $# -gt 0 ]]; do
  case "$1" in
    -n|--topN|-p|--project)
      # Without this check a trailing value-less option makes `shift 2` fail
      # without shifting anything and the loop never terminates.
      if [[ $# -lt 2 ]]; then
        echo "[ERROR] Option $1 requires a value." >&2
        exit 1
      fi
      case "$1" in
        -n|--topN) TOP_N="$2" ;;
        *) SPECIFIC_PROJECT="$2" ;;
      esac
      shift 2
      ;;
    --dry-run)
      DRY_RUN=true
      shift
      ;;
    *)
      echo "Unknown option: $1"
      exit 1
      ;;
  esac
done

# TOP_N ends up in a query string, so accept only "all" or a positive integer
if [[ "$TOP_N" != "all" && ! "$TOP_N" =~ ^[1-9][0-9]*$ ]]; then
  echo "[ERROR] Invalid value for -n/--topN: $TOP_N (expected a positive integer or 'all')." >&2
  exit 1
fi

# Harbor configuration
HARBOR_URL="dockerhub.local:41104"
USERNAME="admin"
PASSWORD='<your_pass_word_here>'

# Log files
LOGFILE="harbor_pull.log"
ERR_LOGFILE="harbor_pull_err.log"
IMAGELIST="repo-TopN-imagelist"
STATISTIC_LOG="statistic.log"

# Page size used for paginated API requests
PAGE_SIZE=10

# PAGE_SIZE is a divisor in the page-count arithmetic, so it must not be 0
if (( PAGE_SIZE == 0 )); then
  echo "[ERROR] PAGE_SIZE cannot be 0. Please set a valid value."
  exit 1
fi

# Separator line written between projects in the statistics file
SEPARATOR="================================================================================="

# GET a Harbor API resource and print the response body.
# $1 - path below /api/v2.0, including any query string
harbor_get() {
  curl -s -u "$USERNAME:$PASSWORD" "$HARBOR_URL/api/v2.0/$1"
}

# Print the X-Total-Count response header of a list endpoint.
# Prints 0 when the header is missing or is not a plain number, so the page
# arithmetic of the callers never receives an empty or multi-line value.
# $1 - path below /api/v2.0
harbor_total_count() {
  local count
  count=$(curl -s -X GET -I -u "$USERNAME:$PASSWORD" "$HARBOR_URL/api/v2.0/$1" \
    | grep -i "X-Total-Count" | awk '{print $2}' | tr -d '\r')
  [[ "$count" =~ ^[0-9]+$ ]] || count=0
  # 10# forces base 10 so a value with leading zeros is not read as octal.
  printf '%s\n' "$((10#$count))"
}

# Print the repository part of "<project>/<repo path>" in the form used inside
# an API URL path: the leading project component is removed and every
# remaining "/" is double URL-encoded (%252F), whatever the nesting depth.
# $1 - full repository name as returned by the API
encode_repo_name() {
  local name=${1#*/}
  printf '%s\n' "${name//\//%252F}"
}

# Print the digests of a repository's artifacts, one per line.
# $1 - artifacts path below /api/v2.0 (no query string)
# $2 - "all" to walk every page, or a positive integer N to request only the
#      first page with page_size=N
# NOTE(review): the first page is assumed to hold the newest artifacts, and
# Harbor may cap page_size (believed to be 100) - confirm both against the
# deployed Harbor version.
list_artifact_digests() {
  local path=$1 limit=$2 total pages page
  if [[ "$limit" != "all" ]]; then
    harbor_get "$path?page=1&page_size=$limit" | jq -r '.[].digest'
    return
  fi
  total=$(harbor_total_count "$path")
  pages=$(( (total + PAGE_SIZE - 1) / PAGE_SIZE ))
  for ((page = 1; page <= pages; page++)); do
    harbor_get "$path?page=$page&page_size=$PAGE_SIZE" | jq -r '.[].digest'
  done
}

# Start with empty log files and image list
> "$LOGFILE"
> "$IMAGELIST"
> "$ERR_LOGFILE"
> "$STATISTIC_LOG"

# Mirror stdout and stderr into the log file, line-buffered
exec > >(stdbuf -oL tee -a "$LOGFILE") 2>&1

# Log in to Harbor; the password is passed on stdin so that it does not show
# up in the docker command line.
echo "Logging in to Harbor..."
if ! printf '%s' "$PASSWORD" \
  | docker login "$HARBOR_URL" -u "$USERNAME" --password-stdin; then
  echo "[ERROR] Failed to login to Harbor. Check $LOGFILE for details."
  exit 1
fi

# Use the requested project if one was given, otherwise list all projects
PROJECTS=()
if [[ -n "$SPECIFIC_PROJECT" ]]; then
  # Split on whitespace so a quoted list of project names keeps working
  read -r -a PROJECTS <<< "$SPECIFIC_PROJECT"
  echo "Processing specific project: $SPECIFIC_PROJECT"
else
  # Number of projects and resulting page count (rounded up)
  TOTAL_PROJECTS=$(harbor_total_count "projects")
  echo "Total projects: $TOTAL_PROJECTS"
  echo "Total projects: $TOTAL_PROJECTS" >> "$STATISTIC_LOG"
  TOTAL_PAGES=$(( (TOTAL_PROJECTS + PAGE_SIZE - 1) / PAGE_SIZE ))

  # Collect the names of all projects, page by page
  for ((PAGE = 1; PAGE <= TOTAL_PAGES; PAGE++)); do
    echo "Fetching projects (page $PAGE of $TOTAL_PAGES)..."
    mapfile -t PAGE_PROJECTS < <(
      harbor_get "projects?page=$PAGE&page_size=$PAGE_SIZE" | jq -r '.[].name'
    )
    PROJECTS+=("${PAGE_PROJECTS[@]}")
  done
fi

# List the selected projects with a 1-based index
for ((index = 0; index < ${#PROJECTS[@]}; index++)); do
  echo "[INFO] detect project $((index + 1)): ${PROJECTS[index]}"
done

# Walk every project
for PROJECT in "${PROJECTS[@]}"; do
  echo "$SEPARATOR" >> "$STATISTIC_LOG"
  echo "Processing project: $PROJECT"
  echo "Processing project: $PROJECT" >> "$STATISTIC_LOG"

  # Collect all repositories of the project, page by page
  TOTAL_REPOS=$(harbor_total_count "projects/$PROJECT/repositories")
  REPO_PAGES=$(( (TOTAL_REPOS + PAGE_SIZE - 1) / PAGE_SIZE ))
  REPOS=()
  for ((REPO_PAGE = 1; REPO_PAGE <= REPO_PAGES; REPO_PAGE++)); do
    echo "Fetching repositories for project $PROJECT (page $REPO_PAGE of $REPO_PAGES)..."
    mapfile -t PAGE_REPOS < <(
      harbor_get "projects/$PROJECT/repositories?page=$REPO_PAGE&page_size=$PAGE_SIZE" \
        | jq -r '.[].name'
    )
    REPOS+=("${PAGE_REPOS[@]}")
  done

  # Record how many repositories the project has
  echo "Total repositories in project $PROJECT: ${#REPOS[@]}" >> "$STATISTIC_LOG"

  # Walk every repository
  for REPO in "${REPOS[@]}"; do
    echo "Processing repository: $REPO"
    echo " Repository: $REPO" >> "$STATISTIC_LOG"

    # $REPO is "<project>/<repo path>"; the API path needs only the encoded
    # repo path.
    REPO_NAME=$(encode_repo_name "$REPO")
    ARTIFACTS_PATH="projects/$PROJECT/repositories/$REPO_NAME/artifacts"

    # TOP_N is only read here, never overwritten, so "all" keeps meaning
    # "every artifact" for each repository.
    echo "[INFO] fetching [$PROJECT/$REPO_NAME] $TOP_N tags..."
    mapfile -t ARTIFACTS < <(list_artifact_digests "$ARTIFACTS_PATH" "$TOP_N")

    # Number of tags handled in this repository
    TAG_COUNT=0

    # Handle every tag of every selected artifact
    for ARTIFACT in "${ARTIFACTS[@]}"; do
      mapfile -t TAGS < <(harbor_get "$ARTIFACTS_PATH/$ARTIFACT/tags" | jq -r '.[].name')
      for TAG in "${TAGS[@]}"; do
        IMAGE_NAME="$HARBOR_URL/$REPO:$TAG"
        echo " Tag: $TAG" >> "$STATISTIC_LOG"
        if [[ "$DRY_RUN" == true ]]; then
          echo "Dry-run: Found image: $IMAGE_NAME"
          echo "$IMAGE_NAME" >> "$IMAGELIST"
        else
          echo "Pulling image: $IMAGE_NAME"
          if docker pull "$IMAGE_NAME"; then
            echo "Dumping tags to $IMAGELIST: $IMAGE_NAME"
            echo "$IMAGE_NAME" >> "$IMAGELIST"
            sync # force the image list onto disk
          else
            echo "[ERROR] Failed to pull image: $IMAGE_NAME" >> "$ERR_LOGFILE"
          fi
        fi
        echo "---------------------------------------------------------------"
        TAG_COUNT=$((TAG_COUNT + 1))
      done
    done
    echo " Total tags in repository $REPO: $TAG_COUNT" >> "$STATISTIC_LOG"
  done
done

echo "All images have been pulled. Check $LOGFILE for details."