广东网站设计工具加工订单网
安装前修改自己所需变量,安装后会有如下提示,之后追加或修改prometheus配置即可。
安装完成后会有详细提示
[✓] Webhook服务安装完成
[*] 创建测试脚本...
[✓] 测试脚本已创建: /data/webhook/test-webhook.sh

===== 安装完成 =====
Prometheus访问地址: http://10.234.210.88:9090
Alertmanager访问地址: http://10.234.210.88:9093
Webhook服务地址: http://10.234.210.88:58888

重要提示:
1. 使用以下命令测试webhook是否正常工作:
   /data/webhook/test-webhook.sh
2. 使用以下命令重新加载Prometheus配置:
   curl -X POST http://localhost:9090/-/reload
3. 查看服务状态:
   systemctl status prometheus
   systemctl status alertmanager
   systemctl status prometheus-webhook
#!/bin/bash
#
# One-click deployment script for a Prometheus monitoring stack (lite edition).
# Installs and configures Prometheus and Alertmanager under INSTALL_DIR.
#
# Every setting below can be overridden from the environment, e.g.
#   INSTALL_DIR=/opt/monitoring WEBHOOK_PORT=9000 ./install.sh
#

# Abort on command failure, on use of an unset variable, and on a failure in
# any stage of a pipeline.
set -euo pipefail

# ANSI colour sequences; they are interpreted by `echo -e` at print time.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
NC='\033[0m' # No Color

# Installation root and component versions.
INSTALL_DIR="${INSTALL_DIR:-/data}"
PROMETHEUS_VERSION="${PROMETHEUS_VERSION:-2.49.1}"
ALERTMANAGER_VERSION="${ALERTMANAGER_VERSION:-0.26.0}"

# Webhook settings: local listening port and the Lark bot hook that alerts are
# forwarded to.
WEBHOOK_PORT="${WEBHOOK_PORT:-58888}"
LARK_WEBHOOK_URL="${LARK_WEBHOOK_URL:-https://open.larksuite.com/open-apis/bot/v2/hook/-0627-4bc8--}"

# The script writes systemd units and installs packages, so it requires root.
if [[ "$(id -u)" != "0" ]]; then
  echo -e "${RED}此脚本必须以root用户身份运行${NC}" >&2
  exit 1
fi

echo -e "${GREEN}===== 开始部署Prometheus监控系统 =====${NC}"

# Create the installation root and work from inside it.
mkdir -p "${INSTALL_DIR}"
cd "${INSTALL_DIR}"

# Create the directory layout the installers below copy files into.
echo -e "${YELLOW}[*] 创建目录结构...${NC}"
mkdir -p "${INSTALL_DIR}/prometheus/"{config,rules,data}
mkdir -p "${INSTALL_DIR}/alertmanager"
#######################################
# Download Prometheus, install it under ${INSTALL_DIR}/prometheus, write a
# default configuration plus one sample alert rule, and register/start the
# systemd service.
# Globals:   INSTALL_DIR, PROMETHEUS_VERSION, GREEN, YELLOW, NC (read)
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   0 on success or when Prometheus is already installed
#######################################
install_prometheus() {
  local -r release="prometheus-${PROMETHEUS_VERSION}.linux-amd64"
  local -r tarball="${INSTALL_DIR}/${release}.tar.gz"
  local -r target="${INSTALL_DIR}/prometheus"

  # Skip when already installed. The binary is copied into ${target}, which is
  # normally not on PATH, so that location is checked in addition to PATH;
  # otherwise a re-run would overwrite a customised prometheus.yml.
  if command -v prometheus &> /dev/null || [[ -x "${target}/prometheus" ]]; then
    echo -e "${GREEN}[✓] Prometheus已安装,跳过安装步骤${NC}"
    return
  fi

  echo -e "${YELLOW}[*] 下载并安装Prometheus...${NC}"

  # Download to a temporary name and rename on success, so an interrupted
  # download is never mistaken for a complete tarball on the next run.
  if [[ ! -f "${tarball}" ]]; then
    wget -O "${tarball}.part" \
      "https://github.com/prometheus/prometheus/releases/download/v${PROMETHEUS_VERSION}/${release}.tar.gz"
    mv -- "${tarball}.part" "${tarball}"
  fi

  # Unpack and copy the binaries and console assets into place.
  tar -xzf "${tarball}" -C "${INSTALL_DIR}"
  cp "${INSTALL_DIR}/${release}/prometheus" "${target}/"
  cp "${INSTALL_DIR}/${release}/promtool" "${target}/"
  cp -r "${INSTALL_DIR}/${release}/consoles" "${target}/"
  cp -r "${INSTALL_DIR}/${release}/console_libraries" "${target}/"

  # Main configuration. The heredoc is unquoted so ${INSTALL_DIR} expands;
  # backticks are escaped to keep them literal.
  cat > "${target}/prometheus.yml" <<EOF
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - localhost:9093

rule_files:
  - "${INSTALL_DIR}/prometheus/rules/*.yaml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label \`job=<job_name>\` to any timeseries scraped from this config.
  - job_name: "prometheus"
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
      - targets: ["localhost:9090"]
EOF

  # Sample alert rule. \$labels is escaped so the template placeholder reaches
  # the file instead of being expanded by the shell.
  mkdir -p "${target}/rules"
  cat > "${target}/rules/basic-alert.yaml" <<EOF
groups:
  - name: basic-alerts
    rules:
      - alert: InstanceDown
        expr: up == 0
        for: 0m
        labels:
          severity: critical
        annotations:
          title: 'Instance down'
          description: "实例 {{ \$labels.instance }} 已经宕机"
EOF

  # systemd unit. --web.enable-lifecycle is passed so the HTTP reload endpoint
  # advertised by show_info is available.
  cat > /etc/systemd/system/prometheus.service <<EOF
[Unit]
Description=Prometheus Service
Wants=network-online.target
After=network-online.target

[Service]
User=root
Group=root
Type=simple
ExecStart=${INSTALL_DIR}/prometheus/prometheus --config.file=${INSTALL_DIR}/prometheus/prometheus.yml --web.enable-lifecycle --storage.tsdb.path=${INSTALL_DIR}/prometheus/data
Restart=on-failure

[Install]
WantedBy=multi-user.target
EOF

  # Pick up the new unit, enable it at boot, and start it now.
  systemctl daemon-reload
  systemctl enable prometheus
  systemctl start prometheus

  echo -e "${GREEN}[✓] Prometheus安装完成${NC}"
}
#######################################
# Download Alertmanager, install it under ${INSTALL_DIR}/alertmanager, write a
# configuration that routes every alert to the local webhook, and
# register/start the systemd service.
# Globals:   INSTALL_DIR, ALERTMANAGER_VERSION, WEBHOOK_PORT,
#            GREEN, YELLOW, NC (read)
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   0 on success or when Alertmanager is already installed
#######################################
install_alertmanager() {
  local -r release="alertmanager-${ALERTMANAGER_VERSION}.linux-amd64"
  local -r tarball="${INSTALL_DIR}/${release}.tar.gz"
  local -r target="${INSTALL_DIR}/alertmanager"

  # Skip when already installed. The binary is copied into ${target}, which is
  # normally not on PATH, so that location is checked in addition to PATH;
  # otherwise a re-run would overwrite a customised alertmanager.yml.
  if command -v alertmanager &> /dev/null || [[ -x "${target}/alertmanager" ]]; then
    echo -e "${GREEN}[✓] Alertmanager已安装,跳过安装步骤${NC}"
    return
  fi

  echo -e "${YELLOW}[*] 下载并安装Alertmanager...${NC}"

  # Download to a temporary name and rename on success, so an interrupted
  # download is never mistaken for a complete tarball on the next run.
  if [[ ! -f "${tarball}" ]]; then
    wget -O "${tarball}.part" \
      "https://github.com/prometheus/alertmanager/releases/download/v${ALERTMANAGER_VERSION}/${release}.tar.gz"
    mv -- "${tarball}.part" "${tarball}"
  fi

  # Unpack and copy the binaries into place.
  tar -xzf "${tarball}" -C "${INSTALL_DIR}"
  cp "${INSTALL_DIR}/${release}/alertmanager" "${target}/"
  cp "${INSTALL_DIR}/${release}/amtool" "${target}/"

  # Routing configuration: a single receiver that posts to the local webhook.
  # The heredoc is unquoted so ${WEBHOOK_PORT} expands.
  cat > "${target}/alertmanager.yml" <<EOF
global:
  resolve_timeout: 1m
route:
  receiver: 'webhook'
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 30m
receivers:
  - name: 'webhook'
    webhook_configs:
      - url: 'http://127.0.0.1:${WEBHOOK_PORT}/send'
        send_resolved: true
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'instance']
EOF

  # systemd unit. --storage.path is set explicitly so Alertmanager's state
  # lives under the install directory instead of a path relative to the
  # service's working directory.
  cat > /etc/systemd/system/alertmanager.service <<EOF
[Unit]
Description=Alertmanager Service
Wants=network-online.target
After=network-online.target

[Service]
User=root
Group=root
Type=simple
ExecStart=${INSTALL_DIR}/alertmanager/alertmanager --config.file=${INSTALL_DIR}/alertmanager/alertmanager.yml --storage.path=${INSTALL_DIR}/alertmanager/data
Restart=on-failure

[Install]
WantedBy=multi-user.target
EOF

  # Pick up the new unit, enable it at boot, and start it now.
  systemctl daemon-reload
  systemctl enable alertmanager
  systemctl start alertmanager

  echo -e "${GREEN}[✓] Alertmanager安装完成${NC}"
}
#######################################
# Install a small Flask application that receives Alertmanager webhook calls
# on /send and forwards each alert to Lark as an interactive card, then
# register/start it as a systemd service.
# Globals:   INSTALL_DIR, WEBHOOK_PORT, LARK_WEBHOOK_URL,
#            GREEN, YELLOW, NC (read)
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   0 on success
#######################################
install_webhook() {
  local -a missing_pkgs=()
  local python_bin

  echo -e "${YELLOW}[*] 安装简单版Webhook服务...${NC}"

  # python3 and pip3 are checked separately because either can be present
  # without the other; missing packages are installed in a single apt run.
  if ! command -v python3 &> /dev/null; then
    echo -e "${YELLOW}[*] Python3未安装,正在安装...${NC}"
    missing_pkgs+=(python3)
  fi
  if ! command -v pip3 &> /dev/null; then
    echo -e "${YELLOW}[*] pip3未安装,正在安装...${NC}"
    missing_pkgs+=(python3-pip)
  fi
  if (( ${#missing_pkgs[@]} > 0 )); then
    apt-get update
    apt-get install -y "${missing_pkgs[@]}"
  fi

  # Python dependencies of the webhook application.
  # NOTE(review): distributions that mark the system Python as externally
  # managed may refuse this system-wide pip install — confirm on the target OS.
  pip3 install flask requests

  mkdir -p "${INSTALL_DIR}/webhook"

  # The heredoc is unquoted so ${LARK_WEBHOOK_URL} and ${WEBHOOK_PORT} are
  # substituted at install time; the Python source itself contains no other
  # '$', backtick or backslash characters that the shell would touch.
  cat > "${INSTALL_DIR}/webhook/app.py" <<EOF
from flask import Flask, request, jsonify
import requests
import json
from datetime import datetime

app = Flask(__name__)


def send_to_lark(status, title, description, start_time, end_time="", severity="Unknown", instance="Unknown", alertname="Unknown"):
    """发送消息到飞书"""
    # 设置飞书webhook URL
    url = "${LARK_WEBHOOK_URL}"
    # 格式化时间
    try:
        # 将UTC时间转为本地时间
        start_dt = datetime.strptime(start_time, "%Y-%m-%dT%H:%M:%S.%fZ")
        start_time_fmt = start_dt.strftime("%Y-%m-%d %H:%M:%S")
        if end_time:
            end_dt = datetime.strptime(end_time, "%Y-%m-%dT%H:%M:%S.%fZ")
            end_time_fmt = end_dt.strftime("%Y-%m-%d %H:%M:%S")
        else:
            end_time_fmt = "未结束"
    except Exception:
        start_time_fmt = start_time
        end_time_fmt = end_time if end_time else "未结束"
    # 设置消息颜色
    color = "red" if status == "firing" else "green"
    status_text = "🔥告警触发" if status == "firing" else "✅告警恢复"
    # 构建简单卡片消息
    card = {
        "msg_type": "interactive",
        "card": {
            "config": {"wide_screen_mode": True},
            "header": {
                "template": color,
                "title": {"content": f"{status_text}: {title}", "tag": "plain_text"}
            },
            "elements": [
                {
                    "tag": "div",
                    "text": {"tag": "lark_md", "content": f"**告警名称**: {alertname}"}
                },
                {"tag": "hr"},
                {
                    "tag": "div",
                    "fields": [
                        {
                            "is_short": True,
                            "text": {"tag": "lark_md", "content": f"**状态**: {status}"}
                        },
                        {
                            "is_short": True,
                            "text": {"tag": "lark_md", "content": f"**级别**: {severity}"}
                        }
                    ]
                },
                {
                    "tag": "div",
                    "fields": [
                        {
                            "is_short": True,
                            "text": {"tag": "lark_md", "content": f"**开始时间**: {start_time_fmt}"}
                        },
                        {
                            "is_short": True,
                            "text": {"tag": "lark_md", "content": f"**结束时间**: {end_time_fmt}"}
                        }
                    ]
                },
                {
                    "tag": "div",
                    "fields": [
                        {
                            "is_short": True,
                            "text": {"tag": "lark_md", "content": f"**实例**: {instance}"}
                        }
                    ]
                },
                {"tag": "hr"},
                {
                    "tag": "div",
                    "text": {"tag": "lark_md", "content": f"**详细信息**: {description}"}
                }
            ]
        }
    }
    # 发送请求
    headers = {'Content-Type': 'application/json'}
    try:
        response = requests.post(url, json=card, headers=headers, timeout=10)
        return response.json()
    except Exception as e:
        print(f"发送消息失败: {e}")
        return {"error": str(e)}


@app.route("/")
def hello_world():
    return "<p>Prometheus Alert Webhook Server</p>"


@app.route("/send", methods=['POST', 'GET'])
def send_msg():
    if request.method == 'GET':
        return "<p>请使用POST请求发送告警信息!</p>"
    try:
        # 获取请求数据
        try:
            data = request.json
        except Exception:
            return jsonify({"status": "error", "message": "无效的JSON数据"}), 400
        print("接收到的告警数据:")
        print(data)
        # 处理告警
        responses = []
        if "alerts" in data:
            for alert in data["alerts"]:
                # 提取基本信息
                status = alert.get("status", "Unknown")
                start_time = alert.get("startsAt", "Unknown")
                end_time = alert.get("endsAt", "")
                # 提取标签
                labels = alert.get("labels", {})
                alertname = labels.get("alertname", "Unknown")
                severity = labels.get("severity", "Unknown")
                instance = labels.get("instance", "Unknown")
                # 提取注释
                annotations = alert.get("annotations", {})
                title = annotations.get("title", alertname)
                description = annotations.get("description", "无详细信息")
                # 发送到飞书
                response = send_to_lark(status, title, description, start_time, end_time, severity, instance, alertname)
                responses.append(response)
        return jsonify({"status": "success", "responses": responses})
    except Exception as e:
        print(f"处理请求时出错: {e}")
        return jsonify({"status": "error", "message": str(e)}), 500


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=${WEBHOOK_PORT})
EOF

  # Resolve the interpreter that is actually on PATH instead of assuming
  # /usr/bin/python3.
  python_bin="$(command -v python3)"

  cat > /etc/systemd/system/prometheus-webhook.service <<EOF
[Unit]
Description=Prometheus Alert Webhook Service
Wants=network-online.target
After=network-online.target

[Service]
User=root
Group=root
Type=simple
ExecStart=${python_bin} ${INSTALL_DIR}/webhook/app.py
Restart=on-failure
WorkingDirectory=${INSTALL_DIR}/webhook

[Install]
WantedBy=multi-user.target
EOF

  # Pick up the new unit, enable it at boot, and start it now.
  systemctl daemon-reload
  systemctl enable prometheus-webhook
  systemctl start prometheus-webhook

  echo -e "${GREEN}[✓] Webhook服务安装完成${NC}"
}
#######################################
# Generate ${INSTALL_DIR}/webhook/test-webhook.sh, a helper that posts one
# synthetic "firing" alert to the local webhook so the Lark integration can be
# checked end to end.
# Globals:   INSTALL_DIR, WEBHOOK_PORT, GREEN, YELLOW, NC (read)
# Arguments: none
# Outputs:   progress messages on stdout; writes the executable helper script
# Returns:   0 on success
#######################################
create_test_script() {
  local -r script_path="${INSTALL_DIR}/webhook/test-webhook.sh"

  echo -e "${YELLOW}[*] 创建测试脚本...${NC}"

  # Create the target directory so this step does not depend on a previous
  # step having done so.
  mkdir -p "${INSTALL_DIR}/webhook"

  # Unquoted heredoc: ${WEBHOOK_PORT} is expanded now, while \$ and \\ keep the
  # generated script's own expansions and line continuations literal.
  cat > "${script_path}" <<EOF
#!/bin/bash
# 设置webhook地址
WEBHOOK_URL="http://localhost:${WEBHOOK_PORT}/send"
# 当前时间(UTC格式)
CURRENT_TIME=\$(date -u +"%Y-%m-%dT%H:%M:%S.000Z")
# 模拟一个测试告警的JSON数据
curl -X POST "\$WEBHOOK_URL" \\
  -H "Content-Type: application/json" \\
  -d '{
  "alerts": [
    {
      "status": "firing",
      "labels": {
        "alertname": "测试告警",
        "severity": "critical",
        "instance": "test-server-01"
      },
      "annotations": {
        "title": "测试告警标题",
        "description": "这是一条测试告警,用于验证webhook是否正常工作"
      },
      "startsAt": "'"\$CURRENT_TIME"'",
      "endsAt": "",
      "generatorURL": "http://prometheus.example.com/graph",
      "fingerprint": "c1bb9a35f9844428"
    }
  ]
}'
echo "测试告警已发送,请检查飞书是否收到消息"
EOF

  chmod +x "${script_path}"
  echo -e "${GREEN}[✓] 测试脚本已创建: ${INSTALL_DIR}/webhook/test-webhook.sh${NC}"
}
#######################################
# Print the post-install summary: service URLs and follow-up commands.
# Globals:   INSTALL_DIR, WEBHOOK_PORT, GREEN, YELLOW, NC (read)
# Arguments: none
# Outputs:   summary text on stdout
# Returns:   0
#######################################
show_info() {
  local host_ip

  # Look the address up once. A failed or empty lookup must not abort the
  # script at its very last step, so fall back to "localhost".
  host_ip="$(hostname -I 2> /dev/null | awk '{print $1}')" || host_ip=""
  host_ip="${host_ip:-localhost}"

  echo -e "\n${GREEN}===== 安装完成 =====${NC}"
  echo -e "${YELLOW}Prometheus访问地址: http://${host_ip}:9090${NC}"
  echo -e "${YELLOW}Alertmanager访问地址: http://${host_ip}:9093${NC}"
  echo -e "${YELLOW}Webhook服务地址: http://${host_ip}:${WEBHOOK_PORT}${NC}"

  echo -e "\n${GREEN}重要提示:${NC}"
  echo -e "1. 使用以下命令测试webhook是否正常工作:"
  echo -e "   ${YELLOW}${INSTALL_DIR}/webhook/test-webhook.sh${NC}"
  echo -e "2. 使用以下命令重新加载Prometheus配置:"
  echo -e "   ${YELLOW}curl -X POST http://localhost:9090/-/reload${NC}"
  echo -e "3. 查看服务状态:"
  echo -e "   ${YELLOW}systemctl status prometheus${NC}"
  echo -e "   ${YELLOW}systemctl status alertmanager${NC}"
  echo -e "   ${YELLOW}systemctl status prometheus-webhook${NC}"
}
#######################################
# Entry point: run every installation step in order, then print the summary.
# Alertmanager is configured to post to the webhook and the test script
# targets the webhook, so the webhook steps come after the two installers.
# Globals:   none directly (each step reads its own configuration)
# Arguments: none
# Returns:   0 on success
#######################################
main() {
  install_prometheus
  install_alertmanager
  install_webhook
  create_test_script
  show_info
}

# Run the installer.
main "$@"
