监控告警


- 部署

1. 部署Alertmanager
[root@slave-2 monitor]# wget https://github.com/prometheus/alertmanager/releases/download/v0.23.0/alertmanager-0.23.0.linux-amd64.tar.gz
[root@slave-2 monitor]# tar -zxvf alertmanager-0.23.0.linux-amd64.tar.gz
[root@slave-2 monitor]# mv alertmanager-0.23.0.linux-amd64 alertmanager

[root@slave-2 system]# vi alertmanager.service
[Unit]
Description=alertmanager
[Service]
ExecStart=/opt/monitor/alertmanager/alertmanager --config.file=/opt/monitor/alertmanager/alertmanager.yml
ExecReload=/bin/kill -HUP $MAINPID
KillMode=process
Restart=on-failure
[Install]
WantedBy=multi-user.target

[root@slave-2 system]# systemctl daemon-reload
[root@slave-2 system]# systemctl start alertmanager
[root@slave-2 system]# systemctl enable alertmanager

浏览器访问 Alertmanager Web 界面,验证部署是否成功:http://192.168.2.62:9093/

2. 配置告警接收人
[root@slave-2 alertmanager]# vi alertmanager.yml

global:
  resolve_timeout: 5m
  # 邮箱服务器
  smtp_smarthost: 'smtp.163.com:25'
  smtp_from: '****@163.com'
  smtp_auth_username: '****@163.com'
  smtp_auth_password: '****'
  smtp_require_tls: false
# 配置路由树
route:
  group_by: ['alertname'] # 按告警名称(alertname 标签)进行分组
  group_wait: 10s # 分组内第一个告警等待时间,10s内如有第二个告警会合并为一个告警
  group_interval: 10s # 发送新告警间隔时间
  repeat_interval: 1h # 重复告警间隔发送时间
  receiver: 'mail'
# 接收人
receivers:
- name: 'mail'
  email_configs:
  - to: '****@qq.com'

[root@slave-2 alertmanager]# systemctl restart alertmanager
3. 配置Prometheus与Alertmanager通信

[root@slave-2 prometheus]# vi prometheus.yml
# Alertmanager configuration
alerting:
  alertmanagers:
  - static_configs:
    - targets:
       - 192.168.2.62:9093
4. 在Prometheus中创建告警规则
[root@slave-2 prometheus]# vi prometheus.yml
rule_files:
  - "rules/*.yml"
  # - "second_rules.yml"

- 告警状态

- 告警收敛(分组,抑制,静默)

- Prometheus一条告警怎么触发的

- 编写告警规则案例