Prometheus 钉钉告警模板
告警规则（Alert rules）
# Prometheus alerting rules.
# Each alert carries the routing labels (`robot`, `ops`) matched by the
# Alertmanager route tree and an `owner` label that the DingTalk template
# renders as an @-mention.
groups:
  - name: 'node running status'
    rules:
      - alert: 'Instance Down'
        # `up` is 0 when the most recent scrape of the target failed.
        expr: 'up == 0'
        for: 5s
        annotations:
          title: 'Instance Down'
          description: "{{ $labels.instance }}down"
        labels:
          robot: 'jcss'
          severity: 'warning'
          owner: 'xxxxxxxxxxx'
  - name: 'node memory usage'
    rules:
      - alert: 'memory usage'
        # Used-memory percentage. MemAvailable is used instead of MemFree:
        # MemFree excludes reclaimable page cache/buffers, so a healthy host
        # with a warm cache would otherwise look almost full.
        expr: '((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100) > 85'
        for: 5s
        annotations:
          title: 'Mem'
          description: '{{ $labels.instance }} Memusage {{ $value }}'
        labels:
          robot: 'jcss'
          # `ops: 'true'` additionally routes this alert to the ops receiver.
          ops: 'true'
          severity: 'warning'
          owner: "xxxxxxxxxxx"
Alertmanager 路由配置
# Alertmanager routing: alerts labelled `ops: 'true'` go to the ops webhook
# and, because of `continue: true`, are still evaluated against the next
# route; alerts labelled `robot: 'jcss'` go to the jcss webhook.
route:
  group_by: ['alertname']
  group_wait: 30s
  # NOTE(review): 1s / 30s re-send notifications very frequently; these look
  # like demo values - confirm before using them in production.
  group_interval: 1s
  repeat_interval: 30s
  # Fallback receiver for alerts that match none of the child routes.
  receiver: 'ops'
  routes:
    - match:
        ops: 'true'
      receiver: 'ops'
      continue: true
    - match:
        robot: 'jcss'
      receiver: 'jcss'

# Each receiver posts to one target of the prometheus-webhook-dingtalk service.
receivers:
  - name: 'ops'
    webhook_configs:
      - url: 'http://notice.liyblog.com:8060/dingtalk/ops/send'
  - name: 'jcss'
    webhook_configs:
      - url: 'http://notice.liyblog.com:8060/dingtalk/jcss/send'

# Suppress 'warning' alerts while a 'critical' alert with the same
# alertname, dev and instance labels is firing.
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']
Prometheus-Webhook-Dingtalk
config.yml
# prometheus-webhook-dingtalk configuration.
# Template files loaded at start-up; the named templates referenced under
# `message` below must be defined in one of them.
templates:
  - contrib/templates/*.tmpl

# One entry per DingTalk robot; the key is the target name used in the
# Alertmanager webhook URL (/dingtalk/<target>/send).
targets:
  jcss:
    # Placeholder: append the robot's access token.
    url: https://oapi.dingtalk.com/robot/send?access_token=
    # Placeholder: left empty in this example.
    secret:
    mention:
      mobiles: ['xxxxxxxxxxx']
  ops:
    # Placeholder: append the robot's access token.
    url: https://oapi.dingtalk.com/robot/send?access_token=
    # Placeholder: left empty in this example.
    secret:
    # Render this target with the ops.* templates instead of the defaults.
    message:
      title: '{{ template "ops.title" . }}'
      text: '{{ template "ops.content" . }}'
default.tmpl
{{/* __subject: the status in upper case inside square brackets; when the
     status is "firing", the number of firing alerts follows after a colon. */}}
{{ define "__subject" }}
[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}]
{{ end }}
{{/* __alert_list: for every alert in the list passed as dot, emits a ---
     separator, an @-mention of the `owner` label when it is set, then the
     `title` annotation, severity label, instance label, `description`
     annotation and StartsAt formatted in the Asia/Shanghai zone. */}}
{{ define "__alert_list" }}{{ range . }}
---
{{ if .Labels.owner }}@{{ .Labels.owner }}{{ end }}
**告警名称**: {{ index .Annotations "title" }}
**告警级别**: {{ .Labels.severity }}
**告警主机**: {{ .Labels.instance }}
**告警信息**: {{ index .Annotations "description" }}
**告警时间**: {{ dateInZone "2006.01.02 15:04:05" (.StartsAt) "Asia/Shanghai" }}
{{ end }}{{ end }}
{{/* __resolved_list: same per-alert fields as the firing list (owner
     @-mention, title, severity, instance, description, StartsAt) plus the
     recovery time taken from EndsAt, both formatted in Asia/Shanghai. */}}
{{ define "__resolved_list" }}{{ range . }}
---
{{ if .Labels.owner }}@{{ .Labels.owner }}{{ end }}
**告警名称**: {{ index .Annotations "title" }}
**告警级别**: {{ .Labels.severity }}
**告警主机**: {{ .Labels.instance }}
**告警信息**: {{ index .Annotations "description" }}
**告警时间**: {{ dateInZone "2006.01.02 15:04:05" (.StartsAt) "Asia/Shanghai" }}
**恢复时间**: {{ dateInZone "2006.01.02 15:04:05" (.EndsAt) "Asia/Shanghai" }}
{{ end }}{{ end }}
{{/* default.title: message title; delegates to __subject. */}}
{{ define "default.title" }}
{{ template "__subject" . }}
{{ end }}
{{/* default.content: message body. Emits a "detected N faults" header and
     the firing list when there are firing alerts, then a "recovered N
     faults" header and the resolved list when there are resolved alerts.
     Either section is omitted when its list is empty. */}}
{{ define "default.content" }}
{{ if gt (len .Alerts.Firing) 0 }}
**====侦测到{{ .Alerts.Firing | len }}个故障====**
{{ template "__alert_list" .Alerts.Firing }}
---
{{ end }}
{{ if gt (len .Alerts.Resolved) 0 }}
**====恢复{{ .Alerts.Resolved | len }}个故障====**
{{ template "__resolved_list" .Alerts.Resolved }}
{{ end }}
{{ end }}
{{/* ding.link.title / ding.link.content: aliases that delegate to the
     default.* templates. */}}
{{ define "ding.link.title" }}{{ template "default.title" . }}{{ end }}
{{ define "ding.link.content" }}{{ template "default.content" . }}{{ end }}
{{/* NOTE(review): the two invocations below sit outside any define, so they
     presumably render only if the file-level template itself is executed -
     confirm they are intentional. */}}
{{ template "default.title" . }}
{{ template "default.content" . }}
ops.tmpl
{{/* __ops_alert_list: for every alert in the list passed as dot, emits a ---
     separator followed by the `title` annotation, severity label, instance
     label, `description` annotation and StartsAt formatted in Asia/Shanghai.
     Unlike __alert_list in default.tmpl, no owner @-mention is emitted. */}}
{{ define "__ops_alert_list" }}{{ range . }}
---
**告警名称**: {{ index .Annotations "title" }}
**告警级别**: {{ .Labels.severity }}
**告警主机**: {{ .Labels.instance }}
**告警信息**: {{ index .Annotations "description" }}
**告警时间**: {{ dateInZone "2006.01.02 15:04:05" (.StartsAt) "Asia/Shanghai" }}
{{ end }}{{ end }}
{{/* __ops_resolved_list: same per-alert fields as __ops_alert_list plus the
     recovery time taken from EndsAt, both formatted in Asia/Shanghai. */}}
{{ define "__ops_resolved_list" }}{{ range . }}
---
**告警名称**: {{ index .Annotations "title" }}
**告警级别**: {{ .Labels.severity }}
**告警主机**: {{ .Labels.instance }}
**告警信息**: {{ index .Annotations "description" }}
**告警时间**: {{ dateInZone "2006.01.02 15:04:05" (.StartsAt) "Asia/Shanghai" }}
**恢复时间**: {{ dateInZone "2006.01.02 15:04:05" (.EndsAt) "Asia/Shanghai" }}
{{ end }}{{ end }}
{{/* ops.title: message title for the ops target. Delegates to __subject,
     which is defined in default.tmpl, so both files must be loaded together. */}}
{{ define "ops.title" }}
{{ template "__subject" . }}
{{ end }}
{{/* ops.content: message body for the ops target. Emits a "detected N
     faults" header and the firing list when there are firing alerts, then a
     "recovered N faults" header and the resolved list when there are
     resolved alerts. Either section is omitted when its list is empty. */}}
{{ define "ops.content" }}
{{ if gt (len .Alerts.Firing) 0 }}
**====侦测到{{ .Alerts.Firing | len }}个故障====**
{{ template "__ops_alert_list" .Alerts.Firing }}
---
{{ end }}
{{ if gt (len .Alerts.Resolved) 0 }}
**====恢复{{ .Alerts.Resolved | len }}个故障====**
{{ template "__ops_resolved_list" .Alerts.Resolved }}
{{ end }}
{{ end }}
参考资料
Prometheus 官网
作者【SoulChild随笔记】的 Alertmanager 自定义告警模板