Files
wx_service/deploy/monitoring/expiry_alert_rules.yml
T

30 lines
1.1 KiB
YAML

# Prometheus alerting rules for the expiry endpoints of the wx_service API.
# All rules select series with job="wx_service_api"; the latency and error-rate
# rules additionally restrict to request paths matching /api/expiry/.*
groups:
  - name: expiry-api-rules
    rules:
      # Fires when the scrape health metric `up` has been 0 for a target of
      # the job for 1 minute. The description annotation is templated with
      # the affected target's `instance` label.
      # NOTE(review): `up == 0` returns nothing when the target has no `up`
      # series at all (e.g. dropped from service discovery) - confirm whether
      # an absent()-based rule is needed elsewhere.
      - alert: ExpiryApiDown
        expr: up{job="wx_service_api"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "wx_service API 实例不可用"
          description: "实例 {{ $labels.instance }} 已连续 1 分钟不可用"

      # Fires when the 95th-percentile request duration, estimated from the
      # http_request_duration_seconds histogram buckets (5m rate, summed
      # across all labels except `le`), stays above 0.5 for 5 minutes.
      # The metric name suggests seconds, matching the 500ms in the
      # description annotation.
      - alert: ExpiryApiHighLatencyP95
        expr: histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket{job="wx_service_api",path=~"/api/expiry/.*"}[5m])) by (le)) > 0.5
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Expiry API P95 延迟过高"
          description: "过去 5 分钟 P95 延迟超过 500ms"

      # Fires when the ratio of 5xx responses (status=~"5..") to all
      # responses, both as 5m request rates, stays above 0.03 (3%) for
      # 5 minutes.
      - alert: ExpiryApiErrorRateHigh
        expr: (sum(rate(http_requests_total{job="wx_service_api",path=~"/api/expiry/.*",status=~"5.."}[5m])) / sum(rate(http_requests_total{job="wx_service_api",path=~"/api/expiry/.*"}[5m]))) > 0.03
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Expiry API 5xx 错误率过高"
          description: "过去 5 分钟 5xx 错误率超过 3%"