---
# Alerting rules for the /api/expiry/ endpoints scraped under the
# job label "wx_service_api".
# NOTE(review): layout matches a Prometheus-style rule file — confirm the
# consumer before relying on Prometheus-specific behavior.
groups:
  - name: expiry-api-rules
    rules:
      # Instance availability: fires after `up` has been 0 for 1 minute.
      # The description is templated with the `instance` label.
      - alert: ExpiryApiDown
        expr: up{job="wx_service_api"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: 'wx_service API 实例不可用'
          description: '实例 {{ $labels.instance }} 已连续 1 分钟不可用'

      # Latency: 0.95 quantile computed from the request-duration histogram
      # buckets (5m rate, aggregated by `le`) for paths matching
      # /api/expiry/.*; fires when it stays above 0.5 for 5 minutes.
      # Folded scalar: the line breaks below collapse to single spaces.
      - alert: ExpiryApiHighLatencyP95
        expr: >-
          histogram_quantile(0.95,
          sum(rate(http_request_duration_seconds_bucket{job="wx_service_api",path=~"/api/expiry/.*"}[5m]))
          by (le)) > 0.5
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: 'Expiry API P95 延迟过高'
          description: '过去 5 分钟 P95 延迟超过 500ms'

      # Error ratio: 5m rate of requests whose status matches 5.. divided by
      # the 5m rate of all requests on the same paths; fires when the ratio
      # stays above 0.03 for 5 minutes.
      # Folded scalar: the line breaks below collapse to single spaces.
      - alert: ExpiryApiErrorRateHigh
        expr: >-
          (sum(rate(http_requests_total{job="wx_service_api",path=~"/api/expiry/.*",status=~"5.."}[5m]))
          /
          sum(rate(http_requests_total{job="wx_service_api",path=~"/api/expiry/.*"}[5m])))
          > 0.03
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: 'Expiry API 5xx 错误率过高'
          description: '过去 5 分钟 5xx 错误率超过 3%'