feat(monitoring): regras de alerta Athletic Map (backup/health)
This commit is contained in:
@@ -0,0 +1,24 @@
|
||||
# PrometheusRule with the Athletic Map alerts: backup freshness and
# tenant pod readiness.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: athleticmap-rules
  namespace: monitoring
  # NOTE(review): presumably this label is what the Prometheus ruleSelector
  # matches on — confirm against the Prometheus custom resource.
  labels: { release: monitoring }
spec:
  groups:
    - name: athleticmap.backup
      rules:
        # Fires when the last successful run of the "pg-backup" CronJob is
        # older than 93600 s (26 h) and stays that way for 15 minutes.
        # NOTE(review): if the CronJob has never succeeded the metric may be
        # absent and this expression returns nothing — consider a separate
        # absent() rule if that case matters.
        - alert: AthleticMapBackupStale
          expr: time() - kube_cronjob_status_last_successful_time{cronjob="pg-backup"} > 93600
          for: 15m
          labels: { severity: warning }
          annotations:
            # $labels.<name> reads a label of the firing series. The previous
            # "\.namespace" was an invalid YAML escape and an invalid template.
            summary: "Backup do tenant {{ $labels.namespace }} sem sucesso ha >26h"
    - name: athleticmap.health
      rules:
        # Fires when a pod in any namespace ending in "-prod" has reported
        # Ready == false for 10 minutes.
        # NOTE(review): pods of completed Jobs may also match this selector —
        # verify and, if needed, restrict to pods in the Running phase.
        - alert: AthleticMapTenantPodNotReady
          expr: kube_pod_status_ready{namespace=~".*-prod", condition="true"} == 0
          for: 10m
          labels: { severity: warning }
          annotations:
            summary: "Pod {{ $labels.pod }} ({{ $labels.namespace }}) nao-Ready ha >10m"
|
||||
Reference in New Issue
Block a user