From e537ee274de95b62eb4012c807e70ddc7b465fd4 Mon Sep 17 00:00:00 2001
From: ATM Platform
Date: Tue, 16 Jun 2026 21:34:41 +0000
Subject: [PATCH] feat(monitoring): regras de alerta Athletic Map (backup/health)

---
 platform/monitoring/athleticmap-rules.yaml | 35 ++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 platform/monitoring/athleticmap-rules.yaml

diff --git a/platform/monitoring/athleticmap-rules.yaml b/platform/monitoring/athleticmap-rules.yaml
new file mode 100644
index 0000000..1d19d99
--- /dev/null
+++ b/platform/monitoring/athleticmap-rules.yaml
@@ -0,0 +1,35 @@
+# Athletic Map alerting rules: backup freshness and tenant pod readiness.
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: athleticmap-rules
+  namespace: monitoring
+  labels: { release: monitoring }
+spec:
+  groups:
+    - name: athleticmap.backup
+      rules:
+        # Fires when the last successful run of a "pg-backup" CronJob is more
+        # than 93600 s (26 h) old and the condition holds for 15 minutes.
+        # NOTE(review): a CronJob that never succeeded presumably has no sample
+        # for this metric and would not alert here; confirm.
+        - alert: AthleticMapBackupStale
+          expr: time() - kube_cronjob_status_last_successful_time{cronjob="pg-backup"} > 93600
+          for: 15m
+          labels: { severity: warning }
+          annotations:
+            # $labels exposes the labels of the alerting series to the template.
+            summary: "Backup do tenant {{ $labels.namespace }} sem sucesso ha >26h"
+    - name: athleticmap.health
+      rules:
+        # Fires when a pod in a namespace matching ".*-prod" has reported
+        # Ready == 0 for 10 minutes.
+        # NOTE(review): completed Job pods may also report Ready == 0; confirm
+        # whether they need to be excluded.
+        - alert: AthleticMapTenantPodNotReady
+          expr: kube_pod_status_ready{namespace=~".*-prod", condition="true"} == 0
+          for: 10m
+          labels: { severity: warning }
+          annotations:
+            # $labels exposes the labels of the alerting series to the template.
+            summary: "Pod {{ $labels.pod }} ({{ $labels.namespace }}) nao-Ready ha >10m"