# Path: blob/main/operations/observability/mixins/self-hosted/rules/observability-stack/prometheus-operator.yaml
# 3608 views
# Copyright (c) 2022 Gitpod GmbH. All rights reserved.
# Licensed under the GNU Affero General Public License (AGPL).
# See License.AGPL.txt in the project root for license information.

# PrometheusRule defining alerts for the prometheus-operator job and for
# config-reloader sidecars. The operator alerts are scoped to the
# "monitoring-satellite" namespace; every alert is labelled severity=warning
# and team=delivery-operations-experience.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    app.kubernetes.io/name: kubernetes
    app.kubernetes.io/part-of: kube-prometheus
    prometheus: k8s
    role: alert-rules
  name: prometheus-operator-monitoring-rules
  namespace: monitoring-satellite
spec:
  groups:
  - name: prometheus-operator
    rules:
    # Ratio of failed to total list operations (10m rate), per controller and
    # namespace, above 0.4 for 15m.
    - alert: PrometheusOperatorListErrors
      annotations:
        description: Errors while performing List operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.
        summary: Errors while performing list operations in controller.
      expr: |
        (sum by (controller,namespace) (rate(prometheus_operator_list_operations_failed_total{job="prometheus-operator",namespace="monitoring-satellite"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_list_operations_total{job="prometheus-operator",namespace="monitoring-satellite"}[10m]))) > 0.4
      for: 15m
      labels:
        severity: warning
        team: delivery-operations-experience
    # Ratio of failed to total watch operations (5m rate), per controller and
    # namespace, above 0.4 for 15m.
    - alert: PrometheusOperatorWatchErrors
      annotations:
        description: Errors while performing watch operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.
        summary: Errors while performing watch operations in controller.
      expr: |
        (sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator",namespace="monitoring-satellite"}[5m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator",namespace="monitoring-satellite"}[5m]))) > 0.4
      for: 15m
      labels:
        severity: warning
        team: delivery-operations-experience
    # Ratio of reconcile errors to reconcile operations (5m rate), per
    # controller and namespace, above 0.1 for 10m. The ratio is rendered in the
    # description via humanizePercentage.
    - alert: PrometheusOperatorReconcileErrors
      annotations:
        description: '{{ $value | humanizePercentage }} of reconciling operations failed for {{ $labels.controller }} controller in {{ $labels.namespace }} namespace.'
        summary: Errors while reconciling controller.
      expr: |
        (sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator",namespace="monitoring-satellite"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator",namespace="monitoring-satellite"}[5m]))) > 0.1
      for: 10m
      labels:
        severity: warning
        team: delivery-operations-experience
    # Fires when reloader_last_reload_successful has had a 5m maximum of 0 for
    # 10m. The selector namespace=~".+" matches any series with a non-empty
    # namespace label, so this alert is not limited to monitoring-satellite.
    - alert: ConfigReloaderSidecarErrors
      annotations:
        description: |-
          Errors encountered while the {{$labels.pod}} config-reloader sidecar attempts to sync config in {{$labels.namespace}} namespace.
          As a result, configuration for service running in {{$labels.pod}} may be stale and cannot be updated anymore.
        summary: config-reloader sidecar has not had a successful reload for 10m
      expr: |
        max_over_time(reloader_last_reload_successful{namespace=~".+"}[5m]) == 0
      for: 10m
      labels:
        severity: warning
        team: delivery-operations-experience