Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
gitpod-io
GitHub Repository: gitpod-io/gitpod
Path: blob/main/operations/observability/mixins/meta/rules/proxy.yaml
2500 views
---
# PrometheusRule holding the alerting rules for the proxy component.
# Two rule groups are defined:
#   * dashboard - resource-usage alert (CPU) for the proxy pods
#   * proxy     - request-level alert (HTTP 502 ratio) based on Caddy metrics
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    prometheus: k8s
    role: alert-rules
  name: proxy-monitoring-rules
spec:
  groups:
    - name: dashboard
      rules:
        - alert: ProxyHighCPUUsage
          # Reasoning: high rates of CPU consumption should only be temporary.
          # The expression averages, per cluster, the 5m rate of CPU seconds
          # used by containers (excluding "POD") in pods matching "proxy-.*",
          # and must stay above 0.1 for 10 minutes before the alert fires.
          expr: avg(rate(container_cpu_usage_seconds_total{container!="POD", pod=~"proxy-.*"}[5m])) by (cluster) > 0.1
          for: 10m
          labels:
            # Sent to the team-internal channel until we have fine-tuned it.
            severity: warning
            team: webapp
          annotations:
            runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/WebAppServicesHighCPUUsage.md
            summary: Proxy has excessive CPU usage.
            description: Proxy is consuming too much CPU. Please investigate.
            # Quoted because the value embeds a {{ ... }} template expression.
            dashboard_url: 'https://grafana.gitpod.io/d/6581e46e4e5c7ba40a07646395ef7b23/kubernetes-compute-resources-pod?var-cluster={{ $labels.cluster }}&var-namespace=default'
    - name: proxy
      rules:
        - alert: ProxyBadGateway
          # Reasoning: The highest peak of 502's for PAYG is 0.00007 in 5m, and this was not impactful for users.
          # The expression is the share of responses with code 502 among all
          # responses over the last 5m; the alert condition is a share above
          # 0.001. No `for:` clause is set on this rule.
          expr: |
            sum(increase(caddy_http_response_duration_seconds_count{code="502"}[5m])) / sum(increase(caddy_http_response_duration_seconds_count[5m])) > 0.001
          labels:
            severity: critical
            team: webapp
          annotations:
            runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/ProxyBadGateway.md
            summary: Caddy is having trouble serving requests for backends
            description: The user experience is degraded, analyze logs to see which routes are impacted