Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
gitpod-io
GitHub Repository: gitpod-io/gitpod
Path: blob/main/operations/observability/mixins/workspace/rules/satellite/workspaces.yaml
2506 views
1
# Copyright (c) 2022 Gitpod GmbH. All rights reserved.
2
# Licensed under the GNU Affero General Public License (AGPL).
3
# See License.AGPL.txt in the project root for license information.
4
5
# PrometheusRule manifest for workspace monitoring: one recording rule
# (group "workspace-rules") and four alerts (group "workspace-alerts").
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    # NOTE(review): presumably matched by the Prometheus instance's rule
    # selector - confirm against the Prometheus custom resource.
    prometheus: k8s
    role: alert-rules
  name: workspace-monitoring-satellite-rules
spec:
  groups:
    - name: workspace-rules
      rules:
        # Per-cluster share of workspace-activity samples labelled
        # active="false" relative to all workspace-activity samples.
        # Despite the "percentage" in the name, the value is a ratio in the
        # range 0..1 (the alert below compares it against 0.08, i.e. 8%).
        - record: gitpod_workspace_regular_not_active_percentage_mk2
          expr: |
            sum(gitpod_ws_manager_mk2_workspace_activity_total{active="false"}) by (cluster) / sum(gitpod_ws_manager_mk2_workspace_activity_total) by (cluster)

    - name: workspace-alerts
      rules:
        # Fires when, for 10 minutes, both conditions hold per cluster:
        #   1. the not-active ratio recorded above exceeds 0.08, and
        #   2. the summed active="false" activity metric exceeds 40.
        # The second condition is an absolute lower bound that keeps small
        # clusters from tripping the ratio check.
        - alert: GitpodWorkspaceTooManyRegularNotActiveMk2
          labels:
            severity: critical
          for: 10m
          annotations:
            runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/GitpodWorkspaceRegularNotActive.md
            summary: too many running but inactive workspaces
            description: >-
              too many running but inactive workspaces. lower bound is 40
              "regular not active" workspaces to reduce the false-positive rate.
          # Lower bound bumped from 20 to 40 temporarily. Keep the number in
          # the description above in sync with the threshold in this
          # expression when it is changed back.
          expr: |
            sum(gitpod_workspace_regular_not_active_percentage_mk2) by(cluster) > 0.08
            AND
            sum (gitpod_ws_manager_mk2_workspace_activity_total{active="false"}) by (cluster) > 40

        # Fires when, for 10 minutes, a cluster has a non-zero not-active
        # ratio (averaged over 1m) while the per-second rate of the Regular
        # workspace startup-seconds sum over 1m is exactly zero.
        - alert: GitpodWorkspacesNotStartingMk2
          labels:
            severity: critical
          for: 10m
          annotations:
            # NOTE(review): this URL is the same runbook as
            # GitpodWorkspaceTooManyRegularNotActiveMk2 - looks copy-pasted;
            # confirm whether a dedicated "not starting" runbook exists.
            runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/GitpodWorkspaceRegularNotActive.md
            summary: workspaces are not starting.
            description: inactive regular workspaces exist but workspaces are not being started.
          expr: |
            sum by(cluster) (avg_over_time(gitpod_workspace_regular_not_active_percentage_mk2[1m]) > 0)
            AND
            sum by(cluster) (rate(gitpod_ws_manager_mk2_workspace_startup_seconds_sum{type="Regular"}[1m])) == 0

        # Error tier for backup failures: fires when the per-cluster increase
        # of the backup-failure counter over the last hour is above 0 but
        # below 16. Clusters whose name matches "ephemeral.*" are excluded.
        # No "for:" clause is set on this alert.
        - alert: GitpodWsManagerMk2BackupFailureError
          labels:
            severity: error
            team: engine
          annotations:
            runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/WorkspaceBackupFailures.md
            summary: Workspace backups failed recently in cluster {{ $labels.cluster }}
            description: This can happen when a single node has failed in the cloud provider
          expr: |
            sum by (cluster) (increase(gitpod_ws_manager_mk2_workspace_backups_failure_total{cluster!~"ephemeral.*"}[1h])) > 0
            AND
            sum by (cluster) (increase(gitpod_ws_manager_mk2_workspace_backups_failure_total{cluster!~"ephemeral.*"}[1h])) < 16

        # Critical tier for backup failures: same expression as the error
        # tier, but fires at 16 or more failures in the last hour, so the two
        # tiers do not overlap. No "for:" clause is set on this alert.
        - alert: GitpodWsManagerMk2BackupFailureCritical
          labels:
            severity: critical
            team: engine
          annotations:
            runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/WorkspaceBackupFailures.md
            summary: Workspace backups failed recently in cluster {{ $labels.cluster }}
            description: This can be an indicator of two or more nodes failing in a cloud provider
          expr: |
            sum by (cluster) (increase(gitpod_ws_manager_mk2_workspace_backups_failure_total{cluster!~"ephemeral.*"}[1h])) >= 16
70
71