Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
gitpod-io
GitHub Repository: gitpod-io/gitpod
Path: blob/main/operations/observability/mixins/meta/rules/usage.yaml
2500 views
1
# Copyright (c) 2022 Gitpod GmbH. All rights reserved.
2
# Licensed under the GNU Affero General Public License (AGPL).
3
# See License.AGPL.txt in the project root for license information.
4
5
# PrometheusRule manifest holding the alerting rules of the "usage" group.
# Every rule below carries `severity: warning` and `team: webapp`; the `for`
# field is how long the expression must stay true before the alert fires.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    prometheus: k8s
    role: alert-rules
  name: usage-monitoring-rules
spec:
  groups:
    - name: usage
      rules:
        # Non-OK responses of usage.v1.UsageService/ReconcileUsage: the summed
        # 30-minute increase of the handled-RPC counter must exceed 1.
        - alert: GitpodUsageReconcileUsageFailures
          expr: sum(increase(grpc_server_handled_total{grpc_service="usage.v1.UsageService", grpc_method="ReconcileUsage", grpc_code!="OK"}[30m])) > 1
          for: 30m
          labels:
            severity: warning
            team: webapp
          annotations:
            runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/GitpodUsageReconcileUsageFailures.md
            summary: There are failed usage reconciliations.
            # $value is the result of the expression above, printed with two decimals.
            description: 'We have accumulated {{ printf "%.2f" $value }} failures. This affects how up-to-date usage data is.'

        # Non-OK responses of usage.v1.BillingService/ReconcileInvoices: the
        # summed 30-minute increase of the handled-RPC counter must exceed 1.
        - alert: GitpodUsageReconcileInvoicesFailures
          expr: sum(increase(grpc_server_handled_total{grpc_service="usage.v1.BillingService", grpc_method="ReconcileInvoices", grpc_code!="OK"}[30m])) > 1
          for: 30m
          labels:
            severity: warning
            team: webapp
          annotations:
            runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/GitpodUsageReconcileInvoicesFailures.md
            summary: There are failed Stripe invoice reconciliations.
            description: 'We have accumulated {{ printf "%.2f" $value }} failures. This affects how much customers will be billed.'

        # Non-OK responses of usage.v1.BillingService/FinalizeInvoice: the
        # summed 30-minute increase of the handled-RPC counter must exceed 1.
        - alert: GitpodUsageBillingServiceFinalizeInvoiceFailures
          expr: sum(increase(grpc_server_handled_total{grpc_service="usage.v1.BillingService", grpc_method="FinalizeInvoice", grpc_code!="OK"}[30m])) > 1
          for: 30m
          labels:
            severity: warning
            team: webapp
          annotations:
            runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/GitpodUsageBillingServiceFinalizeInvoiceFailures.md
            summary: Invoice finalization is failing. We are not balancing out user/team usage.
            description: 'We have accumulated {{ printf "%.2f" $value }} failures. This affects if customers have their balance reset and can therefore start new workspaces.'

        # Staleness check: the evaluation time minus the value of
        # gitpod_usage_ledger_last_completed_time{outcome="success"} must
        # exceed 60 * 60 (one hour, given that time() is in seconds).
        - alert: GitpodUsageTooLongSinceLastSuccessfulLedgerReconciliation
          expr: (time() - gitpod_usage_ledger_last_completed_time{outcome="success"}) > 60 * 60
          for: 30m
          labels:
            severity: warning
            team: webapp
          annotations:
            runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/GitpodUsageTooLongSinceLastSuccessfulLedgerReconciliation.md
            # Here $value is the age computed by the expression, in seconds.
            summary: 'Usage reconciliation has not run successfully for {{ printf "%.2f" $value }} seconds. Usage data is stale.'
            description: 'We have not executed scheduled usage reconciliation for {{ printf "%.2f" $value }} seconds. We expect the data to update every 15 minutes to avoid stale usage records and stale invoices.'

        # Per-cluster average of the 5-minute CPU usage rate over all
        # containers (excluding container="POD") in pods matching "usage-.*";
        # the threshold is 0.2 CPU-seconds per second.
        - alert: UsageHighCPUUsage
          # Reasoning: high rates of CPU consumption should only be temporary.
          expr: avg(rate(container_cpu_usage_seconds_total{container!="POD", pod=~"usage-.*"}[5m])) by (cluster) > 0.2
          for: 10m
          labels:
            # Sent to the team-internal channel until this alert has been fine-tuned.
            severity: warning
            team: webapp
          annotations:
            runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/WebAppServicesHighCPUUsage.md
            summary: Usage has excessive CPU usage.
            description: Usage is consuming too much CPU. Please investigate.
            # The cluster label kept by the `by (cluster)` aggregation selects the dashboard's cluster variable.
            dashboard_url: 'https://grafana.gitpod.io/d/6581e46e4e5c7ba40a07646395ef7b23/kubernetes-compute-resources-pod?var-cluster={{ $labels.cluster }}&var-namespace=default'
73
74