# Path: blob/main/operations/observability/mixins/workspace/rules/central/image-builder.yaml
# 2506 views
# Copyright (c) 2022 Gitpod GmbH. All rights reserved.
# Licensed under the GNU Affero General Public License (AGPL).
# See License.AGPL.txt in the project root for license information.

# PrometheusRule holding the alerting rules for image-builder-mk3.
# Every rule except GitpodImageBuilderMk3InternalErrors excludes clusters
# whose name matches "ephemeral.*".
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    prometheus: k8s
    role: alert-rules
  name: image-builder-central-monitoring-rules
spec:
  groups:
    - name: image-builder-central
      rules:
        # Fires when the 4h average of the running-image-build series is more
        # than 2.5 (30d) standard deviations above its 7d average.
        # NOTE(review): the alert name says "Duration" while the summary and
        # expression talk about how often builds run - confirm which is meant.
        - alert: GitpodImageBuildDurationAnomaly
          labels:
            severity: warning
            team: engine
          annotations:
            runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/GitpodImageBuildDurationAnomaly.md
            summary: image builds are happening too frequently in cluster {{ $labels.cluster }}
            description: Users are waiting more often for image builds
          expr: |
            (
              (
                avg_over_time(gitpod_ws_manager_mk2_workspace_phase_total{phase="Running", type="ImageBuild", cluster!~"ephemeral.*"}[4h])
                - avg_over_time(gitpod_ws_manager_mk2_workspace_phase_total{phase="Running", type="ImageBuild", cluster!~"ephemeral.*"}[7d])
              )
              / stddev_over_time(gitpod_ws_manager_mk2_workspace_phase_total{phase="Running", type="ImageBuild", cluster!~"ephemeral.*"}[30d])
            ) > 2.5

        # Fires when the image-builder-mk3 container restart counter grows by
        # more than 3 within a 1m window, sustained for 3m.
        - alert: GitpodImageBuilderCrashlooping
          labels:
            severity: critical
            dedicated: included
          annotations:
            runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/GitpodWorkspaceDeploymentCrashlooping.md
            summary: image-builder-mk3 is crash looping in cluster {{ $labels.cluster }}
            # $value is the increase over the 1m range used in expr, so the
            # text states that window (the 3m below is only the "for" hold).
            description: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is restarting {{ printf "%.2f" $value }} times / 1 minute.
          expr: |
            increase(kube_pod_container_status_restarts_total{container="image-builder-mk3", cluster!~"ephemeral.*"}[1m]) > 3
          for: 3m

        # Fires when the desired and available replica counts of the
        # image-builder-mk3 deployment differ for 3m.
        - alert: GitpodImageBuilderReplicasMismatch
          labels:
            severity: critical
            dedicated: included
          annotations:
            runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/GitpodWorkspaceDeploymentReplicaMismatch.md
            summary: Desired number of replicas for image-builder-mk3 are not available in cluster {{ $labels.cluster }}
            description: 'Desired number of replicas for image-builder-mk3 are not available in cluster {{ $labels.cluster }}: {{ printf "%.2f" $value }} are missing'
          # Subtract instead of comparing the two series directly: a bare
          # "a != b" filter yields a's value (the desired count), whereas the
          # description reports $value as the number of missing replicas.
          expr: |
            (
              kube_deployment_spec_replicas{deployment="image-builder-mk3", cluster!~"ephemeral.*"}
              - kube_deployment_status_replicas_available{deployment="image-builder-mk3", cluster!~"ephemeral.*"}
            ) != 0
          for: 3m

        # Fires when the image-builder-mk3 deployment reports at least one
        # unavailable replica for 10m.
        - alert: GitpodImageBuilderMk3ReplicaUnavailable
          labels:
            # TODO(gpl): warning for now, to set it up and fine-tune it
            severity: warning
          annotations:
            runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/GitpodWorkspaceDeploymentReplicaMismatch.md
            summary: image-builder-mk3 replicas are unavailable in cluster {{ $labels.cluster }}
            description: 'image-builder-mk3 pods are unavailable in cluster {{ $labels.cluster }}: {{ printf "%.2f" $value }}'
          expr: |
            kube_deployment_status_replicas_unavailable{deployment="image-builder-mk3", cluster!~"ephemeral.*"} > 0
          for: 10m

        # Fires when the 1h rate of Build calls answered with gRPC code
        # "Internal" stays above 0.001/s for 1h. "sum by()" collapses all
        # series into one, so the alert carries no cluster/pod labels.
        # NOTE(review): unlike the other rules there is no
        # cluster!~"ephemeral.*" matcher here - confirm that is intended.
        - alert: GitpodImageBuilderMk3InternalErrors
          labels:
            severity: warning
            dedicated: included
            team: engine
          annotations:
            # runbook is 404 for now
            runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/GitpodImageBuilderMk3InternalErrors.md
            summary: image-builder-mk3 is returning unexpected internal errors
            description: 'Check the logs for image-builder-mk3 to inspect the high rate: {{ printf "%.2f" $value }}'
          expr: |
            sum by() (rate(grpc_server_handled_total{service="image-builder-mk3", grpc_code=~"Internal", grpc_method="Build"}[1h])) > 0.001
          for: 1h