Path: blob/main/components/ws-manager-bridge/src/metrics.ts
2498 views
/**
 * Copyright (c) 2020 Gitpod GmbH. All rights reserved.
 * Licensed under the GNU Affero General Public License (AGPL).
 * See License.AGPL.txt in the project root for license information.
 */

import * as prom from "prom-client";
import { injectable } from "inversify";
import { WorkspaceInstance, WorkspaceType } from "@gitpod/gitpod-protocol";
import { WorkspaceClusterWoTLS } from "@gitpod/gitpod-protocol/lib/workspace-cluster";

/**
 * Owns the Prometheus metrics created by this module and exposes one small
 * reporting method per metric, so callers never touch `prom-client` directly.
 *
 * All metrics are created in the constructor.
 */
@injectable()
export class Metrics {
    protected readonly workspaceStartupTimeHistogram: prom.Histogram<string>;
    protected readonly timeToFirstUserActivityHistogram: prom.Histogram<string>;
    protected readonly clusterScore: prom.Gauge<string>;
    protected readonly clusterCordoned: prom.Gauge<string>;
    protected readonly statusUpdatesTotal: prom.Counter<string>;
    protected readonly staleStatusUpdatesTotal: prom.Counter<string>;
    protected readonly stalePrebuildEventsTotal: prom.Counter<string>;
    protected readonly prebuildsCompletedTotal: prom.Counter<string>;
    protected readonly instanceMarkedStoppedTotal: prom.Counter<string>;

    protected readonly workspaceInstanceUpdateStartedTotal: prom.Counter<string>;
    protected readonly workspaceInstanceUpdateCompletedSeconds: prom.Histogram<string>;

    // NOTE(review): this field is declared but never assigned or used anywhere
    // in this class, so it is `undefined` at runtime (and an error under
    // `strictPropertyInitialization`). Confirm whether it should be initialized
    // in the constructor or removed.
    protected readonly updatesPublishedTotal: prom.Counter<string>;

    /** Cluster names that currently have `clusterScore` / `clusterCordoned` series. */
    protected activeClusterNames = new Set<string>();

    constructor() {
        // Observed values are in seconds; buckets double from 2 for 10 steps.
        this.workspaceStartupTimeHistogram = new prom.Histogram({
            name: "workspace_startup_time",
            help: "The time until a workspace instance is marked running",
            labelNames: ["neededImageBuild", "region"],
            buckets: prom.exponentialBuckets(2, 2, 10),
        });
        this.timeToFirstUserActivityHistogram = new prom.Histogram({
            name: "first_user_activity_time",
            help: "The time between a workspace is running and first user activity",
            labelNames: ["region"],
            buckets: prom.exponentialBuckets(2, 2, 10),
        });
        this.clusterScore = new prom.Gauge({
            name: "gitpod_ws_manager_bridge_cluster_score",
            help: "Score of the individual registered workspace cluster",
            labelNames: ["workspace_cluster"],
        });
        this.clusterCordoned = new prom.Gauge({
            name: "gitpod_ws_manager_bridge_cluster_cordoned",
            help: "Cordoned status of the individual registered workspace cluster",
            labelNames: ["workspace_cluster"],
        });
        this.statusUpdatesTotal = new prom.Counter({
            name: "gitpod_ws_manager_bridge_status_updates_total",
            help: "Total workspace status updates received",
            labelNames: ["workspace_cluster", "known_instance"],
        });
        this.staleStatusUpdatesTotal = new prom.Counter({
            name: "gitpod_ws_manager_bridge_stale_status_updates_total",
            help: "Total count of stale status updates received by workspace manager bridge",
        });
        this.stalePrebuildEventsTotal = new prom.Counter({
            name: "gitpod_ws_manager_bridge_stale_prebuild_events_total",
            help: "Total count of stale prebuild events received by workspace manager bridge",
        });

        this.workspaceInstanceUpdateStartedTotal = new prom.Counter({
            name: "gitpod_ws_manager_bridge_workspace_instance_update_started_total",
            help: "Total number of workspace instance updates that started processing",
            labelNames: ["workspace_cluster", "workspace_instance_type"],
        });

        // Durations are passed in as seconds; buckets double from 0.05 for 8 steps.
        this.workspaceInstanceUpdateCompletedSeconds = new prom.Histogram({
            name: "gitpod_ws_manager_bridge_workspace_instance_update_completed_seconds",
            help: "Histogram of completed workspace instance updates, by outcome",
            labelNames: ["workspace_cluster", "workspace_instance_type", "outcome"],
            buckets: prom.exponentialBuckets(0.05, 2, 8),
        });

        this.prebuildsCompletedTotal = new prom.Counter({
            name: "gitpod_prebuilds_completed_total",
            help: "Counter of total prebuilds ended.",
            labelNames: ["state"],
        });

        this.instanceMarkedStoppedTotal = new prom.Counter({
            name: "gitpod_ws_instances_marked_stopped_total",
            help: "Counter of total instances marked stopped by the ws-manager-bridge",
            labelNames: ["previous_phase"],
        });
    }

    /**
     * Records the seconds elapsed between `instance.creationTime` and
     * `instance.startedTime`.
     *
     * Does nothing when `startedTime` is not set: the difference would be `NaN`,
     * which the histogram rejects by throwing. This mirrors the guard in
     * {@link observeFirstUserActivity}.
     *
     * @param instance the workspace instance whose startup time is observed
     */
    observeWorkspaceStartupTime(instance: WorkspaceInstance): void {
        if (!instance.startedTime) {
            return;
        }

        const timeToRunningSecs =
            (new Date(instance.startedTime).getTime() - new Date(instance.creationTime).getTime()) / 1000;
        this.workspaceStartupTimeHistogram.observe(
            {
                neededImageBuild: JSON.stringify(instance.status.conditions.neededImageBuild),
                region: instance.region,
            },
            timeToRunningSecs,
        );
    }

    /**
     * Records the seconds elapsed between `instance.startedTime` and the first
     * user activity. Does nothing when `startedTime` is not set.
     *
     * @param instance the workspace instance the activity belongs to
     * @param firstUserActivity timestamp string, parsed with `new Date(...)`
     */
    observeFirstUserActivity(instance: WorkspaceInstance, firstUserActivity: string): void {
        if (!instance.startedTime) {
            return;
        }

        const timeToFirstUserActivity =
            (new Date(firstUserActivity).getTime() - new Date(instance.startedTime).getTime()) / 1000;
        this.timeToFirstUserActivityHistogram.observe(
            {
                region: instance.region,
            },
            timeToFirstUserActivity,
        );
    }

    /**
     * Sets the score and cordoned gauges for every given cluster and removes the
     * series of clusters that were reported on the previous call but are absent
     * from this one.
     *
     * @param clusters the complete list of currently registered clusters
     */
    updateClusterMetrics(clusters: WorkspaceClusterWoTLS[]): void {
        const newActiveClusterNames = new Set<string>();
        for (const cluster of clusters) {
            this.clusterCordoned.labels(cluster.name).set(cluster.state === "cordoned" ? 1 : 0);
            this.clusterScore.labels(cluster.name).set(cluster.score);
            newActiveClusterNames.add(cluster.name);
        }

        // Drop series for clusters that disappeared, so they stop being exported.
        const noLongerActiveCluster = Array.from(this.activeClusterNames).filter(
            (name) => !newActiveClusterNames.has(name),
        );
        for (const clusterName of noLongerActiveCluster) {
            this.clusterCordoned.remove(clusterName);
            this.clusterScore.remove(clusterName);
        }
        this.activeClusterNames = newActiveClusterNames;
    }

    /**
     * Counts one received status update.
     *
     * @param installation used as the `workspace_cluster` label value
     * @param knownInstance recorded as the `known_instance` label ("true"/"false")
     */
    statusUpdateReceived(installation: string, knownInstance: boolean): void {
        this.statusUpdatesTotal.labels(installation, knownInstance ? "true" : "false").inc();
    }

    /** Counts one stale status update. */
    recordStaleStatusUpdate(): void {
        this.staleStatusUpdatesTotal.inc();
    }

    /** Counts one stale prebuild event. */
    recordStalePrebuildEvent(): void {
        this.stalePrebuildEventsTotal.inc();
    }

    /**
     * Counts one workspace instance update that started processing.
     *
     * @param workspaceCluster value for the `workspace_cluster` label
     * @param type value for the `workspace_instance_type` label
     */
    reportWorkspaceInstanceUpdateStarted(workspaceCluster: string, type: WorkspaceType): void {
        this.workspaceInstanceUpdateStartedTotal.labels(workspaceCluster, type).inc();
    }

    /**
     * Observes the duration of a finished workspace instance update.
     *
     * The `outcome` label is "skipped" when `skippedUpdate` is true (even if an
     * error is also given), otherwise "error" when `error` is set, otherwise
     * "success".
     *
     * @param durationSeconds how long the update took, in seconds
     * @param workspaceCluster value for the `workspace_cluster` label
     * @param type value for the `workspace_instance_type` label
     * @param skippedUpdate whether the update was skipped
     * @param error the error the update ended with, if any
     */
    reportWorkspaceInstanceUpdateCompleted(
        durationSeconds: number,
        workspaceCluster: string,
        type: WorkspaceType,
        skippedUpdate: boolean,
        error?: Error,
    ): void {
        const outcome = skippedUpdate ? "skipped" : error ? "error" : "success";
        this.workspaceInstanceUpdateCompletedSeconds.labels(workspaceCluster, type, outcome).observe(durationSeconds);
    }

    /**
     * Counts one ended prebuild.
     *
     * @param state value for the `state` label
     */
    increasePrebuildsCompletedCounter(state: string): void {
        this.prebuildsCompletedTotal.inc({ state });
    }

    /**
     * Counts one instance marked as stopped.
     *
     * @param previous_phase value for the `previous_phase` label
     */
    increaseInstanceMarkedStoppedCounter(previous_phase: string): void {
        this.instanceMarkedStoppedTotal.inc({ previous_phase });
    }
}