Path: blob/main/components/gitpod-protocol/src/metrics.ts
2498 views
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Gitpod. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

/**
 * prom-client is a node library; we only import some types and values,
 * not the default node metrics.
 */
import type {
    Registry as PromRegistry,
    Counter as PromCounter,
    Histogram as PromHistogram,
    MetricObjectWithValues,
    MetricValue,
    MetricValueWithName,
} from "prom-client";

const Registry: typeof PromRegistry = require("prom-client/lib/registry");
const Counter: typeof PromCounter = require("prom-client/lib/counter");
const Histogram: typeof PromHistogram = require("prom-client/lib/histogram");

import { MethodKind } from "@bufbuild/protobuf";
import {
    StreamResponse,
    UnaryResponse,
    Code,
    ConnectError,
    Interceptor,
    StreamRequest,
    UnaryRequest,
} from "@connectrpc/connect";

type GrpcMethodType = "unary" | "client_stream" | "server_stream" | "bidi_stream";

/** Labels identifying a single RPC call. */
interface IGrpcCallMetricsLabels {
    service: string;
    method: string;
    type: GrpcMethodType;
}

/** Call labels plus the name of the final status code of the call. */
interface IGrpcCallMetricsLabelsWithCode extends IGrpcCallMetricsLabels {
    code: string;
}

/** Registry shared by every metric declared in this module; drained by `MetricsReporter`. */
const register = new Registry();

/**
 * Holds the client-side gRPC counters/histogram and the WebSocket counter,
 * all registered in the module-level `register`.
 */
class PrometheusClientCallMetrics {
    readonly startedCounter: PromCounter<string>;
    readonly sentCounter: PromCounter<string>;
    readonly receivedCounter: PromCounter<string>;
    readonly handledCounter: PromCounter<string>;
    readonly handledSecondsHistogram: PromHistogram<string>;

    readonly webSocketCounter: PromCounter<string>;

    constructor() {
        this.startedCounter = new Counter({
            name: "grpc_client_started_total",
            help: "Total number of RPCs started on the client.",
            labelNames: ["grpc_service", "grpc_method", "grpc_type"],
            registers: [register],
        });
        this.sentCounter = new Counter({
            name: "grpc_client_msg_sent_total",
            help: " Total number of gRPC stream messages sent by the client.",
            labelNames: ["grpc_service", "grpc_method", "grpc_type"],
            registers: [register],
        });
        this.receivedCounter = new Counter({
            name: "grpc_client_msg_received_total",
            help: "Total number of RPC stream messages received by the client.",
            labelNames: ["grpc_service", "grpc_method", "grpc_type"],
            registers: [register],
        });
        this.handledCounter = new Counter({
            name: "grpc_client_handled_total",
            help: "Total number of RPCs completed by the client, regardless of success or failure.",
            labelNames: ["grpc_service", "grpc_method", "grpc_type", "grpc_code"],
            registers: [register],
        });
        this.handledSecondsHistogram = new Histogram({
            name: "grpc_client_handling_seconds",
            help: "Histogram of response latency (seconds) of the gRPC until it is finished by the application.",
            labelNames: ["grpc_service", "grpc_method", "grpc_type", "grpc_code"],
            // it should be aligned with https://github.com/gitpod-io/gitpod/blob/84ed1a0672d91446ba33cb7b504cfada769271a8/install/installer/pkg/components/ide-metrics/configmap.go#L315
            buckets: [0.1, 0.2, 0.5, 1, 2, 5, 10],
            registers: [register],
        });

        this.webSocketCounter = new Counter({
            name: "websocket_client_total",
            help: "Total number of WebSocket connections by the client",
            labelNames: ["origin", "instance_phase", "status", "code", "was_clean"],
            registers: [register],
        });
    }

    /** Counts one started RPC. */
    started(labels: IGrpcCallMetricsLabels): void {
        this.startedCounter.inc({
            grpc_service: labels.service,
            grpc_method: labels.method,
            grpc_type: labels.type,
        });
    }

    /** Counts one stream message sent by the client. */
    sent(labels: IGrpcCallMetricsLabels): void {
        this.sentCounter.inc({
            grpc_service: labels.service,
            grpc_method: labels.method,
            grpc_type: labels.type,
        });
    }

    /** Counts one stream message received by the client. */
    received(labels: IGrpcCallMetricsLabels): void {
        this.receivedCounter.inc({
            grpc_service: labels.service,
            grpc_method: labels.method,
            grpc_type: labels.type,
        });
    }

    /** Counts one completed RPC together with its final status code. */
    handled(labels: IGrpcCallMetricsLabelsWithCode): void {
        this.handledCounter.inc({
            grpc_service: labels.service,
            grpc_method: labels.method,
            grpc_type: labels.type,
            grpc_code: labels.code,
        });
    }

    /**
     * Starts measuring the handling time of a call.
     *
     * @param labels labels of the call being timed
     * @returns a function that stops the timer, records the elapsed seconds in
     * the histogram (merged with the optional end labels) and returns them
     */
    startHandleTimer(
        labels: IGrpcCallMetricsLabels,
    ): (endLabels?: Partial<Record<string, string | number>> | undefined) => number {
        const startLabels = {
            grpc_service: labels.service,
            grpc_method: labels.method,
            grpc_type: labels.type,
        };
        if (typeof window !== "undefined") {
            // When a `window` global exists the elapsed time is measured with
            // `performance.now()` instead of prom-client's own timer.
            const start = performance.now();
            return (endLabels) => {
                const value = (performance.now() - start) / 1000;
                // Merge into a fresh object so the captured start labels are never mutated.
                this.handledSecondsHistogram.labels(Object.assign({}, startLabels, endLabels)).observe(value);
                return value;
            };
        }
        return this.handledSecondsHistogram.startTimer(startLabels);
    }
}

const metrics = new PrometheusClientCallMetrics();

/**
 * Creates a connect interceptor that records, for every call: a "started"
 * count, per-message sent/received counts for streams, and on completion a
 * "handled" count plus the handling duration, labelled with the status code.
 */
export function getMetricsInterceptor(): Interceptor {
    const getLabels = (req: UnaryRequest | StreamRequest): IGrpcCallMetricsLabels => {
        let type: GrpcMethodType;
        switch (req.method.kind) {
            case MethodKind.Unary:
                type = "unary";
                break;
            case MethodKind.ServerStreaming:
                type = "server_stream";
                break;
            case MethodKind.ClientStreaming:
                type = "client_stream";
                break;
            case MethodKind.BiDiStreaming:
                type = "bidi_stream";
                break;
        }
        return {
            type,
            service: req.service.typeName,
            method: req.method.name,
        };
    };

    /** Name used for the `grpc_code` label; a call without an error code is reported as "OK". */
    const codeLabel = (code: Code | undefined): string => (code ? Code[code] : "OK");

    return (next) => async (req) => {
        const labels = getLabels(req);
        metrics.started(labels);
        const stopTimer = metrics.startHandleTimer(labels);

        // True once the outer handler below has recorded (or is about to record)
        // the outcome itself: unary responses and calls that failed before a
        // response was available.
        let settled = false;

        /**
         * Wraps a message stream, invoking `callback` for every message.
         * With `handleMetrics` set, the end of the stream (normal or by error)
         * also records the call outcome, unless the outer handler already did.
         */
        async function* incrementStreamMessagesCounter<T>(
            iterable: AsyncIterable<T>,
            callback: () => void,
            handleMetrics: boolean,
        ): AsyncIterable<T> {
            let streamStatus: Code | undefined;
            try {
                for await (const item of iterable) {
                    callback();
                    yield item;
                }
            } catch (e) {
                streamStatus = ConnectError.from(e).code;
                throw e;
            } finally {
                if (handleMetrics && !settled) {
                    stopTimer({ grpc_code: codeLabel(streamStatus) });
                    metrics.handled({ ...labels, code: codeLabel(streamStatus) });
                }
            }
        }

        let status: Code | undefined;
        try {
            let request: UnaryRequest | StreamRequest;
            if (!req.stream) {
                request = req;
            } else {
                request = {
                    ...req,
                    message: incrementStreamMessagesCounter(req.message, metrics.sent.bind(metrics, labels), false),
                };
            }

            const res = await next(request);

            let response: UnaryResponse | StreamResponse;
            if (!res.stream) {
                response = res;
                settled = true;
            } else {
                // For streaming responses the outcome is recorded when the wrapped
                // stream finishes, not here.
                response = {
                    ...res,
                    message: incrementStreamMessagesCounter(res.message, metrics.received.bind(metrics, labels), true),
                };
            }

            return response;
        } catch (e) {
            settled = true;
            status = ConnectError.from(e).code;
            throw e;
        } finally {
            if (settled) {
                stopTimer({ grpc_code: codeLabel(status) });
                metrics.handled({ ...labels, code: codeLabel(status) });
            }
        }
    };
}

export type MetricsRequest = RequestInit & { url: string };

/**
 * Periodically posts the metrics collected in the module registry, and
 * on-demand error reports, to the `metrics-api` endpoint on the
 * `ide.<gitpod host>` domain.
 */
export class MetricsReporter {
    /** Reporting period in milliseconds. */
    private static readonly REPORT_INTERVAL = 10000;

    private intervalHandler: ReturnType<typeof setInterval> | undefined;

    private readonly metricsHost: string;

    /** Promise chain that serializes outgoing requests. */
    private sendQueue = Promise.resolve();

    /** Requests created but not yet sent, or queued again after a failed/offline attempt. */
    private readonly pendingRequests: MetricsRequest[] = [];

    constructor(
        private readonly options: {
            gitpodUrl: string;
            clientName: string;
            clientVersion: string;
            log: {
                error: typeof console.error;
                debug: typeof console.debug;
            };
            isEnabled?: () => Promise<boolean>;
            commonErrorDetails: { [key: string]: string | undefined };
        },
    ) {
        this.metricsHost = `ide.${new URL(options.gitpodUrl).hostname}`;
        if (typeof window !== "undefined") {
            this.options.commonErrorDetails["userAgent"] = window.navigator.userAgent;
        }
    }

    /** Merges `update` into the details attached to every error report. */
    updateCommonErrorDetails(update: { [key: string]: string | undefined }) {
        Object.assign(this.options.commonErrorDetails, update);
    }

    /** Starts the periodic reporting; does nothing when it is already running. */
    startReporting() {
        if (this.intervalHandler) {
            return;
        }
        this.intervalHandler = setInterval(
            () => this.report().catch((e) => this.options.log.error("metrics: error while reporting", e)),
            MetricsReporter.REPORT_INTERVAL,
        );
    }

    /** Stops the periodic reporting; `startReporting` can be called again afterwards. */
    stopReporting() {
        if (this.intervalHandler) {
            clearInterval(this.intervalHandler);
            // Forget the handle, otherwise startReporting() would treat the
            // cleared timer as still running and never restart.
            this.intervalHandler = undefined;
        }
    }

    private async isEnabled(): Promise<boolean> {
        if (!this.options.isEnabled) {
            return true;
        }
        return this.options.isEnabled();
    }

    /**
     * Converts the current registry content into requests, resets the
     * registry and sends everything that is pending.
     */
    private async report() {
        const enabled = await this.isEnabled();
        if (!enabled) {
            return;
        }
        // `typeof` yields a string, so it has to be compared with "undefined".
        if (typeof window !== "undefined" && !window.navigator.onLine) {
            return;
        }

        const metrics = await register.getMetricsAsJSON();
        register.resetMetrics();
        for (const m of metrics) {
            if (m.name === "grpc_client_msg_sent_total" || m.name === "grpc_client_msg_received_total") {
                // Skip these as they are filtered by ide metrics
                continue;
            }

            const type = m.type as unknown as string;
            if (type === "counter") {
                this.syncReportCounter(m);
            } else if (type === "histogram") {
                this.syncReportHistogram(m);
            }
        }

        // Drain a snapshot: send() synchronously re-queues a request when the
        // browser is offline, so looping until the live queue is empty could
        // spin forever.
        const batch = this.pendingRequests.splice(0);
        for (const request of batch) {
            // eslint-disable-next-line @typescript-eslint/no-floating-promises
            this.send(request);
        }
    }

    /** Queues one "counter add" request per label set with a positive value. */
    private syncReportCounter(metric: MetricObjectWithValues<MetricValue<string>>) {
        for (const { value, labels } of metric.values) {
            if (value > 0) {
                this.push(
                    this.create("metrics/counter/add/" + metric.name, {
                        name: metric.name,
                        labels,
                        value,
                    }),
                );
            }
        }
    }

    /** Queues one "histogram add" request per label set with a positive count. */
    private syncReportHistogram(metric: MetricObjectWithValues<MetricValueWithName<string>>) {
        let sum = 0;
        let buckets: number[] = [];
        for (const { value, labels, metricName } of metric.values) {
            if (!metricName) {
                continue;
            }
            // metricName are in the following order _bucket, _sum, _count
            // We report on _count as it's the last
            // https://github.com/siimon/prom-client/blob/eee34858d2ef4198ff94f56a278d7b81f65e9c63/lib/histogram.js#L222-L235
            if (metricName.endsWith("_bucket")) {
                if (labels["le"] !== "+Inf") {
                    buckets.push(value);
                }
            } else if (metricName.endsWith("_sum")) {
                sum = value;
            } else if (metricName.endsWith("_count")) {
                if (value > 0) {
                    this.push(
                        this.create("metrics/histogram/add/" + metric.name, {
                            name: metric.name,
                            labels,
                            count: value,
                            sum,
                            buckets,
                        }),
                    );
                }
                // Start collecting the next label set.
                sum = 0;
                buckets = [];
            }
        }
    }

    /**
     * Reports an error in the background; never throws and never rejects.
     *
     * @param error the error to report
     * @param data optional details; `userId`, `workspaceId` and `instanceId`
     * are sent as dedicated fields, everything else as properties
     */
    reportError(
        error: Error,
        data?: {
            userId?: string;
            workspaceId?: string;
            instanceId?: string;
            [key: string]: string | undefined;
        },
    ): void {
        // Fire and forget, but do not let a failure (e.g. a rejecting
        // `isEnabled`) surface as an unhandled rejection.
        this.asyncReportError(error, data).catch((e) =>
            this.options.log.error("metrics: failed to report error", e),
        );
    }

    private async asyncReportError(
        error: Error,
        data?: {
            userId?: string;
            workspaceId?: string;
            instanceId?: string;
            [key: string]: string | undefined;
        },
    ): Promise<void> {
        const enabled = await this.isEnabled();
        if (!enabled) {
            return;
        }
        // Common details win over per-call data on key collisions.
        const properties = { ...data, ...this.options.commonErrorDetails };
        properties["error_timestamp"] = new Date().toISOString();
        properties["error_name"] = error.name;
        properties["error_message"] = error.message;

        if (typeof window !== "undefined") {
            properties["onLine"] = String(window.navigator.onLine);
        }

        const workspaceId = properties["workspaceId"];
        const instanceId = properties["instanceId"];
        const userId = properties["userId"];

        // The ids travel as dedicated fields, not as generic properties.
        delete properties["workspaceId"];
        delete properties["instanceId"];
        delete properties["userId"];

        await this.send(
            this.create("reportError", {
                component: this.options.clientName,
                errorStack: error.stack ?? String(error),
                version: this.options.clientVersion,
                workspaceId: workspaceId ?? "",
                instanceId: instanceId ?? "",
                userId: userId ?? "",
                properties,
            }),
        );
    }

    /**
     * Builds a JSON POST request for the given metrics-api endpoint.
     *
     * @returns the request, or `undefined` (after logging) when the payload
     * cannot be serialized
     */
    private create(endpoint: string, data: unknown): MetricsRequest | undefined {
        try {
            const request: MetricsRequest = {
                url: `https://${this.metricsHost}/metrics-api/` + endpoint,
                method: "POST",
                headers: {
                    "Content-Type": "application/json",
                    "X-Client": this.options.clientName,
                    "X-Client-Version": this.options.clientVersion,
                },
                body: JSON.stringify(data),
                credentials: "omit",
            };
            return request;
        } catch (e) {
            this.options.log.error("metrics: failed to create request", e);
            return undefined;
        }
    }

    /** Appends a request to the pending queue; `undefined` is ignored. */
    private push(request: MetricsRequest | undefined): void {
        if (!request) {
            return;
        }
        this.pendingRequests.push(request);
    }

    /**
     * Sends a request after all previously scheduled ones. While offline, or
     * when the fetch itself fails, the request goes back to the pending queue;
     * a non-OK response is logged and the request is dropped.
     */
    private async send(request: MetricsRequest | undefined): Promise<void> {
        if (!request) {
            return;
        }
        if (typeof window !== "undefined" && !window.navigator.onLine) {
            this.push(request);
            return;
        }
        this.sendQueue = this.sendQueue.then(async () => {
            try {
                const response = await fetch(request.url, { ...request, priority: "low" });
                if (!response.ok) {
                    this.options.log.error(
                        `metrics: endpoint responded with ${response.status} ${response.statusText}`,
                    );
                }
            } catch (e) {
                this.options.log.debug("metrics: failed to post, trying again next time", e);
                this.push(request);
            }
        });
        await this.sendQueue;
    }

    /**
     * Counts the lifecycle events ("new", "open", "error", "close") of a
     * WebSocket and reports errors and unclean closes.
     *
     * @param ws the socket to observe
     * @param origin value of the `origin` label on the counter
     */
    instrumentWebSocket(ws: WebSocket, origin: string) {
        const inc = (status: string, code?: number, wasClean?: boolean) => {
            metrics.webSocketCounter
                .labels({
                    origin,
                    instance_phase: this.options.commonErrorDetails["instancePhase"],
                    status,
                    code: code !== undefined ? String(code) : undefined,
                    was_clean: wasClean !== undefined ? String(Number(wasClean)) : undefined,
                })
                .inc();
        };
        inc("new");
        ws.addEventListener("open", () => inc("open"));
        ws.addEventListener("error", (event) => {
            inc("error");
            this.reportError(new Error(`WebSocket failed: ${String(event)}`));
        });
        ws.addEventListener("close", (event) => {
            inc("close", event.code, event.wasClean);
            if (!event.wasClean) {
                this.reportError(new Error("WebSocket was not closed cleanly"), {
                    code: String(event.code),
                    reason: event.reason,
                });
            }
        });
    }
}