Path: blob/main/components/ws-daemon/pkg/cgroup/plugin_psi.go
2499 views
// Copyright (c) 2022 Gitpod GmbH. All rights reserved.1// Licensed under the GNU Affero General Public License (AGPL).2// See License.AGPL.txt in the project root for license information.34package cgroup56import (7"context"8"os"9"path/filepath"10"time"1112cgroups "github.com/gitpod-io/gitpod/common-go/cgroups/v2"13"github.com/gitpod-io/gitpod/common-go/kubernetes"14"github.com/gitpod-io/gitpod/common-go/log"15"github.com/prometheus/client_golang/prometheus"16)1718type PSIMetrics struct {19cpu *prometheus.GaugeVec20memory *prometheus.GaugeVec21io *prometheus.GaugeVec22nodeName string23}2425func NewPSIMetrics(prom prometheus.Registerer) *PSIMetrics {26p := &PSIMetrics{27cpu: prometheus.NewGaugeVec(prometheus.GaugeOpts{28Name: "workspace_cpu_psi_total_seconds",29Help: "Total time spent under cpu pressure in microseconds",30}, []string{"node", "workspace", "kind"}),3132memory: prometheus.NewGaugeVec(prometheus.GaugeOpts{33Name: "workspace_memory_psi_total_seconds",34Help: "Total time spent under memory pressure in microseconds",35}, []string{"node", "workspace", "kind"}),3637io: prometheus.NewGaugeVec(prometheus.GaugeOpts{38Name: "workspace_io_psi_total_seconds",39Help: "Total time spent under io pressure in microseconds",40}, []string{"node", "workspace", "kind"}),4142nodeName: os.Getenv("NODENAME"),43}4445prom.MustRegister(46p.cpu,47p.memory,48p.io,49)5051return p52}5354func (p *PSIMetrics) Name() string { return "psi-metrics" }55func (p *PSIMetrics) Type() Version { return Version2 }5657func (p *PSIMetrics) Apply(ctx context.Context, opts *PluginOptions) error {58if _, v := opts.Annotations[kubernetes.WorkspacePressureStallInfoAnnotation]; !v {59return nil60}6162fullPath := filepath.Join(opts.BasePath, opts.CgroupPath)63if _, err := os.Stat(fullPath); err != nil {64return err65}6667cpu := cgroups.NewCpuController(fullPath)68memory := cgroups.NewMemoryController(fullPath)69io := cgroups.NewIOController(fullPath)7071go func() {72ticker := time.NewTicker(10 * time.Second)73defer ticker.Stop()7475for {76select {77case <-ticker.C:78p.scrape(cpu, memory, io, opts.InstanceId)79case <-ctx.Done():80return81}82}83}()8485return nil86}8788func (p *PSIMetrics) scrape(cpu *cgroups.Cpu, memory *cgroups.Memory, io *cgroups.IO, instanceID string) {89if psi, err := cpu.PSI(); err == nil {90p.cpu.WithLabelValues(p.nodeName, instanceID, "some").Set(float64(psi.Some))91p.cpu.WithLabelValues(p.nodeName, instanceID, "full").Set(float64(psi.Full))92} else if !os.IsNotExist(err) {93log.WithError(err).WithFields(log.OWI("", "", instanceID)).Warn("could not retrieve cpu psi")94}9596if psi, err := memory.PSI(); err == nil {97p.memory.WithLabelValues(p.nodeName, instanceID, "some").Set(float64(psi.Some))98p.memory.WithLabelValues(p.nodeName, instanceID, "full").Set(float64(psi.Full))99} else if !os.IsNotExist(err) {100log.WithError(err).WithFields(log.OWI("", "", instanceID)).Warn("could not retrieve memory psi")101}102103if psi, err := io.PSI(); err == nil {104p.io.WithLabelValues(p.nodeName, instanceID, "some").Set(float64(psi.Some))105p.io.WithLabelValues(p.nodeName, instanceID, "full").Set(float64(psi.Full))106} else if !os.IsNotExist(err) {107log.WithError(err).WithFields(log.OWI("", "", instanceID)).Warn("could not retrieve io psi")108}109}110111112