// Path: blob/main/components/ws-daemon/pkg/daemon/daemon.go
// 2501 views
// Copyright (c) 2020 Gitpod GmbH. All rights reserved.1// Licensed under the GNU Affero General Public License (AGPL).2// See License.AGPL.txt in the project root for license information.34package daemon56import (7"context"8"fmt"9"os"10"time"1112"github.com/prometheus/client_golang/prometheus"13"github.com/prometheus/client_golang/prometheus/collectors"14"golang.org/x/xerrors"15"k8s.io/apimachinery/pkg/runtime"16utilruntime "k8s.io/apimachinery/pkg/util/runtime"17"k8s.io/client-go/kubernetes"18clientgoscheme "k8s.io/client-go/kubernetes/scheme"19"k8s.io/client-go/rest"20"k8s.io/client-go/tools/clientcmd"21ctrl "sigs.k8s.io/controller-runtime"22"sigs.k8s.io/controller-runtime/pkg/cache"23"sigs.k8s.io/controller-runtime/pkg/manager"24"sigs.k8s.io/controller-runtime/pkg/metrics"25metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"26"sigs.k8s.io/controller-runtime/pkg/webhook"2728"github.com/gitpod-io/gitpod/common-go/log"29"github.com/gitpod-io/gitpod/ws-daemon/pkg/cgroup"30"github.com/gitpod-io/gitpod/ws-daemon/pkg/container"31"github.com/gitpod-io/gitpod/ws-daemon/pkg/content"32"github.com/gitpod-io/gitpod/ws-daemon/pkg/controller"33"github.com/gitpod-io/gitpod/ws-daemon/pkg/cpulimit"34"github.com/gitpod-io/gitpod/ws-daemon/pkg/diskguard"35"github.com/gitpod-io/gitpod/ws-daemon/pkg/dispatch"36"github.com/gitpod-io/gitpod/ws-daemon/pkg/iws"37"github.com/gitpod-io/gitpod/ws-daemon/pkg/netlimit"38"github.com/gitpod-io/gitpod/ws-daemon/pkg/quota"39workspacev1 "github.com/gitpod-io/gitpod/ws-manager/api/crd/v1"40)4142var (43scheme = runtime.NewScheme()44)4546func init() {47utilruntime.Must(clientgoscheme.AddToScheme(scheme))48utilruntime.Must(workspacev1.AddToScheme(scheme))49}5051// NewDaemon produces a new daemon52func NewDaemon(config Config) (*Daemon, error) {53// Use the metrics registry from the controller manager. 
The manager's registry54// isn't configurable so we use this instead of the baseserver's default registry.55// Hack: cast the registry as a *prometheus.Registry, as that's the type required56// by baseserver.57registry, ok := metrics.Registry.(*prometheus.Registry)58if ok {59// These collectors are also registered by baseserver. Use the ones from baseserver60// and remove the collectors registered by controller-manager, to prevent an error61// for duplicate collectors.62registry.Unregister(collectors.NewGoCollector())63registry.Unregister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}))64} else {65log.Error("failed to use controller-runtime metrics registry, not of expected type. Using default registry instead, but will not collect controller metrics...")66registry = prometheus.NewRegistry()67}68wrappedReg := prometheus.WrapRegistererWithPrefix("gitpod_ws_daemon_", registry)6970restCfg, err := newClientConfig(config.Runtime.Kubeconfig)71if err != nil {72return nil, err73}74clientset, err := kubernetes.NewForConfig(restCfg)75if err != nil {76return nil, err77}7879containerRuntime, err := container.FromConfig(config.Runtime.Container)80if err != nil {81return nil, err82}83if containerRuntime == nil {84return nil, xerrors.Errorf("no container runtime configured")85}8687nodename := os.Getenv("NODENAME")88if nodename == "" {89return nil, xerrors.Errorf("NODENAME env var isn't set")90}9192markUnmountFallback, err := NewMarkUnmountFallback(wrappedReg)93if err != nil {94return nil, err95}9697cgroupV2IOLimiter, err := cgroup.NewIOLimiterV2(config.IOLimit.WriteBWPerSecond.Value(), config.IOLimit.ReadBWPerSecond.Value(), config.IOLimit.WriteIOPS, config.IOLimit.ReadIOPS)98if err != nil {99return nil, err100}101102procV2Plugin, err := cgroup.NewProcLimiterV2(config.ProcLimit)103if err != nil {104return nil, err105}106107cgroupPlugins, err := 
cgroup.NewPluginHost(config.CPULimit.CGroupBasePath,108&cgroup.FuseDeviceEnablerV2{},109cgroupV2IOLimiter,110&cgroup.ProcessPriorityV2{111ProcessPriorities: map[cgroup.ProcessType]int{112cgroup.ProcessWorkspaceKit: -10,113cgroup.ProcessSupervisor: -10,114115cgroup.ProcessIDE: -10,116cgroup.ProcessWebIDEHelper: -5,117118cgroup.ProcessCodeServer: -10,119cgroup.ProcessCodeServerHelper: -5,120121cgroup.ProcessJetBrainsIDE: -10,122},123EnableOOMScoreAdj: config.OOMScores.Enabled,124OOMScoreAdj: map[cgroup.ProcessType]int{125cgroup.ProcessWorkspaceKit: config.OOMScores.Tier1,126cgroup.ProcessSupervisor: config.OOMScores.Tier1,127cgroup.ProcessCodeServer: config.OOMScores.Tier1,128cgroup.ProcessIDE: config.OOMScores.Tier1,129cgroup.ProcessJetBrainsIDE: config.OOMScores.Tier1,130cgroup.ProcessCodeServerHelper: config.OOMScores.Tier2,131cgroup.ProcessWebIDEHelper: config.OOMScores.Tier2,132},133},134procV2Plugin,135cgroup.NewPSIMetrics(wrappedReg),136)137if err != nil {138return nil, err139}140141if cgroupPlugins.CGroupVersion != cgroup.Version2 {142return nil, xerrors.Errorf("only cgroup v2 is supported")143}144145err = wrappedReg.Register(cgroupPlugins)146if err != nil {147return nil, xerrors.Errorf("cannot register cgroup plugin metrics: %w", err)148}149150listener := []dispatch.Listener{151cpulimit.NewDispatchListener(&config.CPULimit, wrappedReg),152markUnmountFallback,153cgroupPlugins,154}155156netlimiter := netlimit.NewConnLimiter(config.NetLimit, wrappedReg)157if config.NetLimit.Enabled {158listener = append(listener, netlimiter)159}160161var configReloader CompositeConfigReloader162configReloader = append(configReloader, ConfigReloaderFunc(func(ctx context.Context, config *Config) error {163cgroupV2IOLimiter.Update(config.IOLimit.WriteBWPerSecond.Value(), config.IOLimit.ReadBWPerSecond.Value(), config.IOLimit.WriteIOPS, config.IOLimit.ReadIOPS)164procV2Plugin.Update(config.ProcLimit)165if config.NetLimit.Enabled {166netlimiter.Update(config.NetLimit)167}168return 
nil169}))170171var mgr manager.Manager172173mgr, err = ctrl.NewManager(restCfg, ctrl.Options{174Scheme: scheme,175HealthProbeBindAddress: "0",176Metrics: metricsserver.Options{177// Disable the metrics server.178// We only need access to the reconciliation loop feature.179BindAddress: "0",180},181Cache: cache.Options{182DefaultNamespaces: map[string]cache.Config{183config.Runtime.KubernetesNamespace: {},184config.Runtime.SecretsNamespace: {},185},186},187WebhookServer: webhook.NewServer(webhook.Options{188Port: 9443,189}),190})191if err != nil {192return nil, err193}194195contentCfg := config.Content196197xfs, err := quota.NewXFS(contentCfg.WorkingArea)198if err != nil {199return nil, err200}201202hooks := content.WorkspaceLifecycleHooks(203contentCfg,204config.Runtime.WorkspaceCIDR,205&iws.Uidmapper{Config: config.Uidmapper, Runtime: containerRuntime},206xfs,207config.CPULimit.CGroupBasePath,208)209210dsptch, err := dispatch.NewDispatch(containerRuntime, clientset, config.Runtime.KubernetesNamespace, nodename, listener...)211if err != nil {212return nil, err213}214215workspaceOps, err := controller.NewWorkspaceOperations(contentCfg, controller.NewWorkspaceProvider(contentCfg.WorkingArea, hooks), wrappedReg, dsptch)216if err != nil {217return nil, err218}219220wsctrl, err := controller.NewWorkspaceController(221mgr.GetClient(), mgr.GetEventRecorderFor("workspace"), nodename, config.Runtime.SecretsNamespace, config.WorkspaceController.MaxConcurrentReconciles, workspaceOps, wrappedReg, containerRuntime)222if err != nil {223return nil, err224}225err = wsctrl.SetupWithManager(mgr)226if err != nil {227return nil, err228}229230ssctrl := controller.NewSnapshotController(231mgr.GetClient(), mgr.GetEventRecorderFor("snapshot"), nodename, config.WorkspaceController.MaxConcurrentReconciles, workspaceOps)232err = ssctrl.SetupWithManager(mgr)233if err != nil {234return nil, err235}236237housekeeping := controller.NewHousekeeping(contentCfg.WorkingArea, 5*time.Minute)238go 
housekeeping.Start(context.Background())239240dsk := diskguard.FromConfig(config.DiskSpaceGuard, clientset, nodename)241242return &Daemon{243Config: config,244dispatch: dsptch,245diskGuards: dsk,246configReloader: configReloader,247mgr: mgr,248metricsRegistry: registry,249}, nil250}251252func newClientConfig(kubeconfig string) (*rest.Config, error) {253if kubeconfig != "" {254return clientcmd.BuildConfigFromFlags("", kubeconfig)255}256257return rest.InClusterConfig()258}259260// Daemon connects all the individual bits and bobs that make up the workspace daemon261type Daemon struct {262Config Config263264dispatch *dispatch.Dispatch265diskGuards []*diskguard.Guard266configReloader ConfigReloader267mgr ctrl.Manager268metricsRegistry *prometheus.Registry269270cancel context.CancelFunc271}272273func (d *Daemon) ReloadConfig(ctx context.Context, cfg *Config) error {274return d.configReloader.ReloadConfig(ctx, cfg)275}276277// Start runs all parts of the daemon until stop is called278func (d *Daemon) Start() error {279err := d.dispatch.Start()280if err != nil {281return xerrors.Errorf("cannot start dispatch: %w", err)282}283284for _, dsk := range d.diskGuards {285go dsk.Start()286}287288var ctx context.Context289ctx, d.cancel = context.WithCancel(context.Background())290291go func() {292err := d.mgr.Start(ctx)293if err != nil {294log.WithError(err).Fatal("cannot start controller")295}296}()297298return nil299}300301// Stop gracefully shuts down the daemon. 
Once stopped, it302// cannot be started again.303func (d *Daemon) Stop() error {304d.cancel()305306var errs []error307errs = append(errs, d.dispatch.Close())308for _, err := range errs {309if err != nil {310return err311}312}313314return nil315}316317func (d *Daemon) ReadinessProbe() func() error {318return func() error {319// use 2 second timeout to ensure that IsContainerdReady() will not block indefinetely320ctx, cancel := context.WithTimeout(context.Background(), time.Duration(2*time.Second))321defer cancel()322isContainerdReady, err := d.dispatch.Runtime.IsContainerdReady(ctx)323if err != nil {324log.WithError(err).Errorf("readiness probe failure: containerd error")325return fmt.Errorf("containerd error: %v", err)326}327328if !isContainerdReady {329err := fmt.Errorf("containerd is not ready")330log.WithError(err).Error("readiness probe failure")331return err332}333334return nil335}336}337338func (d *Daemon) MetricsRegistry() *prometheus.Registry {339return d.metricsRegistry340}341342343