Path: blob/main/components/ws-manager-mk2/controllers/workspace_controller_test.go
// Copyright (c) 2022 Gitpod GmbH. All rights reserved.
// Licensed under the GNU Affero General Public License (AGPL).
// See License-AGPL.txt in the project root for license information.

package controllers

import (
	"fmt"

	"github.com/aws/smithy-go/ptr"
	"github.com/google/uuid"
	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
	dto "github.com/prometheus/client_model/go"
	"google.golang.org/protobuf/proto"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"

	wsk8s "github.com/gitpod-io/gitpod/common-go/kubernetes"
	csapi "github.com/gitpod-io/gitpod/content-service/api"
	"github.com/gitpod-io/gitpod/ws-manager-mk2/pkg/constants"
	workspacev1 "github.com/gitpod-io/gitpod/ws-manager/api/crd/v1"
)

var _ = Describe("WorkspaceController", func() {
	Context("with regular workspaces", func() {
		It("should handle successful workspace creation and stop request", func() {
			name := uuid.NewString()

			envSecret := createSecret(fmt.Sprintf("%s-env", name), "default")
			tokenSecret := createSecret(fmt.Sprintf("%s-tokens", name), secretsNamespace)

			ws := newWorkspace(name, "default")
			m := collectMetricCounts(wsMetrics, ws)
			pod := createWorkspaceExpectPod(ws)

			Expect(controllerutil.ContainsFinalizer(pod, workspacev1.GitpodFinalizerName)).To(BeTrue())

			By("controller updating the pod starts value")
			Eventually(func() (int, error) {
				err := k8sClient.Get(ctx, types.NamespacedName{Name: ws.Name, Namespace: ws.Namespace}, ws)
				if err != nil {
					return 0, err
				}
				return ws.Status.PodStarts, nil
			}, timeout, interval).Should(Equal(1))

			// Deployed condition should be added.
			expectConditionEventually(ws, string(workspacev1.WorkspaceConditionDeployed), metav1.ConditionTrue, "")

			// Runtime status should be set.
			expectRuntimeStatus(ws, pod)

			By("controller setting status after creation")
			Eventually(func(g Gomega) {
				g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: ws.Name, Namespace: ws.Namespace}, ws)).To(Succeed())
				g.Expect(ws.Status.OwnerToken).ToNot(BeEmpty())
				g.Expect(ws.Status.URL).ToNot(BeEmpty())
			}, timeout, interval).Should(Succeed())

			// Transition Pod to pending, and expect workspace to reach Creating phase.
			// This should also cause create time metrics to be recorded.
			updateObjWithRetries(k8sClient, pod, true, func(pod *corev1.Pod) {
				pod.Status.Phase = corev1.PodPending
				pod.Status.ContainerStatuses = []corev1.ContainerStatus{{
					State: corev1.ContainerState{
						Waiting: &corev1.ContainerStateWaiting{
							Reason: "ContainerCreating",
						},
					},
					Name: "workspace",
				}}
			})

			expectPhaseEventually(ws, workspacev1.WorkspacePhaseCreating)

			// Transition Pod to running, and expect workspace to reach Running phase.
			// This should also cause e.g. startup time metrics to be recorded.
			updateObjWithRetries(k8sClient, pod, true, func(pod *corev1.Pod) {
				pod.Status.Phase = corev1.PodRunning
				pod.Status.ContainerStatuses = []corev1.ContainerStatus{{
					Name:  "workspace",
					Ready: true,
				}}
			})

			updateObjWithRetries(k8sClient, ws, true, func(ws *workspacev1.Workspace) {
				ws.Status.SetCondition(workspacev1.NewWorkspaceConditionContentReady(metav1.ConditionTrue, workspacev1.ReasonInitializationSuccess, ""))
			})

			expectPhaseEventually(ws, workspacev1.WorkspacePhaseRunning)
			expectSecretCleanup(envSecret)
			expectSecretCleanup(tokenSecret)

			markReady(ws)

			requestStop(ws)

			expectFinalizerAndMarkBackupCompleted(ws, pod)

			expectWorkspaceCleanup(ws, pod)

			By("checking pod doesn't get recreated by controller")
			Consistently(func() error {
				return checkNotFound(pod)
			}, duration, interval).Should(Succeed(), "pod came back")

			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				starts:         1,
				creatingCounts: 1,
				restores:       1,
				stops:          map[StopReason]int{StopReasonRegular: 1},
				backups:        1,
			})
		})

		It("should handle content init failure", func() {
			ws := newWorkspace(uuid.NewString(), "default")
			m := collectMetricCounts(wsMetrics, ws)
			pod := createWorkspaceExpectPod(ws)

			By("adding ws init failure condition")
			updateObjWithRetries(k8sClient, ws, true, func(ws *workspacev1.Workspace) {
				ws.Status.SetCondition(workspacev1.NewWorkspaceConditionContentReady(metav1.ConditionFalse, workspacev1.ReasonInitializationFailure, "some failure"))
			})

			// On init failure, expect workspace cleans up without a backup.
			expectWorkspaceCleanup(ws, pod)

			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				startFailures:   1,
				failures:        1,
				restoreFailures: 1,
				stops:           map[StopReason]int{StopReasonStartFailure: 1},
			})
		})

		It("should not take a backup if content init did not happen", func() {
			ws := newWorkspace(uuid.NewString(), "default")
			m := collectMetricCounts(wsMetrics, ws)
			pod := createWorkspaceExpectPod(ws)

			requestStop(ws)

			// No content init, expect cleanup without backup.
			expectWorkspaceCleanup(ws, pod)

			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				startFailures: 0, // No start failure should be recorded, even though the workspace didn't become ready, as it was stopped before it could become ready.
				stops:         map[StopReason]int{StopReasonRegular: 1},
			})
		})

		It("should handle backup failure", func() {
			ws := newWorkspace(uuid.NewString(), "default")
			m := collectMetricCounts(wsMetrics, ws)
			pod := createWorkspaceExpectPod(ws)

			markReady(ws)

			// Stop the workspace.
			requestStop(ws)

			// Indicate the backup failed.
			expectFinalizerAndMarkBackupFailed(ws, pod)

			// Workspace should get cleaned up.
			expectWorkspaceCleanup(ws, pod)

			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				restores:       1,
				backups:        1,
				backupFailures: 1,
				failures:       1,
				stops:          map[StopReason]int{StopReasonFailed: 1},
			})
		})

		It("should handle workspace failure", func() {
			ws := newWorkspace(uuid.NewString(), "default")
			m := collectMetricCounts(wsMetrics, ws)
			pod := createWorkspaceExpectPod(ws)

			markReady(ws)

			// Update Pod with failed exit status.
			updateObjWithRetries(k8sClient, pod, true, func(pod *corev1.Pod) {
				pod.Status.ContainerStatuses = append(pod.Status.ContainerStatuses, corev1.ContainerStatus{
					LastTerminationState: corev1.ContainerState{
						Terminated: &corev1.ContainerStateTerminated{
							ExitCode: 1,
							Message:  "Error",
						},
					},
				})
			})

			// Controller should detect container exit and add Failed condition.
			expectConditionEventually(ws, string(workspacev1.WorkspaceConditionFailed), metav1.ConditionTrue, "")

			expectFinalizerAndMarkBackupCompleted(ws, pod)

			expectWorkspaceCleanup(ws, pod)

			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				restores:      1,
				startFailures: 0,
				failures:      1,
				stops:         map[StopReason]int{StopReasonFailed: 1},
				backups:       1,
			})
		})

		It("should handle workspace failure with unknown exit code", func() {
			ws := newWorkspace(uuid.NewString(), "default")
			m := collectMetricCounts(wsMetrics, ws)
			pod := createWorkspaceExpectPod(ws)

			markReady(ws)

			// Update Pod with failed exit status.
			updateObjWithRetries(k8sClient, pod, true, func(pod *corev1.Pod) {
				pod.Status.ContainerStatuses = append(pod.Status.ContainerStatuses, corev1.ContainerStatus{
					LastTerminationState: corev1.ContainerState{
						Terminated: &corev1.ContainerStateTerminated{
							ExitCode: containerUnknownExitCode,
						},
					},
				})
			})

			// Controller should detect container exit and add Failed condition.
			expectConditionEventually(ws, string(workspacev1.WorkspaceConditionFailed), metav1.ConditionTrue, "")

			expectFinalizerAndMarkBackupCompleted(ws, pod)

			expectWorkspaceCleanup(ws, pod)

			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				restores:      1,
				startFailures: 0,
				failures:      1,
				stops:         map[StopReason]int{StopReasonFailed: 1},
				backups:       1,
			})
		})

		It("should clean up timed out workspaces", func() {
			ws := newWorkspace(uuid.NewString(), "default")
			m := collectMetricCounts(wsMetrics, ws)
			pod := createWorkspaceExpectPod(ws)

			markReady(ws)

			By("adding Timeout condition")
			updateObjWithRetries(k8sClient, ws, true, func(ws *workspacev1.Workspace) {
				ws.Status.SetCondition(workspacev1.NewWorkspaceConditionTimeout(""))
			})

			expectFinalizerAndMarkBackupCompleted(ws, pod)

			expectWorkspaceCleanup(ws, pod)

			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				restores: 1,
				stops:    map[StopReason]int{StopReasonTimeout: 1},
				backups:  1,
			})
		})

		It("should handle workspace abort", func() {
			ws := newWorkspace(uuid.NewString(), "default")
			m := collectMetricCounts(wsMetrics, ws)
			pod := createWorkspaceExpectPod(ws)

			markReady(ws)

			// Update Pod with stop and abort conditions.
			updateObjWithRetries(k8sClient, ws, true, func(ws *workspacev1.Workspace) {
				ws.Status.SetCondition(workspacev1.NewWorkspaceConditionAborted(""))
				ws.Status.SetCondition(workspacev1.NewWorkspaceConditionStoppedByRequest(""))
			})

			// Expect cleanup without a backup.
			expectWorkspaceCleanup(ws, pod)

			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				restores: 1,
				stops:    map[StopReason]int{StopReasonAborted: 1},
			})
		})

		It("deleting workspace resource should gracefully clean up", func() {
			name := uuid.NewString()
			ws := newWorkspace(name, "default")

			envSecret := createSecret(fmt.Sprintf("%s-env", name), "default")
			tokenSecret := createSecret(fmt.Sprintf("%s-tokens", name), secretsNamespace)

			m := collectMetricCounts(wsMetrics, ws)
			pod := createWorkspaceExpectPod(ws)

			markReady(ws)

			Expect(k8sClient.Delete(ctx, ws)).To(Succeed())

			expectPhaseEventually(ws, workspacev1.WorkspacePhaseStopping)

			expectFinalizerAndMarkBackupCompleted(ws, pod)

			expectWorkspaceCleanup(ws, pod)

			expectSecretCleanup(envSecret)
			expectSecretCleanup(tokenSecret)

			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				restores: 1,
				stops:    map[StopReason]int{StopReasonRegular: 1},
				backups:  1,
			})
		})

		It("node disappearing should fail with backup failure", func() {
			ws := newWorkspace(uuid.NewString(), "default")
			m := collectMetricCounts(wsMetrics, ws)

			// Simulate pod getting scheduled to a node.
			var node corev1.Node
			node.Name = uuid.NewString()
			Expect(k8sClient.Create(ctx, &node)).To(Succeed())
			// Manually create the workspace pod with the node name.
			// We can't update the pod with the node name, as this operation
			// is only allowed for the scheduler. So as a hack, we manually
			// create the workspace's pod.
			pod := &corev1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name:       fmt.Sprintf("ws-%s", ws.Name),
					Namespace:  ws.Namespace,
					Finalizers: []string{workspacev1.GitpodFinalizerName},
					Labels: map[string]string{
						wsk8s.WorkspaceManagedByLabel: constants.ManagedBy,
					},
				},
				Spec: corev1.PodSpec{
					NodeName: node.Name,
					Containers: []corev1.Container{{
						Name:  "workspace",
						Image: "someimage",
					}},
				},
			}

			Expect(k8sClient.Create(ctx, pod)).To(Succeed())
			pod = createWorkspaceExpectPod(ws)
			updateObjWithRetries(k8sClient, pod, false, func(pod *corev1.Pod) {
				Expect(ctrl.SetControllerReference(ws, pod, k8sClient.Scheme())).To(Succeed())
			})
			// Wait until controller has reconciled at least once (by waiting for the runtime status to get updated).
			// This is necessary for the metrics to get recorded correctly. If we don't wait, the first reconciliation
			// might be once the Pod is already in a running state, and hence the metric state might not record e.g. content
			// restore.
			// This is only necessary because we manually created the pod, normally the Pod creation is the controller's
			// first reconciliation which ensures the metrics are recorded from the workspace's initial state.

			Eventually(func(g Gomega) {
				g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: ws.Name, Namespace: ws.Namespace}, ws)).To(Succeed())
				g.Expect(ws.Status.Runtime).ToNot(BeNil())
				g.Expect(ws.Status.Runtime.PodName).To(Equal(pod.Name))
			}, timeout, interval).Should(Succeed())

			markReady(ws)

			// Make node disappear 🪄
			By("deleting node")
			Expect(k8sClient.Delete(ctx, &node)).To(Succeed())

			// Expect workspace to disappear, with a backup failure.
			// NOTE: Can't use expectWorkspaceCleanup() here, as the pod never disappears in envtest due to a nodeName being set.
			// Therefore, we only verify deletion timestamps are set and all finalizers are removed, which in a real cluster
			// would cause the pod and workspace to disappear.
			By("workspace and pod finalizers being removed and deletion timestamps set")
			Eventually(func() error {
				if err := k8sClient.Get(ctx, types.NamespacedName{Name: pod.GetName(), Namespace: pod.GetNamespace()}, pod); err != nil {
					if !errors.IsNotFound(err) {
						return err
					}
				} else {
					if len(pod.ObjectMeta.Finalizers) > 0 {
						return fmt.Errorf("pod still has finalizers: %v", pod.ObjectMeta.Finalizers)
					}
					if pod.DeletionTimestamp == nil {
						return fmt.Errorf("pod deletion timestamp not set")
					}
				}

				if err := k8sClient.Get(ctx, types.NamespacedName{Name: ws.GetName(), Namespace: ws.GetNamespace()}, ws); err != nil {
					if !errors.IsNotFound(err) {
						return err
					}
				} else {
					if ws.Status.Phase != workspacev1.WorkspacePhaseStopped {
						return fmt.Errorf("workspace phase did not reach Stopped, was %s", ws.Status.Phase)
					}
					// Can't check for workspace finalizer removal and deletionTimestamp being set,
					// as this only happens once all pods are gone, and the pod never disappears in this test.
				}
				return nil
			}, timeout, interval).Should(Succeed(), "pod/workspace not cleaned up")

			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				restores:       1,
				backups:        1,
				backupFailures: 1,
				failures:       1,
				stops:          map[StopReason]int{StopReasonFailed: 1},
			})
		})

		It("pod rejection should result in a retry", func() {
			ws := newWorkspace(uuid.NewString(), "default")
			m := collectMetricCounts(wsMetrics, ws)
			su := collectSubscriberUpdates()

			// ### prepare block start
			By("creating workspace")
			// Simulate pod getting scheduled to a node.
			var node corev1.Node
			node.Name = uuid.NewString()
			Expect(k8sClient.Create(ctx, &node)).To(Succeed())
			// Manually create the workspace pod with the node name.
			// We can't update the pod with the node name, as this operation
			// is only allowed for the scheduler. So as a hack, we manually
			// create the workspace's pod.
			pod := &corev1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name:       fmt.Sprintf("ws-%s", ws.Name),
					Namespace:  ws.Namespace,
					Finalizers: []string{workspacev1.GitpodFinalizerName},
					Labels: map[string]string{
						wsk8s.WorkspaceManagedByLabel: constants.ManagedBy,
					},
				},
				Spec: corev1.PodSpec{
					NodeName: node.Name,
					Containers: []corev1.Container{{
						Name:  "workspace",
						Image: "someimage",
					}},
				},
			}

			Expect(k8sClient.Create(ctx, pod)).To(Succeed())
			pod = createWorkspaceExpectPod(ws)
			updateObjWithRetries(k8sClient, pod, false, func(pod *corev1.Pod) {
				Expect(ctrl.SetControllerReference(ws, pod, k8sClient.Scheme())).To(Succeed())
			})
			// mimic the regular "start" phase
			updateObjWithRetries(k8sClient, ws, true, func(ws *workspacev1.Workspace) {
				ws.Status.PodStarts = 1
				ws.Status.PodRecreated = 0
			})

			// Wait until controller has reconciled at least once (by waiting for the runtime status to get updated).
			// This is necessary for the metrics to get recorded correctly. If we don't wait, the first reconciliation
			// might be once the Pod is already in a running state, and hence the metric state might not record e.g. content
			// restore.
			// This is only necessary because we manually created the pod, normally the Pod creation is the controller's
			// first reconciliation which ensures the metrics are recorded from the workspace's initial state.

			Eventually(func(g Gomega) {
				g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: ws.Name, Namespace: ws.Namespace}, ws)).To(Succeed())
				g.Expect(ws.Status.Runtime).ToNot(BeNil())
				g.Expect(ws.Status.Runtime.PodName).To(Equal(pod.Name))
			}, timeout, interval).Should(Succeed())

			// Await "deployed" condition, and check we are good
			expectConditionEventually(ws, string(workspacev1.WorkspaceConditionDeployed), metav1.ConditionTrue, "")
			Eventually(func(g Gomega) {
				g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: ws.Name, Namespace: ws.Namespace}, ws)).To(Succeed())
				g.Expect(ws.Status.PodStarts).To(Equal(1))
				g.Expect(ws.Status.PodRecreated).To(Equal(0))
			}, timeout, interval).Should(Succeed())

			// ### prepare block end

			// ### trigger block start
			// Make pod be rejected 🪄
			By("rejecting pod")
			rejectPod(pod)

			By("await pod being in stopping")
			Eventually(func(g Gomega) {
				g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: ws.Name, Namespace: ws.Namespace}, ws)).To(Succeed())
				g.Expect(ws.Status.Phase).To(Equal(workspacev1.WorkspacePhaseStopping))
			}, timeout, interval).Should(Succeed())

			// when a rejected workspace pod is in stopping, ws-daemon wipes the state before it's moved to "stopped"
			// mimic this ws-daemon behavior
			updateObjWithRetries(k8sClient, ws, true, func(ws *workspacev1.Workspace) {
				ws.Status.SetCondition(workspacev1.NewWorkspaceConditionStateWiped("", metav1.ConditionTrue))
			})

			By("await pod recreation")
			Eventually(func(g Gomega) {
				g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: ws.Name, Namespace: ws.Namespace}, ws)).To(Succeed())
				g.Expect(ws.Status.PodRecreated).To(Equal(1))
				g.Expect(ws.Status.Phase).To(Equal(workspacev1.WorkspacePhasePending))
			}, timeout, interval).Should(Succeed())
			// ### trigger block end

			// ### retry block start
			// Transition Pod to pending, and expect workspace to reach Creating phase.
			// This should also cause create time metrics to be recorded.
			updateObjWithRetries(k8sClient, pod, true, func(pod *corev1.Pod) {
				pod.Status.Phase = corev1.PodPending
				pod.Status.ContainerStatuses = []corev1.ContainerStatus{{
					State: corev1.ContainerState{
						Waiting: &corev1.ContainerStateWaiting{
							Reason: "ContainerCreating",
						},
					},
					Name: "workspace",
				}}
			})

			expectPhaseEventually(ws, workspacev1.WorkspacePhaseCreating)
			// ### retry block end

			// ### move to running start
			// Transition Pod to running, and expect workspace to reach Running phase.
			// This should also cause e.g. startup time metrics to be recorded.
			updateObjWithRetries(k8sClient, pod, true, func(pod *corev1.Pod) {
				pod.Status.Phase = corev1.PodRunning
				pod.Status.ContainerStatuses = []corev1.ContainerStatus{{
					Name:  "workspace",
					Ready: true,
				}}
			})

			updateObjWithRetries(k8sClient, ws, true, func(ws *workspacev1.Workspace) {
				ws.Status.SetCondition(workspacev1.NewWorkspaceConditionContentReady(metav1.ConditionTrue, workspacev1.ReasonInitializationSuccess, ""))
			})

			expectPhaseEventually(ws, workspacev1.WorkspacePhaseRunning)
			// ### move to running end

			// ### validate start
			Eventually(func(g Gomega) {
				g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: ws.Name, Namespace: ws.Namespace}, ws)).To(Succeed())
				g.Expect(ws.Status.PodStarts).To(Equal(2))
				g.Expect(ws.Status.PodRecreated).To(Equal(1))
			}, timeout, interval).Should(Succeed())

			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				restores:       1,
				backups:        0,
				backupFailures: 0,
				failures:       1,
				creatingCounts: 1,
				stops:          map[StopReason]int{StopReasonStartFailure: 1},
				starts:         1, // this is NOT PodStarts, but merely an artifact of how we count it in the tests
				recreations:    map[int]int{1: 1},
			})

			expectPhaseTransitions(su, []workspacev1.WorkspacePhase{workspacev1.WorkspacePhasePending, workspacev1.WorkspacePhaseCreating, workspacev1.WorkspacePhaseInitializing, workspacev1.WorkspacePhaseRunning})
			// ### validate end
		})
	})

	Context("with headless workspaces", func() {
		It("should handle headless task failure", func() {
			ws, pod, m := createHeadlessWorkspace(workspacev1.WorkspaceTypePrebuild)

			updateObjWithRetries(k8sClient, pod, true, func(p *corev1.Pod) {
				p.Status.Phase = corev1.PodFailed
				p.Status.ContainerStatuses = []corev1.ContainerStatus{
					{
						Name: "workspace",
						State: corev1.ContainerState{
							Terminated: &corev1.ContainerStateTerminated{
								Message:  headlessTaskFailedPrefix,
								ExitCode: 5,
							},
						},
					},
				}
			})

			expectFinalizerAndMarkBackupCompleted(ws, pod)
			expectWorkspaceCleanup(ws, pod)
			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				restores:       1,
				backups:        1,
				backupFailures: 0,
				failures:       0,
				stops:          map[StopReason]int{StopReasonRegular: 1},
			})
		})

		It("should handle successful prebuild", func() {
			ws, pod, m := createHeadlessWorkspace(workspacev1.WorkspaceTypePrebuild)
			updateObjWithRetries(k8sClient, pod, true, func(p *corev1.Pod) {
				p.Status.Phase = corev1.PodSucceeded
			})

			expectFinalizerAndMarkBackupCompleted(ws, pod)
			expectWorkspaceCleanup(ws, pod)
			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				restores:       1,
				backups:        1,
				backupFailures: 0,
				failures:       0,
				stops:          map[StopReason]int{StopReasonRegular: 1},
			})
		})

		It("should handle failed prebuild", func() {
			ws, pod, m := createHeadlessWorkspace(workspacev1.WorkspaceTypePrebuild)
			updateObjWithRetries(k8sClient, pod, true, func(p *corev1.Pod) {
				p.Status.Phase = corev1.PodFailed
				p.Status.ContainerStatuses = []corev1.ContainerStatus{
					{
						Name: "workspace",
						State: corev1.ContainerState{
							Terminated: &corev1.ContainerStateTerminated{
								Message:  "prebuild failed",
								ExitCode: 5,
							},
						},
					},
				}
			})

			expectFinalizerAndMarkBackupCompleted(ws, pod)
			expectWorkspaceCleanup(ws, pod)
			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				restores:       1,
				backups:        1,
				backupFailures: 0,
				failures:       1,
				stops:          map[StopReason]int{StopReasonFailed: 1},
			})
		})

		It("should handle aborted prebuild", func() {
			ws, pod, m := createHeadlessWorkspace(workspacev1.WorkspaceTypePrebuild)
			// abort workspace
			updateObjWithRetries(k8sClient, ws, true, func(ws *workspacev1.Workspace) {
				ws.Status.SetCondition(workspacev1.NewWorkspaceConditionAborted("StopWorkspaceRequest"))
			})

			requestStop(ws)

			// should not take a backup
			expectWorkspaceCleanup(ws, pod)
			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				restores:       1,
				backups:        0,
				backupFailures: 0,
				failures:       0,
				stops:          map[StopReason]int{StopReasonAborted: 1},
			})
		})

		It("should handle imagebuild", func() {
			ws, pod, m := createHeadlessWorkspace(workspacev1.WorkspaceTypeImageBuild)
			updateObjWithRetries(k8sClient, pod, true, func(p *corev1.Pod) {
				p.Status.Phase = corev1.PodSucceeded
			})

			// should not take a backup
			expectWorkspaceCleanup(ws, pod)
			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				restores:       1,
				backups:        0,
				backupFailures: 0,
				failures:       0,
				stops:          map[StopReason]int{StopReasonRegular: 1},
			})
		})
	})
})

func createHeadlessWorkspace(typ workspacev1.WorkspaceType) (ws *workspacev1.Workspace, pod *corev1.Pod, m metricCounts) {
	name := uuid.NewString()

	ws = newWorkspace(name, "default")
	ws.Spec.Type = typ
	m = collectMetricCounts(wsMetrics, ws)
	pod = createWorkspaceExpectPod(ws)

	// Expect headless
	Expect(ws.IsHeadless()).To(BeTrue())
	Expect(controllerutil.ContainsFinalizer(pod, workspacev1.GitpodFinalizerName)).To(BeTrue())

	// Expect runtime status also gets reported for headless workspaces.
	expectRuntimeStatus(ws, pod)

	By("controller setting status after creation")
	Eventually(func(g Gomega) {
		g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: ws.Name, Namespace: ws.Namespace}, ws)).To(Succeed())
		g.Expect(ws.Status.OwnerToken).ToNot(BeEmpty())
		g.Expect(ws.Status.URL).ToNot(BeEmpty())
	}, timeout, interval).Should(Succeed())

	markReady(ws)
	return
}

func updateObjWithRetries[O client.Object](c client.Client, obj O, updateStatus bool, update func(obj O)) {
	GinkgoHelper()
	Eventually(func() error {
		err := c.Get(ctx, types.NamespacedName{
			Name:      obj.GetName(),
			Namespace: obj.GetNamespace(),
		}, obj)
		if err != nil {
			return err
		}

		// Apply update.
		update(obj)

		if updateStatus {
			return c.Status().Update(ctx, obj)
		}

		return c.Update(ctx, obj)
	}, timeout, interval).Should(Succeed())
}

// createWorkspaceExpectPod creates the workspace resource, and expects
// the controller to eventually create the workspace Pod. The created Pod
// is returned.
func createWorkspaceExpectPod(ws *workspacev1.Workspace) *corev1.Pod {
	GinkgoHelper()
	By("creating workspace")
	Expect(k8sClient.Create(ctx, ws)).To(Succeed())

	By("controller creating workspace pod")
	pod := &corev1.Pod{}
	var podPrefix string
	switch ws.Spec.Type {
	case workspacev1.WorkspaceTypeRegular:
		podPrefix = "ws"
	case workspacev1.WorkspaceTypePrebuild:
		podPrefix = "prebuild"
	case workspacev1.WorkspaceTypeImageBuild:
		podPrefix = "imagebuild"
	}
	Eventually(func() error {
		return k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s", podPrefix, ws.Name), Namespace: ws.Namespace}, pod)
	}, timeout, interval).Should(Succeed())
	return pod
}

func expectPhaseEventually(ws *workspacev1.Workspace, phase workspacev1.WorkspacePhase) {
	GinkgoHelper()
	By(fmt.Sprintf("controller transition workspace phase to %s", phase))
	Eventually(func(g Gomega) {
		g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: ws.Name, Namespace: ws.Namespace}, ws)).To(Succeed())
		g.Expect(ws.Status.Phase).To(Equal(phase))
	}, timeout, interval).Should(Succeed())
}

func expectConditionEventually(ws *workspacev1.Workspace, tpe string, status metav1.ConditionStatus, reason string) {
	GinkgoHelper()
	By(fmt.Sprintf("controller setting workspace condition %s to %s", tpe, status))
	Eventually(func(g Gomega) {
		g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: ws.Name, Namespace: ws.Namespace}, ws)).To(Succeed())
		c := wsk8s.GetCondition(ws.Status.Conditions, tpe)
		g.Expect(c).ToNot(BeNil(), fmt.Sprintf("expected condition %s to be present", tpe))
		g.Expect(c.Status).To(Equal(status))
		if reason != "" {
			g.Expect(c.Reason).To(Equal(reason))
		}
	}, timeout, interval).Should(Succeed())
}

func expectRuntimeStatus(ws *workspacev1.Workspace, pod *corev1.Pod) {
	GinkgoHelper()
	By("artificially setting the pod's status")
	// Since there are no Pod controllers running in the EnvTest cluster to populate the Pod status,
	// we artificially update the created Pod's status here, and verify later that the workspace
	// controller reconciles this and puts it in the workspace status.
	var (
		hostIP = "1.2.3.4"
		podIP  = "10.0.0.0"
	)
	updateObjWithRetries(k8sClient, pod, true, func(p *corev1.Pod) {
		p.Status.HostIP = hostIP
		p.Status.PodIP = podIP
	})

	By("controller adding pod status to the workspace status")
	Eventually(func(g Gomega) {
		g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: ws.Name, Namespace: ws.Namespace}, ws)).To(Succeed())
		g.Expect(ws.Status.Runtime).ToNot(BeNil())
		g.Expect(ws.Status.Runtime.HostIP).To(Equal(hostIP))
		g.Expect(ws.Status.Runtime.PodIP).To(Equal(podIP))
		g.Expect(ws.Status.Runtime.PodName).To(Equal(pod.Name))
	}, timeout, interval).Should(Succeed())
}

func requestStop(ws *workspacev1.Workspace) {
	GinkgoHelper()
	By("adding stop signal")
	updateObjWithRetries(k8sClient, ws, true, func(ws *workspacev1.Workspace) {
		ws.Status.SetCondition(workspacev1.NewWorkspaceConditionStoppedByRequest(""))
	})
}

func rejectPod(pod *corev1.Pod) {
	GinkgoHelper()
	By("adding pod rejected condition")
	updateObjWithRetries(k8sClient, pod, true, func(pod *corev1.Pod) {
		pod.Status.Phase = corev1.PodFailed
		pod.Status.Reason = "OutOfcpu"
		pod.Status.Message = "Pod was rejected"
	})
}

func markReady(ws *workspacev1.Workspace) {
	GinkgoHelper()
	By("adding content ready condition")
	updateObjWithRetries(k8sClient, ws, true, func(ws *workspacev1.Workspace) {
		ws.Status.SetCondition(workspacev1.NewWorkspaceConditionContentReady(metav1.ConditionTrue, workspacev1.ReasonInitializationSuccess, ""))
		ws.Status.SetCondition(workspacev1.NewWorkspaceConditionEverReady())
	})
}

func expectFinalizerAndMarkBackupCompleted(ws *workspacev1.Workspace, pod *corev1.Pod) {
	GinkgoHelper()
	// Checking for the finalizer enforces our expectation that the workspace
	// should be waiting for a backup to be taken.
	By("checking finalizer exists for backup")
	Consistently(func() (bool, error) {
		if err := k8sClient.Get(ctx, types.NamespacedName{Name: pod.GetName(), Namespace: pod.GetNamespace()}, pod); err != nil {
			return false, err
		}
		return controllerutil.ContainsFinalizer(pod, workspacev1.GitpodFinalizerName), nil
	}, duration, interval).Should(BeTrue(), "missing gitpod finalizer on pod, expected one to wait for backup to succeed")

	By("signalling backup completed")
	updateObjWithRetries(k8sClient, ws, true, func(ws *workspacev1.Workspace) {
		ws.Status.SetCondition(workspacev1.NewWorkspaceConditionBackupComplete())
	})
}

func expectFinalizerAndMarkBackupFailed(ws *workspacev1.Workspace, pod *corev1.Pod) {
	GinkgoHelper()
	// Checking for the finalizer enforces our expectation that the workspace
	// should be waiting for a backup to be taken (or fail).
	By("checking finalizer exists for backup")
	Consistently(func() (bool, error) {
		if err := k8sClient.Get(ctx, types.NamespacedName{Name: pod.GetName(), Namespace: pod.GetNamespace()}, pod); err != nil {
			return false, err
		}
		return controllerutil.ContainsFinalizer(pod, workspacev1.GitpodFinalizerName), nil
	}, duration, interval).Should(BeTrue(), "missing gitpod finalizer on pod, expected one to wait for backup to succeed")

	By("signalling backup failed")
	updateObjWithRetries(k8sClient, ws, true, func(ws *workspacev1.Workspace) {
		ws.Status.SetCondition(workspacev1.NewWorkspaceConditionBackupFailure(""))
	})
}

func expectWorkspaceCleanup(ws *workspacev1.Workspace, pod *corev1.Pod) {
	GinkgoHelper()
	By("controller removing pod finalizers")
	Eventually(func() (int, error) {
		if err := k8sClient.Get(ctx, types.NamespacedName{Name: pod.GetName(), Namespace: pod.GetNamespace()}, pod); err != nil {
			if errors.IsNotFound(err) {
				// Race: finalizers got removed causing pod to get deleted before we could check.
				// This is what we want though.
				return 0, nil
			}
			return 0, err
		}
		return len(pod.ObjectMeta.Finalizers), nil

	}, timeout, interval).Should(Equal(0), "pod finalizers did not go away")

	By("cleaning up the workspace pod")
	Eventually(func() error {
		return checkNotFound(pod)
	}, timeout, interval).Should(Succeed(), "pod did not go away")

	By("controller removing workspace finalizers")
	Eventually(func() (int, error) {
		if err := k8sClient.Get(ctx, types.NamespacedName{Name: ws.GetName(), Namespace: ws.GetNamespace()}, ws); err != nil {
			if errors.IsNotFound(err) {
				// Race: finalizers got removed causing workspace to get deleted before we could check.
				// This is what we want though.
				return 0, nil
			}
			return 0, err
		}
		return len(ws.ObjectMeta.Finalizers), nil

	}, timeout, interval).Should(Equal(0), "workspace finalizers did not go away")

	By("cleaning up the workspace resource")
	Eventually(func(g Gomega) error {
		if err := checkNotFound(ws); err == nil {
			return nil
		}
		g.Expect(ws.Status.Phase).To(Equal(workspacev1.WorkspacePhaseStopped))
		return fmt.Errorf("workspace is Stopped, but hasn't been deleted yet")
	}, timeout, interval).Should(Succeed(), "workspace did not go away")
}

func expectSecretCleanup(secret *corev1.Secret) {
	GinkgoHelper()

	By("controller deleting secrets")
	Eventually(func() (int, error) {
		var s corev1.Secret
		if err := k8sClient.Get(ctx, types.NamespacedName{Name: secret.GetName(), Namespace: secret.GetNamespace()}, &s); err != nil {
			if errors.IsNotFound(err) {
				return 0, nil
			}
			return 1, err
		}
		return 1, nil

	}, timeout, interval).Should(Equal(0), "environment secret has not been deleted")
}

// checkNotFound returns nil if the object does not exist.
// Otherwise, it returns an error.
func checkNotFound(obj client.Object) error {
	err := k8sClient.Get(ctx, types.NamespacedName{Name: obj.GetName(), Namespace: obj.GetNamespace()}, obj)
	if err == nil {
		// Object exists, return as an error.
		return fmt.Errorf("object exists")
	}
	if errors.IsNotFound(err) {
		// Object doesn't exist, this is what we want.
		return nil
	}
	return err
}

func newWorkspace(name, namespace string) *workspacev1.Workspace {
	GinkgoHelper()
	initializer := &csapi.WorkspaceInitializer{
		Spec: &csapi.WorkspaceInitializer_Empty{Empty: &csapi.EmptyInitializer{}},
	}
	initializerBytes, err := proto.Marshal(initializer)
	Expect(err).ToNot(HaveOccurred())

	return &workspacev1.Workspace{
		TypeMeta: metav1.TypeMeta{
			APIVersion: "workspace.gitpod.io/v1",
			Kind:       "Workspace",
		},
		ObjectMeta: metav1.ObjectMeta{
			Name:       name,
			Namespace:  namespace,
			Finalizers: []string{workspacev1.GitpodFinalizerName},
			Labels: map[string]string{
				wsk8s.WorkspaceManagedByLabel: constants.ManagedBy,
			},
		},
		Spec: workspacev1.WorkspaceSpec{
			Ownership: workspacev1.Ownership{
				Owner:       "foobar",
				WorkspaceID: "cool-workspace",
			},
			Type:  workspacev1.WorkspaceTypeRegular,
			Class: "default",
			Image: workspacev1.WorkspaceImages{
				Workspace: workspacev1.WorkspaceImage{
					Ref: ptr.String("alpine:latest"),
				},
				IDE: workspacev1.IDEImages{
					Refs: []string{},
				},
			},
			Ports:       []workspacev1.PortSpec{},
			Initializer: initializerBytes,
			Admission: workspacev1.AdmissionSpec{
				Level: workspacev1.AdmissionLevelEveryone,
			},
		},
	}
}

func createSecret(name, namespace string) *corev1.Secret {
	GinkgoHelper()

	By(fmt.Sprintf("creating secret %s", name))
	secret := &corev1.Secret{
		ObjectMeta: metav1.ObjectMeta{
			Name:      name,
			Namespace: namespace,
		},
		StringData: map[string]string{
			"git": "pod",
		},
	}

	Expect(k8sClient.Create(ctx, secret)).To(Succeed())
	Eventually(func() error {
		return k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, secret)
	}, timeout, interval).Should(Succeed())

	return secret
}

type metricCounts struct {
	starts          int
	creatingCounts  int
	startFailures   int
	failures        int
	stops           map[StopReason]int
	recreations     map[int]int
	backups         int
	backupFailures  int
	restores        int
	restoreFailures int
}

// collectHistCount is a hack to get the value of the histogram's sample count.
// testutil.ToFloat64() does not accept histograms.
func collectHistCount(h prometheus.Histogram) uint64 {
	GinkgoHelper()
	pb := &dto.Metric{}
	Expect(h.Write(pb)).To(Succeed())
	return pb.Histogram.GetSampleCount()
}

var stopReasons = []StopReason{StopReasonFailed, StopReasonStartFailure, StopReasonAborted, StopReasonOutOfSpace, StopReasonTimeout, StopReasonTabClosed, StopReasonRegular}

func collectMetricCounts(wsMetrics *controllerMetrics, ws *workspacev1.Workspace) metricCounts {
	tpe := string(ws.Spec.Type)
	cls := ws.Spec.Class
	startHist := wsMetrics.startupTimeHistVec.WithLabelValues(tpe, cls).(prometheus.Histogram)
	creatingHist := wsMetrics.creatingTimeHistVec.WithLabelValues(tpe, cls).(prometheus.Histogram)
	stopCounts := make(map[StopReason]int)
	for _, reason := range stopReasons {
		stopCounts[reason] = int(testutil.ToFloat64(wsMetrics.totalStopsCounterVec.WithLabelValues(string(reason), tpe, cls)))
	}
	recreations := make(map[int]int)
	for _, attempts := range []int{1, 2, 3, 4, 5} {
		recreations[attempts] = int(testutil.ToFloat64(wsMetrics.totalRecreationsCounterVec.WithLabelValues(tpe, cls, fmt.Sprint(attempts))))
	}
	return metricCounts{
		starts:          int(collectHistCount(startHist)),
		creatingCounts:  int(collectHistCount(creatingHist)),
		startFailures:   int(testutil.ToFloat64(wsMetrics.totalStartsFailureCounterVec.WithLabelValues(tpe, cls))),
		failures:        int(testutil.ToFloat64(wsMetrics.totalFailuresCounterVec.WithLabelValues(tpe, cls))),
		stops:           stopCounts,
		recreations:     recreations,
		backups:         int(testutil.ToFloat64(wsMetrics.totalBackupCounterVec.WithLabelValues(tpe, cls))),
		backupFailures:  int(testutil.ToFloat64(wsMetrics.totalBackupFailureCounterVec.WithLabelValues(tpe, cls))),
		restores:        int(testutil.ToFloat64(wsMetrics.totalRestoreCounterVec.WithLabelValues(tpe, cls))),
		restoreFailures: int(testutil.ToFloat64(wsMetrics.totalRestoreFailureCounterVec.WithLabelValues(tpe, cls))),
	}
}

func expectMetricsDelta(initial metricCounts, cur metricCounts, expectedDelta metricCounts) {
	GinkgoHelper()
	By("checking metrics have been recorded")
	Expect(cur.starts-initial.starts).To(Equal(expectedDelta.starts), "expected metric count delta for starts")
	Expect(cur.creatingCounts-initial.creatingCounts).To(Equal(expectedDelta.creatingCounts), "expected metric count delta for creating count")
	Expect(cur.startFailures-initial.startFailures).To(Equal(expectedDelta.startFailures), "expected metric count delta for startFailures")
	Expect(cur.failures-initial.failures).To(Equal(expectedDelta.failures), "expected metric count delta for failures")
	for _, reason := range stopReasons {
		Expect(cur.stops[reason]-initial.stops[reason]).To(Equal(expectedDelta.stops[reason]), "expected metric count delta for stops with reason %s", reason)
	}
	Expect(cur.backups-initial.backups).To(Equal(expectedDelta.backups), "expected metric count delta for backups")
	Expect(cur.backupFailures-initial.backupFailures).To(Equal(expectedDelta.backupFailures), "expected metric count delta for backupFailures")
	Expect(cur.restores-initial.restores).To(Equal(expectedDelta.restores), "expected metric count delta for restores")
	Expect(cur.restoreFailures-initial.restoreFailures).To(Equal(expectedDelta.restoreFailures), "expected metric count delta for restoreFailures")
}

type subscriberUpdates struct {
	phaseTransitions []workspacev1.WorkspacePhase
}

func collectSubscriberUpdates() *subscriberUpdates {
	su := subscriberUpdates{}
	recordPhaseTransition := func(su *subscriberUpdates, ws *workspacev1.Workspace) {
		phase := ws.Status.Phase

		var lastPhase workspacev1.WorkspacePhase
		lenPhases := len(su.phaseTransitions)
		if lenPhases > 0 {
			lastPhase = su.phaseTransitions[lenPhases-1]
		}

		if lastPhase != phase {
			su.phaseTransitions = append(su.phaseTransitions, phase)
		}
	}

	RegisterSubscriber(func(ws *workspacev1.Workspace) {
		recordPhaseTransition(&su, ws)
	})
	return &su
}

func expectPhaseTransitions(su *subscriberUpdates, expectation []workspacev1.WorkspacePhase) {
	GinkgoHelper()
	By("checking recorded phase transitions")
	Expect(su.phaseTransitions).To(HaveExactElements(expectation), "expected list of recorded phase transitions")
}