Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
gitpod-io
GitHub Repository: gitpod-io/gitpod
Path: blob/main/components/ws-manager-mk2/controllers/workspace_controller_test.go
2498 views
1
// Copyright (c) 2022 Gitpod GmbH. All rights reserved.
2
// Licensed under the GNU Affero General Public License (AGPL).
3
// See License-AGPL.txt in the project root for license information.
4
5
package controllers
6
7
import (
8
"fmt"
9
10
"github.com/aws/smithy-go/ptr"
11
"github.com/google/uuid"
12
. "github.com/onsi/ginkgo/v2"
13
. "github.com/onsi/gomega"
14
"github.com/prometheus/client_golang/prometheus"
15
"github.com/prometheus/client_golang/prometheus/testutil"
16
dto "github.com/prometheus/client_model/go"
17
"google.golang.org/protobuf/proto"
18
corev1 "k8s.io/api/core/v1"
19
"k8s.io/apimachinery/pkg/api/errors"
20
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
21
"k8s.io/apimachinery/pkg/types"
22
ctrl "sigs.k8s.io/controller-runtime"
23
"sigs.k8s.io/controller-runtime/pkg/client"
24
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
25
26
wsk8s "github.com/gitpod-io/gitpod/common-go/kubernetes"
27
csapi "github.com/gitpod-io/gitpod/content-service/api"
28
"github.com/gitpod-io/gitpod/ws-manager-mk2/pkg/constants"
29
workspacev1 "github.com/gitpod-io/gitpod/ws-manager/api/crd/v1"
30
)
31
32
// WorkspaceController integration suite. These specs run against an envtest
// cluster (no kubelet/scheduler), so pod phase/status transitions are applied
// manually via updateObjWithRetries and the controller's reconciliation is
// observed through the Workspace status, conditions, and metric deltas.
var _ = Describe("WorkspaceController", func() {
	Context("with regular workspaces", func() {
		It("should handle successful workspace creation and stop request", func() {
			name := uuid.NewString()

			// Per-workspace secrets; the controller is expected to delete
			// these once the workspace is running (see expectSecretCleanup below).
			envSecret := createSecret(fmt.Sprintf("%s-env", name), "default")
			tokenSecret := createSecret(fmt.Sprintf("%s-tokens", name), secretsNamespace)

			ws := newWorkspace(name, "default")
			// Sample metric counters before creation so we can assert deltas at the end.
			m := collectMetricCounts(wsMetrics, ws)
			pod := createWorkspaceExpectPod(ws)

			Expect(controllerutil.ContainsFinalizer(pod, workspacev1.GitpodFinalizerName)).To(BeTrue())

			By("controller updating the pod starts value")
			Eventually(func() (int, error) {
				err := k8sClient.Get(ctx, types.NamespacedName{Name: ws.Name, Namespace: ws.Namespace}, ws)
				if err != nil {
					return 0, err
				}
				return ws.Status.PodStarts, nil
			}, timeout, interval).Should(Equal(1))

			// Deployed condition should be added.
			expectConditionEventually(ws, string(workspacev1.WorkspaceConditionDeployed), metav1.ConditionTrue, "")

			// Runtime status should be set.
			expectRuntimeStatus(ws, pod)

			By("controller setting status after creation")
			Eventually(func(g Gomega) {
				g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: ws.Name, Namespace: ws.Namespace}, ws)).To(Succeed())
				g.Expect(ws.Status.OwnerToken).ToNot(BeEmpty())
				g.Expect(ws.Status.URL).ToNot(BeEmpty())
			}, timeout, interval).Should(Succeed())

			// Transition Pod to pending, and expect workspace to reach Creating phase.
			// This should also cause create time metrics to be recorded.
			updateObjWithRetries(k8sClient, pod, true, func(pod *corev1.Pod) {
				pod.Status.Phase = corev1.PodPending
				pod.Status.ContainerStatuses = []corev1.ContainerStatus{{
					State: corev1.ContainerState{
						Waiting: &corev1.ContainerStateWaiting{
							Reason: "ContainerCreating",
						},
					},
					Name: "workspace",
				}}
			})

			expectPhaseEventually(ws, workspacev1.WorkspacePhaseCreating)

			// Transition Pod to running, and expect workspace to reach Running phase.
			// This should also cause e.g. startup time metrics to be recorded.
			updateObjWithRetries(k8sClient, pod, true, func(pod *corev1.Pod) {
				pod.Status.Phase = corev1.PodRunning
				pod.Status.ContainerStatuses = []corev1.ContainerStatus{{
					Name:  "workspace",
					Ready: true,
				}}
			})

			updateObjWithRetries(k8sClient, ws, true, func(ws *workspacev1.Workspace) {
				ws.Status.SetCondition(workspacev1.NewWorkspaceConditionContentReady(metav1.ConditionTrue, workspacev1.ReasonInitializationSuccess, ""))
			})

			expectPhaseEventually(ws, workspacev1.WorkspacePhaseRunning)
			expectSecretCleanup(envSecret)
			expectSecretCleanup(tokenSecret)

			markReady(ws)

			requestStop(ws)

			expectFinalizerAndMarkBackupCompleted(ws, pod)

			expectWorkspaceCleanup(ws, pod)

			By("checking pod doesn't get recreated by controller")
			Consistently(func() error {
				return checkNotFound(pod)
			}, duration, interval).Should(Succeed(), "pod came back")

			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				starts:         1,
				creatingCounts: 1,
				restores:       1,
				stops:          map[StopReason]int{StopReasonRegular: 1},
				backups:        1,
			})
		})

		It("should handle content init failure", func() {
			ws := newWorkspace(uuid.NewString(), "default")
			m := collectMetricCounts(wsMetrics, ws)
			pod := createWorkspaceExpectPod(ws)

			By("adding ws init failure condition")
			updateObjWithRetries(k8sClient, ws, true, func(ws *workspacev1.Workspace) {
				ws.Status.SetCondition(workspacev1.NewWorkspaceConditionContentReady(metav1.ConditionFalse, workspacev1.ReasonInitializationFailure, "some failure"))
			})

			// On init failure, expect workspace cleans up without a backup.
			expectWorkspaceCleanup(ws, pod)

			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				startFailures:   1,
				failures:        1,
				restoreFailures: 1,
				stops:           map[StopReason]int{StopReasonStartFailure: 1},
			})
		})

		It("should not take a backup if content init did not happen", func() {
			ws := newWorkspace(uuid.NewString(), "default")
			m := collectMetricCounts(wsMetrics, ws)
			pod := createWorkspaceExpectPod(ws)

			requestStop(ws)

			// No content init, expect cleanup without backup.
			expectWorkspaceCleanup(ws, pod)

			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				startFailures: 0, // No start failure should be recorded, even though the workspace didn't become ready, as it was stopped before it could become ready.
				stops:         map[StopReason]int{StopReasonRegular: 1},
			})
		})

		It("should handle backup failure", func() {
			ws := newWorkspace(uuid.NewString(), "default")
			m := collectMetricCounts(wsMetrics, ws)
			pod := createWorkspaceExpectPod(ws)

			markReady(ws)

			// Stop the workspace.
			requestStop(ws)

			// Indicate the backup failed.
			expectFinalizerAndMarkBackupFailed(ws, pod)

			// Workspace should get cleaned up.
			expectWorkspaceCleanup(ws, pod)

			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				restores:       1,
				backups:        1,
				backupFailures: 1,
				failures:       1,
				stops:          map[StopReason]int{StopReasonFailed: 1},
			})
		})

		It("should handle workspace failure", func() {
			ws := newWorkspace(uuid.NewString(), "default")
			m := collectMetricCounts(wsMetrics, ws)
			pod := createWorkspaceExpectPod(ws)

			markReady(ws)

			// Update Pod with failed exit status.
			updateObjWithRetries(k8sClient, pod, true, func(pod *corev1.Pod) {
				pod.Status.ContainerStatuses = append(pod.Status.ContainerStatuses, corev1.ContainerStatus{
					LastTerminationState: corev1.ContainerState{
						Terminated: &corev1.ContainerStateTerminated{
							ExitCode: 1,
							Message:  "Error",
						},
					},
				})
			})

			// Controller should detect container exit and add Failed condition.
			expectConditionEventually(ws, string(workspacev1.WorkspaceConditionFailed), metav1.ConditionTrue, "")

			expectFinalizerAndMarkBackupCompleted(ws, pod)

			expectWorkspaceCleanup(ws, pod)

			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				restores:      1,
				startFailures: 0,
				failures:      1,
				stops:         map[StopReason]int{StopReasonFailed: 1},
				backups:       1,
			})
		})

		It("should handle workspace failure with unknown exit code", func() {
			ws := newWorkspace(uuid.NewString(), "default")
			m := collectMetricCounts(wsMetrics, ws)
			pod := createWorkspaceExpectPod(ws)

			markReady(ws)

			// Update Pod with failed exit status.
			// Same as the previous spec, but with the sentinel "unknown" exit
			// code; the controller should still treat this as a failure.
			updateObjWithRetries(k8sClient, pod, true, func(pod *corev1.Pod) {
				pod.Status.ContainerStatuses = append(pod.Status.ContainerStatuses, corev1.ContainerStatus{
					LastTerminationState: corev1.ContainerState{
						Terminated: &corev1.ContainerStateTerminated{
							ExitCode: containerUnknownExitCode,
						},
					},
				})
			})

			// Controller should detect container exit and add Failed condition.
			expectConditionEventually(ws, string(workspacev1.WorkspaceConditionFailed), metav1.ConditionTrue, "")

			expectFinalizerAndMarkBackupCompleted(ws, pod)

			expectWorkspaceCleanup(ws, pod)

			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				restores:      1,
				startFailures: 0,
				failures:      1,
				stops:         map[StopReason]int{StopReasonFailed: 1},
				backups:       1,
			})
		})

		It("should clean up timed out workspaces", func() {
			ws := newWorkspace(uuid.NewString(), "default")
			m := collectMetricCounts(wsMetrics, ws)
			pod := createWorkspaceExpectPod(ws)

			markReady(ws)

			By("adding Timeout condition")
			updateObjWithRetries(k8sClient, ws, true, func(ws *workspacev1.Workspace) {
				ws.Status.SetCondition(workspacev1.NewWorkspaceConditionTimeout(""))
			})

			expectFinalizerAndMarkBackupCompleted(ws, pod)

			expectWorkspaceCleanup(ws, pod)

			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				restores: 1,
				stops:    map[StopReason]int{StopReasonTimeout: 1},
				backups:  1,
			})
		})

		It("should handle workspace abort", func() {
			ws := newWorkspace(uuid.NewString(), "default")
			m := collectMetricCounts(wsMetrics, ws)
			pod := createWorkspaceExpectPod(ws)

			markReady(ws)

			// Update Pod with stop and abort conditions.
			updateObjWithRetries(k8sClient, ws, true, func(ws *workspacev1.Workspace) {
				ws.Status.SetCondition(workspacev1.NewWorkspaceConditionAborted(""))
				ws.Status.SetCondition(workspacev1.NewWorkspaceConditionStoppedByRequest(""))
			})

			// Expect cleanup without a backup.
			expectWorkspaceCleanup(ws, pod)

			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				restores: 1,
				stops:    map[StopReason]int{StopReasonAborted: 1},
			})
		})

		It("deleting workspace resource should gracefully clean up", func() {
			name := uuid.NewString()
			ws := newWorkspace(name, "default")

			envSecret := createSecret(fmt.Sprintf("%s-env", name), "default")
			tokenSecret := createSecret(fmt.Sprintf("%s-tokens", name), secretsNamespace)

			m := collectMetricCounts(wsMetrics, ws)
			pod := createWorkspaceExpectPod(ws)

			markReady(ws)

			// Deleting the Workspace CR directly (instead of requesting a stop)
			// should still run the full graceful shutdown path.
			Expect(k8sClient.Delete(ctx, ws)).To(Succeed())

			expectPhaseEventually(ws, workspacev1.WorkspacePhaseStopping)

			expectFinalizerAndMarkBackupCompleted(ws, pod)

			expectWorkspaceCleanup(ws, pod)

			expectSecretCleanup(envSecret)
			expectSecretCleanup(tokenSecret)

			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				restores: 1,
				stops:    map[StopReason]int{StopReasonRegular: 1},
				backups:  1,
			})
		})

		It("node disappearing should fail with backup failure", func() {
			ws := newWorkspace(uuid.NewString(), "default")
			m := collectMetricCounts(wsMetrics, ws)

			// Simulate pod getting scheduled to a node.
			var node corev1.Node
			node.Name = uuid.NewString()
			Expect(k8sClient.Create(ctx, &node)).To(Succeed())
			// Manually create the workspace pod with the node name.
			// We can't update the pod with the node name, as this operation
			// is only allowed for the scheduler. So as a hack, we manually
			// create the workspace's pod.
			pod := &corev1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name:       fmt.Sprintf("ws-%s", ws.Name),
					Namespace:  ws.Namespace,
					Finalizers: []string{workspacev1.GitpodFinalizerName},
					Labels: map[string]string{
						wsk8s.WorkspaceManagedByLabel: constants.ManagedBy,
					},
				},
				Spec: corev1.PodSpec{
					NodeName: node.Name,
					Containers: []corev1.Container{{
						Name:  "workspace",
						Image: "someimage",
					}},
				},
			}

			Expect(k8sClient.Create(ctx, pod)).To(Succeed())
			pod = createWorkspaceExpectPod(ws)
			updateObjWithRetries(k8sClient, pod, false, func(pod *corev1.Pod) {
				Expect(ctrl.SetControllerReference(ws, pod, k8sClient.Scheme())).To(Succeed())
			})
			// Wait until controller has reconciled at least once (by waiting for the runtime status to get updated).
			// This is necessary for the metrics to get recorded correctly. If we don't wait, the first reconciliation
			// might be once the Pod is already in a running state, and hence the metric state might not record e.g. content
			// restore.
			// This is only necessary because we manually created the pod, normally the Pod creation is the controller's
			// first reconciliation which ensures the metrics are recorded from the workspace's initial state.

			Eventually(func(g Gomega) {
				g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: ws.Name, Namespace: ws.Namespace}, ws)).To(Succeed())
				g.Expect(ws.Status.Runtime).ToNot(BeNil())
				g.Expect(ws.Status.Runtime.PodName).To(Equal(pod.Name))
			}, timeout, interval).Should(Succeed())

			markReady(ws)

			// Make node disappear 🪄
			By("deleting node")
			Expect(k8sClient.Delete(ctx, &node)).To(Succeed())

			// Expect workspace to disappear, with a backup failure.
			// NOTE: Can't use expectWorkspaceCleanup() here, as the pod never disappears in envtest due to a nodeName being set.
			// Therefore, we only verify deletion timestamps are set and all finalizers are removed, which in a real cluster
			// would cause the pod and workspace to disappear.
			By("workspace and pod finalizers being removed and deletion timestamps set")
			Eventually(func() error {
				if err := k8sClient.Get(ctx, types.NamespacedName{Name: pod.GetName(), Namespace: pod.GetNamespace()}, pod); err != nil {
					if !errors.IsNotFound(err) {
						return err
					}
				} else {
					if len(pod.ObjectMeta.Finalizers) > 0 {
						return fmt.Errorf("pod still has finalizers: %v", pod.ObjectMeta.Finalizers)
					}
					if pod.DeletionTimestamp == nil {
						return fmt.Errorf("pod deletion timestamp not set")
					}
				}

				if err := k8sClient.Get(ctx, types.NamespacedName{Name: ws.GetName(), Namespace: ws.GetNamespace()}, ws); err != nil {
					if !errors.IsNotFound(err) {
						return err
					}
				} else {
					if ws.Status.Phase != workspacev1.WorkspacePhaseStopped {
						return fmt.Errorf("workspace phase did not reach Stopped, was %s", ws.Status.Phase)
					}
					// Can't check for workspace finalizer removal and deletionTimestamp being set,
					// as this only happens once all pods are gone, and the pod never disappears in this test.
				}
				return nil
			}, timeout, interval).Should(Succeed(), "pod/workspace not cleaned up")

			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				restores:       1,
				backups:        1,
				backupFailures: 1,
				failures:       1,
				stops:          map[StopReason]int{StopReasonFailed: 1},
			})
		})

		It("pod rejection should result in a retry", func() {
			ws := newWorkspace(uuid.NewString(), "default")
			m := collectMetricCounts(wsMetrics, ws)
			su := collectSubscriberUpdates()

			// ### prepare block start
			By("creating workspace")
			// Simulate pod getting scheduled to a node.
			var node corev1.Node
			node.Name = uuid.NewString()
			Expect(k8sClient.Create(ctx, &node)).To(Succeed())
			// Manually create the workspace pod with the node name.
			// We can't update the pod with the node name, as this operation
			// is only allowed for the scheduler. So as a hack, we manually
			// create the workspace's pod.
			pod := &corev1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name:       fmt.Sprintf("ws-%s", ws.Name),
					Namespace:  ws.Namespace,
					Finalizers: []string{workspacev1.GitpodFinalizerName},
					Labels: map[string]string{
						wsk8s.WorkspaceManagedByLabel: constants.ManagedBy,
					},
				},
				Spec: corev1.PodSpec{
					NodeName: node.Name,
					Containers: []corev1.Container{{
						Name:  "workspace",
						Image: "someimage",
					}},
				},
			}

			Expect(k8sClient.Create(ctx, pod)).To(Succeed())
			pod = createWorkspaceExpectPod(ws)
			updateObjWithRetries(k8sClient, pod, false, func(pod *corev1.Pod) {
				Expect(ctrl.SetControllerReference(ws, pod, k8sClient.Scheme())).To(Succeed())
			})
			// mimic the regular "start" phase
			updateObjWithRetries(k8sClient, ws, true, func(ws *workspacev1.Workspace) {
				ws.Status.PodStarts = 1
				ws.Status.PodRecreated = 0
			})

			// Wait until controller has reconciled at least once (by waiting for the runtime status to get updated).
			// This is necessary for the metrics to get recorded correctly. If we don't wait, the first reconciliation
			// might be once the Pod is already in a running state, and hence the metric state might not record e.g. content
			// restore.
			// This is only necessary because we manually created the pod, normally the Pod creation is the controller's
			// first reconciliation which ensures the metrics are recorded from the workspace's initial state.

			Eventually(func(g Gomega) {
				g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: ws.Name, Namespace: ws.Namespace}, ws)).To(Succeed())
				g.Expect(ws.Status.Runtime).ToNot(BeNil())
				g.Expect(ws.Status.Runtime.PodName).To(Equal(pod.Name))
			}, timeout, interval).Should(Succeed())

			// Await "deployed" condition, and check we are good
			expectConditionEventually(ws, string(workspacev1.WorkspaceConditionDeployed), metav1.ConditionTrue, "")
			Eventually(func(g Gomega) {
				g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: ws.Name, Namespace: ws.Namespace}, ws)).To(Succeed())
				g.Expect(ws.Status.PodStarts).To(Equal(1))
				g.Expect(ws.Status.PodRecreated).To(Equal(0))
			}, timeout, interval).Should(Succeed())

			// ### prepare block end

			// ### trigger block start
			// Make pod be rejected 🪄
			By("rejecting pod")
			rejectPod(pod)

			By("await pod being in stopping")
			Eventually(func(g Gomega) {
				g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: ws.Name, Namespace: ws.Namespace}, ws)).To(Succeed())
				g.Expect(ws.Status.Phase).To(Equal(workspacev1.WorkspacePhaseStopping))
			}, timeout, interval).Should(Succeed())

			// when a rejected workspace pod is in stopping, ws-daemon wipes the state before it's moved to "stopped"
			// mimic this ws-daemon behavior
			updateObjWithRetries(k8sClient, ws, true, func(ws *workspacev1.Workspace) {
				ws.Status.SetCondition(workspacev1.NewWorkspaceConditionStateWiped("", metav1.ConditionTrue))
			})

			By("await pod recreation")
			Eventually(func(g Gomega) {
				g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: ws.Name, Namespace: ws.Namespace}, ws)).To(Succeed())
				g.Expect(ws.Status.PodRecreated).To(Equal(1))
				g.Expect(ws.Status.Phase).To(Equal(workspacev1.WorkspacePhasePending))
			}, timeout, interval).Should(Succeed())
			// ### trigger block end

			// ### retry block start
			// Transition Pod to pending, and expect workspace to reach Creating phase.
			// This should also cause create time metrics to be recorded.
			updateObjWithRetries(k8sClient, pod, true, func(pod *corev1.Pod) {
				pod.Status.Phase = corev1.PodPending
				pod.Status.ContainerStatuses = []corev1.ContainerStatus{{
					State: corev1.ContainerState{
						Waiting: &corev1.ContainerStateWaiting{
							Reason: "ContainerCreating",
						},
					},
					Name: "workspace",
				}}
			})

			expectPhaseEventually(ws, workspacev1.WorkspacePhaseCreating)
			// ### retry block end

			// ### move to running start
			// Transition Pod to running, and expect workspace to reach Running phase.
			// This should also cause e.g. startup time metrics to be recorded.
			updateObjWithRetries(k8sClient, pod, true, func(pod *corev1.Pod) {
				pod.Status.Phase = corev1.PodRunning
				pod.Status.ContainerStatuses = []corev1.ContainerStatus{{
					Name:  "workspace",
					Ready: true,
				}}
			})

			updateObjWithRetries(k8sClient, ws, true, func(ws *workspacev1.Workspace) {
				ws.Status.SetCondition(workspacev1.NewWorkspaceConditionContentReady(metav1.ConditionTrue, workspacev1.ReasonInitializationSuccess, ""))
			})

			expectPhaseEventually(ws, workspacev1.WorkspacePhaseRunning)
			// ### move to running end

			// ### validate start
			Eventually(func(g Gomega) {
				g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: ws.Name, Namespace: ws.Namespace}, ws)).To(Succeed())
				g.Expect(ws.Status.PodStarts).To(Equal(2))
				g.Expect(ws.Status.PodRecreated).To(Equal(1))
			}, timeout, interval).Should(Succeed())

			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				restores:       1,
				backups:        0,
				backupFailures: 0,
				failures:       1,
				creatingCounts: 1,
				stops:          map[StopReason]int{StopReasonStartFailure: 1},
				starts:         1, // this is NOT PodStarts, but merely an artifact of how we count it in the tests
				recreations:    map[int]int{1: 1},
			})

			expectPhaseTransitions(su, []workspacev1.WorkspacePhase{workspacev1.WorkspacePhasePending, workspacev1.WorkspacePhaseCreating, workspacev1.WorkspacePhaseInitializing, workspacev1.WorkspacePhaseRunning})
			// ### validate end
		})
	})

	Context("with headless workspaces", func() {
		It("should handle headless task failure", func() {
			ws, pod, m := createHeadlessWorkspace(workspacev1.WorkspaceTypePrebuild)

			// A failed headless task (exit code != 0 with the task-failed
			// message prefix) still counts as a regular stop, not a failure.
			updateObjWithRetries(k8sClient, pod, true, func(p *corev1.Pod) {
				p.Status.Phase = corev1.PodFailed
				p.Status.ContainerStatuses = []corev1.ContainerStatus{
					{
						Name: "workspace",
						State: corev1.ContainerState{
							Terminated: &corev1.ContainerStateTerminated{
								Message:  headlessTaskFailedPrefix,
								ExitCode: 5,
							},
						},
					},
				}
			})

			expectFinalizerAndMarkBackupCompleted(ws, pod)
			expectWorkspaceCleanup(ws, pod)
			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				restores:       1,
				backups:        1,
				backupFailures: 0,
				failures:       0,
				stops:          map[StopReason]int{StopReasonRegular: 1},
			})
		})

		It("should handle successful prebuild", func() {
			ws, pod, m := createHeadlessWorkspace(workspacev1.WorkspaceTypePrebuild)
			updateObjWithRetries(k8sClient, pod, true, func(p *corev1.Pod) {
				p.Status.Phase = corev1.PodSucceeded
			})

			expectFinalizerAndMarkBackupCompleted(ws, pod)
			expectWorkspaceCleanup(ws, pod)
			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				restores:       1,
				backups:        1,
				backupFailures: 0,
				failures:       0,
				stops:          map[StopReason]int{StopReasonRegular: 1},
			})
		})

		It("should handle failed prebuild", func() {
			ws, pod, m := createHeadlessWorkspace(workspacev1.WorkspaceTypePrebuild)
			updateObjWithRetries(k8sClient, pod, true, func(p *corev1.Pod) {
				p.Status.Phase = corev1.PodFailed
				p.Status.ContainerStatuses = []corev1.ContainerStatus{
					{
						Name: "workspace",
						State: corev1.ContainerState{
							Terminated: &corev1.ContainerStateTerminated{
								Message:  "prebuild failed",
								ExitCode: 5,
							},
						},
					},
				}
			})

			expectFinalizerAndMarkBackupCompleted(ws, pod)
			expectWorkspaceCleanup(ws, pod)
			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				restores:       1,
				backups:        1,
				backupFailures: 0,
				failures:       1,
				stops:          map[StopReason]int{StopReasonFailed: 1},
			})
		})

		It("should handle aborted prebuild", func() {
			ws, pod, m := createHeadlessWorkspace(workspacev1.WorkspaceTypePrebuild)
			// abort workspace
			updateObjWithRetries(k8sClient, ws, true, func(ws *workspacev1.Workspace) {
				ws.Status.SetCondition(workspacev1.NewWorkspaceConditionAborted("StopWorkspaceRequest"))
			})

			requestStop(ws)

			// should not take a backup
			expectWorkspaceCleanup(ws, pod)
			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				restores:       1,
				backups:        0,
				backupFailures: 0,
				failures:       0,
				stops:          map[StopReason]int{StopReasonAborted: 1},
			})
		})

		It("should handle imagebuild", func() {
			ws, pod, m := createHeadlessWorkspace(workspacev1.WorkspaceTypeImageBuild)
			updateObjWithRetries(k8sClient, pod, true, func(p *corev1.Pod) {
				p.Status.Phase = corev1.PodSucceeded
			})

			// should not take a backup
			expectWorkspaceCleanup(ws, pod)
			expectMetricsDelta(m, collectMetricCounts(wsMetrics, ws), metricCounts{
				restores:       1,
				backups:        0,
				backupFailures: 0,
				failures:       0,
				stops:          map[StopReason]int{StopReasonRegular: 1},
			})
		})
	})
})
690
691
// createHeadlessWorkspace creates a workspace of the given headless type
// (prebuild or imagebuild), waits for the controller to create its pod and
// populate the initial status, and marks the workspace content as ready.
// It returns the workspace, its pod, and the metric counts sampled before
// creation (for later delta assertions).
func createHeadlessWorkspace(typ workspacev1.WorkspaceType) (ws *workspacev1.Workspace, pod *corev1.Pod, m metricCounts) {
	ws = newWorkspace(uuid.NewString(), "default")
	ws.Spec.Type = typ
	m = collectMetricCounts(wsMetrics, ws)
	pod = createWorkspaceExpectPod(ws)

	// Expect headless
	Expect(ws.IsHeadless()).To(BeTrue())
	Expect(controllerutil.ContainsFinalizer(pod, workspacev1.GitpodFinalizerName)).To(BeTrue())

	// Expect runtime status also gets reported for headless workspaces.
	expectRuntimeStatus(ws, pod)

	By("controller setting status after creation")
	Eventually(func(g Gomega) {
		g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: ws.Name, Namespace: ws.Namespace}, ws)).To(Succeed())
		g.Expect(ws.Status.OwnerToken).ToNot(BeEmpty())
		g.Expect(ws.Status.URL).ToNot(BeEmpty())
	}, timeout, interval).Should(Succeed())

	markReady(ws)
	return ws, pod, m
}
716
717
func updateObjWithRetries[O client.Object](c client.Client, obj O, updateStatus bool, update func(obj O)) {
718
GinkgoHelper()
719
Eventually(func() error {
720
err := c.Get(ctx, types.NamespacedName{
721
Name: obj.GetName(),
722
Namespace: obj.GetNamespace(),
723
}, obj)
724
if err != nil {
725
return err
726
}
727
728
// Apply update.
729
update(obj)
730
731
if updateStatus {
732
return c.Status().Update(ctx, obj)
733
}
734
735
return c.Update(ctx, obj)
736
}, timeout, interval).Should(Succeed())
737
}
738
739
// createWorkspaceExpectPod creates the workspace resource, and expects
740
// the controller to eventually create the workspace Pod. The created Pod
741
// is returned.
742
func createWorkspaceExpectPod(ws *workspacev1.Workspace) *corev1.Pod {
743
GinkgoHelper()
744
By("creating workspace")
745
Expect(k8sClient.Create(ctx, ws)).To(Succeed())
746
747
By("controller creating workspace pod")
748
pod := &corev1.Pod{}
749
var podPrefix string
750
switch ws.Spec.Type {
751
case workspacev1.WorkspaceTypeRegular:
752
podPrefix = "ws"
753
case workspacev1.WorkspaceTypePrebuild:
754
podPrefix = "prebuild"
755
case workspacev1.WorkspaceTypeImageBuild:
756
podPrefix = "imagebuild"
757
}
758
Eventually(func() error {
759
return k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s", podPrefix, ws.Name), Namespace: ws.Namespace}, pod)
760
}, timeout, interval).Should(Succeed())
761
return pod
762
}
763
764
// expectPhaseEventually blocks until the workspace's status reports the
// given phase, or fails the spec after the Eventually timeout.
func expectPhaseEventually(ws *workspacev1.Workspace, phase workspacev1.WorkspacePhase) {
	GinkgoHelper()
	By(fmt.Sprintf("controller transition workspace phase to %s", phase))
	key := types.NamespacedName{Name: ws.Name, Namespace: ws.Namespace}
	Eventually(func(g Gomega) {
		g.Expect(k8sClient.Get(ctx, key, ws)).To(Succeed())
		g.Expect(ws.Status.Phase).To(Equal(phase))
	}, timeout, interval).Should(Succeed())
}
772
773
// expectConditionEventually blocks until the workspace carries a condition
// of type tpe with the given status. When reason is non-empty, the
// condition's reason must match as well.
func expectConditionEventually(ws *workspacev1.Workspace, tpe string, status metav1.ConditionStatus, reason string) {
	GinkgoHelper()
	By(fmt.Sprintf("controller setting workspace condition %s to %s", tpe, status))
	Eventually(func(g Gomega) {
		g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: ws.Name, Namespace: ws.Namespace}, ws)).To(Succeed())
		cond := wsk8s.GetCondition(ws.Status.Conditions, tpe)
		g.Expect(cond).ToNot(BeNil(), fmt.Sprintf("expected condition %s to be present", tpe))
		g.Expect(cond.Status).To(Equal(status))
		if reason != "" {
			g.Expect(cond.Reason).To(Equal(reason))
		}
	}, timeout, interval).Should(Succeed())
}
786
787
func expectRuntimeStatus(ws *workspacev1.Workspace, pod *corev1.Pod) {
788
GinkgoHelper()
789
By("artificially setting the pod's status")
790
// Since there are no Pod controllers running in the EnvTest cluster to populate the Pod status,
791
// we artificially update the created Pod's status here, and verify later that the workspace
792
// controller reconciles this and puts it in the workspace status.
793
var (
794
hostIP = "1.2.3.4"
795
podIP = "10.0.0.0"
796
)
797
updateObjWithRetries(k8sClient, pod, true, func(p *corev1.Pod) {
798
p.Status.HostIP = hostIP
799
p.Status.PodIP = podIP
800
})
801
802
By("controller adding pod status to the workspace status")
803
Eventually(func(g Gomega) {
804
g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: ws.Name, Namespace: ws.Namespace}, ws)).To(Succeed())
805
g.Expect(ws.Status.Runtime).ToNot(BeNil())
806
g.Expect(ws.Status.Runtime.HostIP).To(Equal(hostIP))
807
g.Expect(ws.Status.Runtime.PodIP).To(Equal(podIP))
808
g.Expect(ws.Status.Runtime.PodName).To(Equal(pod.Name))
809
}, timeout, interval).Should(Succeed())
810
}
811
812
// requestStop signals the workspace to stop by setting the
// StoppedByRequest condition on its status.
func requestStop(ws *workspacev1.Workspace) {
	GinkgoHelper()
	By("adding stop signal")
	updateObjWithRetries(k8sClient, ws, true, func(w *workspacev1.Workspace) {
		w.Status.SetCondition(workspacev1.NewWorkspaceConditionStoppedByRequest(""))
	})
}
819
820
// rejectPod simulates the kubelet rejecting the pod by marking it failed
// with an out-of-resources style reason and message.
func rejectPod(pod *corev1.Pod) {
	GinkgoHelper()
	By("adding pod rejected condition")
	updateObjWithRetries(k8sClient, pod, true, func(p *corev1.Pod) {
		p.Status.Phase = corev1.PodFailed
		p.Status.Reason = "OutOfcpu"
		p.Status.Message = "Pod was rejected"
	})
}
829
830
// markReady marks the workspace's content as successfully initialized and
// records that the workspace has been ready (EverReady condition).
func markReady(ws *workspacev1.Workspace) {
	GinkgoHelper()
	By("adding content ready condition")
	updateObjWithRetries(k8sClient, ws, true, func(w *workspacev1.Workspace) {
		w.Status.SetCondition(workspacev1.NewWorkspaceConditionContentReady(metav1.ConditionTrue, workspacev1.ReasonInitializationSuccess, ""))
		w.Status.SetCondition(workspacev1.NewWorkspaceConditionEverReady())
	})
}
838
839
// expectFinalizerAndMarkBackupCompleted verifies the pod consistently keeps
// its gitpod finalizer (i.e. the controller is holding the pod while waiting
// for a backup), then signals a successful backup on the workspace status.
func expectFinalizerAndMarkBackupCompleted(ws *workspacev1.Workspace, pod *corev1.Pod) {
	GinkgoHelper()
	// Checking for the finalizer enforces our expectation that the workspace
	// should be waiting for a backup to be taken.
	By("checking finalizer exists for backup")
	key := types.NamespacedName{Name: pod.GetName(), Namespace: pod.GetNamespace()}
	Consistently(func() (bool, error) {
		if err := k8sClient.Get(ctx, key, pod); err != nil {
			return false, err
		}
		return controllerutil.ContainsFinalizer(pod, workspacev1.GitpodFinalizerName), nil
	}, duration, interval).Should(BeTrue(), "missing gitpod finalizer on pod, expected one to wait for backup to succeed")

	By("signalling backup completed")
	updateObjWithRetries(k8sClient, ws, true, func(w *workspacev1.Workspace) {
		w.Status.SetCondition(workspacev1.NewWorkspaceConditionBackupComplete())
	})
}
856
857
// expectFinalizerAndMarkBackupFailed verifies the pod keeps its gitpod
// finalizer (i.e. the controller is holding the pod while waiting for a
// backup to be taken or to fail), then signals a failed backup on the
// workspace status.
func expectFinalizerAndMarkBackupFailed(ws *workspacev1.Workspace, pod *corev1.Pod) {
	GinkgoHelper()
	// Checking for the finalizer enforces our expectation that the workspace
	// should be waiting for a backup to be taken (or fail).
	By("checking finalizer exists for backup")
	Consistently(func() (bool, error) {
		if err := k8sClient.Get(ctx, types.NamespacedName{Name: pod.GetName(), Namespace: pod.GetNamespace()}, pod); err != nil {
			return false, err
		}
		return controllerutil.ContainsFinalizer(pod, workspacev1.GitpodFinalizerName), nil
		// Fixed copy-pasted message from the "completed" variant: this helper
		// waits for the backup to finish either way before marking it failed.
	}, duration, interval).Should(BeTrue(), "missing gitpod finalizer on pod, expected one to wait for backup to complete or fail")

	By("signalling backup failed")
	updateObjWithRetries(k8sClient, ws, true, func(ws *workspacev1.Workspace) {
		ws.Status.SetCondition(workspacev1.NewWorkspaceConditionBackupFailure(""))
	})
}
874
875
// expectWorkspaceCleanup waits for the controller to fully tear down a
// workspace: finalizers removed from pod and workspace, then both objects
// deleted from the cluster.
func expectWorkspaceCleanup(ws *workspacev1.Workspace, pod *corev1.Pod) {
	GinkgoHelper()

	// finalizerCount reports how many finalizers remain on obj. A NotFound
	// is treated as zero: the finalizers were removed and the object deleted
	// before we could observe it — exactly the outcome we are waiting for.
	finalizerCount := func(obj client.Object) (int, error) {
		err := k8sClient.Get(ctx, types.NamespacedName{Name: obj.GetName(), Namespace: obj.GetNamespace()}, obj)
		if errors.IsNotFound(err) {
			return 0, nil
		}
		if err != nil {
			return 0, err
		}
		return len(obj.GetFinalizers()), nil
	}

	By("controller removing pod finalizers")
	Eventually(func() (int, error) {
		return finalizerCount(pod)
	}, timeout, interval).Should(Equal(0), "pod finalizers did not go away")

	By("cleaning up the workspace pod")
	Eventually(func() error {
		return checkNotFound(pod)
	}, timeout, interval).Should(Succeed(), "pod did not go away")

	By("controller removing workspace finalizers")
	Eventually(func() (int, error) {
		return finalizerCount(ws)
	}, timeout, interval).Should(Equal(0), "workspace finalizers did not go away")

	By("cleaning up the workspace resource")
	Eventually(func(g Gomega) error {
		if err := checkNotFound(ws); err == nil {
			return nil
		}
		// Still present: while waiting for deletion it must at least be Stopped.
		g.Expect(ws.Status.Phase).To(Equal(workspacev1.WorkspacePhaseStopped))
		return fmt.Errorf("workspace is Stopped, but hasn't been deleted yet")
	}, timeout, interval).Should(Succeed(), "workspace did not go away")
}
919
920
// expectSecretCleanup waits until the controller has deleted the given secret.
func expectSecretCleanup(secret *corev1.Secret) {
	GinkgoHelper()

	By("controller deleting secrets")
	key := types.NamespacedName{Name: secret.GetName(), Namespace: secret.GetNamespace()}
	Eventually(func() (int, error) {
		var existing corev1.Secret
		err := k8sClient.Get(ctx, key, &existing)
		switch {
		case errors.IsNotFound(err):
			// Gone, as expected.
			return 0, nil
		case err != nil:
			return 1, err
		default:
			// Secret is still around.
			return 1, nil
		}
	}, timeout, interval).Should(Equal(0), "environment secret has not been deleted")
}
936
937
// checkNotFound returns nil if the object does not exist.
938
// Otherwise, it returns an error.
939
func checkNotFound(obj client.Object) error {
940
err := k8sClient.Get(ctx, types.NamespacedName{Name: obj.GetName(), Namespace: obj.GetNamespace()}, obj)
941
if err == nil {
942
// Object exists, return as an error.
943
return fmt.Errorf("object exists")
944
}
945
if errors.IsNotFound(err) {
946
// Object doesn't exist, this is what we want.
947
return nil
948
}
949
return err
950
}
951
952
func newWorkspace(name, namespace string) *workspacev1.Workspace {
953
GinkgoHelper()
954
initializer := &csapi.WorkspaceInitializer{
955
Spec: &csapi.WorkspaceInitializer_Empty{Empty: &csapi.EmptyInitializer{}},
956
}
957
initializerBytes, err := proto.Marshal(initializer)
958
Expect(err).ToNot(HaveOccurred())
959
960
return &workspacev1.Workspace{
961
TypeMeta: metav1.TypeMeta{
962
APIVersion: "workspace.gitpod.io/v1",
963
Kind: "Workspace",
964
},
965
ObjectMeta: metav1.ObjectMeta{
966
Name: name,
967
Namespace: namespace,
968
Finalizers: []string{workspacev1.GitpodFinalizerName},
969
Labels: map[string]string{
970
wsk8s.WorkspaceManagedByLabel: constants.ManagedBy,
971
},
972
},
973
Spec: workspacev1.WorkspaceSpec{
974
Ownership: workspacev1.Ownership{
975
Owner: "foobar",
976
WorkspaceID: "cool-workspace",
977
},
978
Type: workspacev1.WorkspaceTypeRegular,
979
Class: "default",
980
Image: workspacev1.WorkspaceImages{
981
Workspace: workspacev1.WorkspaceImage{
982
Ref: ptr.String("alpine:latest"),
983
},
984
IDE: workspacev1.IDEImages{
985
Refs: []string{},
986
},
987
},
988
Ports: []workspacev1.PortSpec{},
989
Initializer: initializerBytes,
990
Admission: workspacev1.AdmissionSpec{
991
Level: workspacev1.AdmissionLevelEveryone,
992
},
993
},
994
}
995
}
996
997
func createSecret(name, namespace string) *corev1.Secret {
998
GinkgoHelper()
999
1000
By(fmt.Sprintf("creating secret %s", name))
1001
secret := &corev1.Secret{
1002
ObjectMeta: metav1.ObjectMeta{
1003
Name: name,
1004
Namespace: namespace,
1005
},
1006
StringData: map[string]string{
1007
"git": "pod",
1008
},
1009
}
1010
1011
Expect(k8sClient.Create(ctx, secret)).To(Succeed())
1012
Eventually(func() error {
1013
return k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, secret)
1014
}, timeout, interval).Should(Succeed())
1015
1016
return secret
1017
}
1018
1019
// metricCounts is a point-in-time snapshot of the controller's Prometheus
// metrics for one workspace type/class combination. Tests take a snapshot
// before and after an action and assert on the delta
// (see collectMetricCounts and expectMetricsDelta).
type metricCounts struct {
	// starts is the sample count of the startup-time histogram.
	starts int
	// creatingCounts is the sample count of the creating-time histogram.
	creatingCounts int
	// startFailures is the value of the start-failure counter.
	startFailures int
	// failures is the value of the overall workspace-failure counter.
	failures int
	// stops holds the stop counter per StopReason label value.
	stops map[StopReason]int
	// recreations holds the recreation counter keyed by attempt number (1-5).
	recreations map[int]int
	// backups is the value of the backup counter.
	backups int
	// backupFailures is the value of the backup-failure counter.
	backupFailures int
	// restores is the value of the restore counter.
	restores int
	// restoreFailures is the value of the restore-failure counter.
	restoreFailures int
}
1031
1032
// collectHistCount is a hack to get the value of the histogram's sample count.
1033
// testutil.ToFloat64() does not accept histograms.
1034
func collectHistCount(h prometheus.Histogram) uint64 {
1035
GinkgoHelper()
1036
pb := &dto.Metric{}
1037
Expect(h.Write(pb)).To(Succeed())
1038
return pb.Histogram.GetSampleCount()
1039
}
1040
1041
// stopReasons lists every stop-reason label value the tests snapshot,
// so metric deltas cover all reasons the stop counter is recorded with.
var stopReasons = []StopReason{StopReasonFailed, StopReasonStartFailure, StopReasonAborted, StopReasonOutOfSpace, StopReasonTimeout, StopReasonTabClosed, StopReasonRegular}
1042
1043
func collectMetricCounts(wsMetrics *controllerMetrics, ws *workspacev1.Workspace) metricCounts {
1044
tpe := string(ws.Spec.Type)
1045
cls := ws.Spec.Class
1046
startHist := wsMetrics.startupTimeHistVec.WithLabelValues(tpe, cls).(prometheus.Histogram)
1047
creatingHist := wsMetrics.creatingTimeHistVec.WithLabelValues(tpe, cls).(prometheus.Histogram)
1048
stopCounts := make(map[StopReason]int)
1049
for _, reason := range stopReasons {
1050
stopCounts[reason] = int(testutil.ToFloat64(wsMetrics.totalStopsCounterVec.WithLabelValues(string(reason), tpe, cls)))
1051
}
1052
recreations := make(map[int]int)
1053
for _, attempts := range []int{1, 2, 3, 4, 5} {
1054
recreations[attempts] = int(testutil.ToFloat64(wsMetrics.totalRecreationsCounterVec.WithLabelValues(tpe, cls, fmt.Sprint(attempts))))
1055
}
1056
return metricCounts{
1057
starts: int(collectHistCount(startHist)),
1058
creatingCounts: int(collectHistCount(creatingHist)),
1059
startFailures: int(testutil.ToFloat64(wsMetrics.totalStartsFailureCounterVec.WithLabelValues(tpe, cls))),
1060
failures: int(testutil.ToFloat64(wsMetrics.totalFailuresCounterVec.WithLabelValues(tpe, cls))),
1061
stops: stopCounts,
1062
recreations: recreations,
1063
backups: int(testutil.ToFloat64(wsMetrics.totalBackupCounterVec.WithLabelValues(tpe, cls))),
1064
backupFailures: int(testutil.ToFloat64(wsMetrics.totalBackupFailureCounterVec.WithLabelValues(tpe, cls))),
1065
restores: int(testutil.ToFloat64(wsMetrics.totalRestoreCounterVec.WithLabelValues(tpe, cls))),
1066
restoreFailures: int(testutil.ToFloat64(wsMetrics.totalRestoreFailureCounterVec.WithLabelValues(tpe, cls))),
1067
}
1068
}
1069
1070
// expectMetricsDelta asserts that the difference between two metric
// snapshots matches expectedDelta, field by field.
func expectMetricsDelta(initial metricCounts, cur metricCounts, expectedDelta metricCounts) {
	GinkgoHelper()
	By("checking metrics have been recorded")

	delta := func(after, before int) int { return after - before }

	Expect(delta(cur.starts, initial.starts)).To(Equal(expectedDelta.starts), "expected metric count delta for starts")
	Expect(delta(cur.creatingCounts, initial.creatingCounts)).To(Equal(expectedDelta.creatingCounts), "expected metric count delta for creating count")
	Expect(delta(cur.startFailures, initial.startFailures)).To(Equal(expectedDelta.startFailures), "expected metric count delta for startFailures")
	Expect(delta(cur.failures, initial.failures)).To(Equal(expectedDelta.failures), "expected metric count delta for failures")
	for _, reason := range stopReasons {
		Expect(delta(cur.stops[reason], initial.stops[reason])).To(Equal(expectedDelta.stops[reason]), "expected metric count delta for stops with reason %s", reason)
	}
	Expect(delta(cur.backups, initial.backups)).To(Equal(expectedDelta.backups), "expected metric count delta for backups")
	Expect(delta(cur.backupFailures, initial.backupFailures)).To(Equal(expectedDelta.backupFailures), "expected metric count delta for backupFailures")
	Expect(delta(cur.restores, initial.restores)).To(Equal(expectedDelta.restores), "expected metric count delta for restores")
	Expect(delta(cur.restoreFailures, initial.restoreFailures)).To(Equal(expectedDelta.restoreFailures), "expected metric count delta for restoreFailures")
}
1085
1086
// subscriberUpdates records what a registered workspace status subscriber
// observed during a test.
type subscriberUpdates struct {
	// phaseTransitions holds the workspace phases in observation order,
	// with consecutive duplicates collapsed (see collectSubscriberUpdates).
	phaseTransitions []workspacev1.WorkspacePhase
}
1089
1090
// collectSubscriberUpdates registers a workspace status subscriber that
// records each observed phase transition, collapsing consecutive updates
// within the same phase.
func collectSubscriberUpdates() *subscriberUpdates {
	su := subscriberUpdates{}
	RegisterSubscriber(func(ws *workspacev1.Workspace) {
		phase := ws.Status.Phase

		// Determine the most recently recorded phase; the zero value stands
		// in when nothing has been recorded yet.
		var last workspacev1.WorkspacePhase
		if n := len(su.phaseTransitions); n > 0 {
			last = su.phaseTransitions[n-1]
		}

		// Only record a change of phase, not repeated updates in the same phase.
		if last != phase {
			su.phaseTransitions = append(su.phaseTransitions, phase)
		}
	})
	return &su
}
1111
1112
// expectPhaseTransitions asserts that the subscriber observed exactly the
// given sequence of phase transitions, in order.
func expectPhaseTransitions(su *subscriberUpdates, expectation []workspacev1.WorkspacePhase) {
	GinkgoHelper()
	By("checking recorded phase transitions")
	recorded := su.phaseTransitions
	Expect(recorded).To(HaveExactElements(expectation), "expected list of recorded phase transitions")
}
1117
1118