Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
gitpod-io
GitHub Repository: gitpod-io/gitpod
Path: blob/main/components/ws-manager-mk2/controllers/timeout_controller.go
2498 views
1
// Copyright (c) 2022 Gitpod GmbH. All rights reserved.
2
// Licensed under the GNU Affero General Public License (AGPL).
3
// See License-AGPL.txt in the project root for license information.
4
5
package controllers
6
7
import (
8
"context"
9
"fmt"
10
"time"
11
12
corev1 "k8s.io/api/core/v1"
13
apierrors "k8s.io/apimachinery/pkg/api/errors"
14
"k8s.io/apimachinery/pkg/types"
15
"k8s.io/client-go/tools/record"
16
"k8s.io/client-go/util/retry"
17
ctrl "sigs.k8s.io/controller-runtime"
18
"sigs.k8s.io/controller-runtime/pkg/client"
19
"sigs.k8s.io/controller-runtime/pkg/controller"
20
"sigs.k8s.io/controller-runtime/pkg/log"
21
"sigs.k8s.io/controller-runtime/pkg/predicate"
22
23
k8s "github.com/gitpod-io/gitpod/common-go/kubernetes"
24
"github.com/gitpod-io/gitpod/common-go/util"
25
"github.com/gitpod-io/gitpod/ws-manager-mk2/pkg/activity"
26
"github.com/gitpod-io/gitpod/ws-manager-mk2/pkg/constants"
27
"github.com/gitpod-io/gitpod/ws-manager-mk2/pkg/maintenance"
28
config "github.com/gitpod-io/gitpod/ws-manager/api/config"
29
workspacev1 "github.com/gitpod-io/gitpod/ws-manager/api/crd/v1"
30
"github.com/go-logr/logr"
31
)
32
33
func NewTimeoutReconciler(c client.Client, recorder record.EventRecorder, cfg config.Configuration, maintenance maintenance.Maintenance) (*TimeoutReconciler, error) {
34
if cfg.HeartbeatInterval == 0 {
35
return nil, fmt.Errorf("invalid heartbeat interval, must not be 0")
36
}
37
reconcileInterval := time.Duration(cfg.HeartbeatInterval)
38
// Reconcile interval is half the heartbeat interval to catch timed out workspaces in time.
39
// See https://en.wikipedia.org/wiki/Nyquist%E2%80%93Shannon_sampling_theorem why we need this.
40
reconcileInterval /= 2
41
42
return &TimeoutReconciler{
43
Client: c,
44
Config: cfg,
45
reconcileInterval: reconcileInterval,
46
recorder: recorder,
47
maintenance: maintenance,
48
}, nil
49
}
50
51
// TimeoutReconciler reconciles workspace timeouts. This is a separate reconciler, as it
52
// always requeues events for existing workspaces such that timeouts are checked on (at least)
53
// a specified interval. The reconcile loop should therefore be light-weight as it's repeatedly
54
// reconciling all workspaces in the cluster.
55
type TimeoutReconciler struct {
56
client.Client
57
58
Config config.Configuration
59
reconcileInterval time.Duration
60
recorder record.EventRecorder
61
maintenance maintenance.Maintenance
62
}
63
64
//+kubebuilder:rbac:groups=workspace.gitpod.io,resources=workspaces,verbs=get;list;watch;create;update;patch;delete
65
//+kubebuilder:rbac:groups=workspace.gitpod.io,resources=workspaces/status,verbs=get;update;patch
66
67
// Reconcile will check the given workspace for timing out. When done, a new event gets
68
// requeued automatically to ensure the workspace gets reconciled at least every reconcileInterval.
69
func (r *TimeoutReconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ctrl.Result, err error) {
70
log := log.FromContext(ctx).WithValues("ws", req.NamespacedName)
71
72
var workspace workspacev1.Workspace
73
if err := r.Get(ctx, req.NamespacedName, &workspace); err != nil {
74
if !apierrors.IsNotFound(err) {
75
log.Error(err, "unable to fetch workspace")
76
}
77
// We'll ignore not-found errors, since they can't be fixed by an immediate
78
// requeue (we'll need to wait for a new notification), and we can get them
79
// on deleted requests.
80
// On any other error, let the controller requeue an event with exponential
81
// backoff.
82
return ctrl.Result{}, client.IgnoreNotFound(err)
83
}
84
log = log.WithValues("owi", workspace.OWI())
85
ctx = logr.NewContext(ctx, log)
86
87
if workspace.IsConditionTrue(workspacev1.WorkspaceConditionTimeout) {
88
// Workspace has already been marked as timed out.
89
// Return and don't requeue another reconciliation.
90
return ctrl.Result{}, nil
91
}
92
93
if r.maintenance.IsEnabled(ctx) {
94
// Don't reconcile timeouts in maintenance mode, to prevent workspace deletion.
95
// Requeue after some time to ensure we do still reconcile this workspace when
96
// maintenance mode ends.
97
return ctrl.Result{RequeueAfter: maintenanceRequeue}, nil
98
}
99
100
// The workspace hasn't timed out yet. After this point, we always
101
// want to requeue a reconciliation after the configured interval.
102
defer func() {
103
result.RequeueAfter = r.reconcileInterval
104
}()
105
106
timedout := r.isWorkspaceTimedOut(&workspace)
107
if timedout == "" {
108
// Hasn't timed out.
109
return ctrl.Result{}, nil
110
}
111
112
// Workspace timed out, set Timeout condition.
113
log.V(2).Info("Workspace timed out", "reason", timedout)
114
if err = retry.RetryOnConflict(retry.DefaultBackoff, func() error {
115
err := r.Get(ctx, types.NamespacedName{Name: workspace.Name, Namespace: workspace.Namespace}, &workspace)
116
if err != nil {
117
return err
118
}
119
120
workspace.Status.SetCondition(workspacev1.NewWorkspaceConditionTimeout(timedout))
121
return r.Status().Update(ctx, &workspace)
122
}); err != nil {
123
log.Error(err, "Failed to update workspace status with Timeout condition")
124
return ctrl.Result{}, fmt.Errorf("failed to add timeout condition: %w", err)
125
}
126
127
r.recorder.Event(&workspace, corev1.EventTypeNormal, "TimedOut", timedout)
128
return ctrl.Result{}, nil
129
}
130
131
// timeoutActivity is the human-readable name of what a workspace was doing (or
// waiting for) when a timeout applied. It is interpolated into the timeout reason
// message.
type timeoutActivity string

// Known timeout activities.
const (
	// Startup-related activities.
	activityInit               timeoutActivity = "initialization"
	activityStartup            timeoutActivity = "startup"
	activityCreatingContainers timeoutActivity = "creating containers"
	activityPullingImages      timeoutActivity = "pulling images"

	// Activities of a running workspace.
	activityRunningHeadless timeoutActivity = "running the headless workspace"
	activityNone            timeoutActivity = "period of inactivity"
	activityMaxLifetime     timeoutActivity = "maximum lifetime"
	activityClosed          timeoutActivity = "after being closed"
	activityInterrupted     timeoutActivity = "workspace interruption"

	// Shutdown-related activities.
	activityStopping timeoutActivity = "stopping"
	activityBackup   timeoutActivity = "backup"
)
146
147
// isWorkspaceTimedOut determines if a workspace is timed out based on the manager configuration and state the pod is in.
148
// This function does NOT use the Timeout condition, but rather is used to set that condition in the first place.
149
func (r *TimeoutReconciler) isWorkspaceTimedOut(ws *workspacev1.Workspace) (reason string) {
150
timeouts := r.Config.Timeouts
151
phase := ws.Status.Phase
152
153
decide := func(start time.Time, timeout util.Duration, activity timeoutActivity) string {
154
td := time.Duration(timeout)
155
inactivity := time.Since(start)
156
if inactivity < td {
157
return ""
158
}
159
160
return fmt.Sprintf("workspace timed out after %s (%s) took longer than %s", activity, formatDuration(inactivity), formatDuration(td))
161
}
162
163
start := ws.ObjectMeta.CreationTimestamp.Time
164
lastActivity := activity.Last(ws)
165
isClosed := ws.IsConditionTrue(workspacev1.WorkspaceConditionClosed)
166
167
switch phase {
168
case workspacev1.WorkspacePhasePending:
169
return decide(start, timeouts.Initialization, activityInit)
170
171
case workspacev1.WorkspacePhaseInitializing:
172
return decide(start, timeouts.TotalStartup, activityStartup)
173
174
case workspacev1.WorkspacePhaseCreating:
175
activity := activityCreatingContainers
176
// TODO:
177
// if status.Conditions.PullingImages == api.WorkspaceConditionBool_TRUE {
178
// activity = activityPullingImages
179
// }
180
return decide(start, timeouts.TotalStartup, activity)
181
182
case workspacev1.WorkspacePhaseRunning:
183
// First check is always for the max lifetime
184
maxLifetime := r.getMaxLifetime(ws)
185
if msg := decide(start, maxLifetime, activityMaxLifetime); msg != "" {
186
return msg
187
}
188
189
timeout := timeouts.RegularWorkspace
190
if customTimeout := ws.Spec.Timeout.Time; customTimeout != nil {
191
timeout = util.Duration(customTimeout.Duration)
192
}
193
activity := activityNone
194
if ws.IsHeadless() {
195
timeout = timeouts.HeadlessWorkspace
196
lastActivity = &start
197
activity = activityRunningHeadless
198
} else if lastActivity == nil {
199
// The workspace is up and running, but the user has never produced any activity
200
return decide(start, timeouts.TotalStartup, activityNone)
201
} else if isClosed {
202
reason := func() string {
203
afterClosed := timeouts.AfterClose
204
if customClosedTimeout := ws.Spec.Timeout.ClosedTimeout; customClosedTimeout != nil {
205
afterClosed = util.Duration(customClosedTimeout.Duration)
206
if afterClosed == 0 {
207
return ""
208
}
209
}
210
return decide(*lastActivity, afterClosed, activityClosed)
211
}()
212
if reason != "" {
213
return reason
214
}
215
}
216
return decide(*lastActivity, timeout, activity)
217
218
case workspacev1.WorkspacePhaseStopping:
219
if isWorkspaceBeingDeleted(ws) && !ws.IsConditionTrue(workspacev1.WorkspaceConditionBackupComplete) {
220
// Beware: we apply the ContentFinalization timeout only to workspaces which are currently being deleted.
221
// We basically don't expect a workspace to be in content finalization before it's been deleted.
222
return decide(ws.DeletionTimestamp.Time, timeouts.ContentFinalization, activityBackup)
223
} else if !isWorkspaceBeingDeleted(ws) {
224
// workspaces that have not been deleted have never timed out
225
return ""
226
} else {
227
return decide(ws.DeletionTimestamp.Time, timeouts.Stopping, activityStopping)
228
}
229
230
default:
231
// The only other phases we can be in is stopped which is pointless to time out
232
return ""
233
}
234
}
235
236
// getMaxLifetime returns the maximum lifetime that applies to ws: the workspace's
// own Spec.Timeout.MaximumLifetime when set, otherwise the configured default.
func (r *TimeoutReconciler) getMaxLifetime(ws *workspacev1.Workspace) util.Duration {
	override := ws.Spec.Timeout.MaximumLifetime
	if override == nil {
		return r.Config.Timeouts.MaxLifetime
	}
	return util.Duration(override.Duration)
}
243
244
// formatDuration renders d, rounded to the nearest minute, as zero-padded hours
// and minutes, e.g. "01h30m".
func formatDuration(d time.Duration) string {
	rounded := d.Round(time.Minute)
	hours := rounded / time.Hour
	minutes := (rounded % time.Hour) / time.Minute
	return fmt.Sprintf("%02dh%02dm", hours, minutes)
}
251
252
// SetupWithManager sets up the controller with the Manager.
253
func (r *TimeoutReconciler) SetupWithManager(mgr ctrl.Manager) error {
254
maxConcurrentReconciles := r.Config.TimeoutMaxConcurrentReconciles
255
if maxConcurrentReconciles <= 0 {
256
maxConcurrentReconciles = 1
257
}
258
259
return ctrl.NewControllerManagedBy(mgr).
260
Named("timeout").
261
WithOptions(controller.Options{MaxConcurrentReconciles: maxConcurrentReconciles}).
262
For(&workspacev1.Workspace{}).
263
WithEventFilter(predicate.NewPredicateFuncs(func(object client.Object) bool {
264
for k, v := range object.GetLabels() {
265
if k == k8s.WorkspaceManagedByLabel {
266
switch v {
267
case constants.ManagedBy:
268
return true
269
default:
270
return false
271
}
272
}
273
}
274
275
return true
276
})).
277
Complete(r)
278
}
279
280