Path: blob/main/components/ws-daemon/pkg/daemon/markunmount.go
// Copyright (c) 2021 Gitpod GmbH. All rights reserved.
// Licensed under the GNU Affero General Public License (AGPL).
// See License.AGPL.txt in the project root for license information.

package daemon

import (
	"bufio"
	"bytes"
	"context"
	"errors"
	"io/ioutil"
	"path/filepath"
	"strings"
	"sync"
	"time"

	"golang.org/x/sync/errgroup"
	"golang.org/x/sys/unix"
	"golang.org/x/xerrors"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/util/retry"

	"github.com/gitpod-io/gitpod/common-go/log"
	"github.com/gitpod-io/gitpod/ws-daemon/pkg/dispatch"
	"github.com/prometheus/client_golang/prometheus"
)

const (
	// propagationGracePeriod is the time we allow on top of a container's deletionGracePeriod
	// to make sure the changes propagate on the data plane.
	propagationGracePeriod = 10 * time.Second
)

// NewMarkUnmountFallback produces a new MarkUnmountFallback. reg can be nil.
func NewMarkUnmountFallback(reg prometheus.Registerer) (*MarkUnmountFallback, error) {
	counter := prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: "markunmountfallback_active_total",
		Help: "counts how often the mark unmount fallback was active",
	}, []string{"successful"})
	if reg != nil {
		err := reg.Register(counter)
		if err != nil {
			return nil, err
		}
	}

	return &MarkUnmountFallback{
		activityCounter: counter,
	}, nil
}

// MarkUnmountFallback works around the mount propagation of the ring1 FS mark mount.
// When ws-daemon restarts, runc propagates all rootfs mounts to ws-daemon's mount namespace.
// This prevents proper unmounting of the mark mount, and hence of the workspace container's rootfs.
//
// To work around this issue we wait pod.terminationGracePeriod + propagationGracePeriod,
// after which we attempt to unmount the mark mount ourselves.
//
// Some clusters might run an older version of containerd, for which we build this workaround.
type MarkUnmountFallback struct {
	mu      sync.Mutex
	handled map[string]struct{}

	activityCounter *prometheus.CounterVec
}

// WorkspaceAdded does nothing; it exists only to satisfy the dispatch.Listener interface.
func (c *MarkUnmountFallback) WorkspaceAdded(ctx context.Context, ws *dispatch.Workspace) error {
	return nil
}

// WorkspaceUpdated gets called when a workspace pod is updated. For containers being deleted, we'll check
// if they're still running after their terminationGracePeriod and if Kubernetes still knows about them.
func (c *MarkUnmountFallback) WorkspaceUpdated(ctx context.Context, ws *dispatch.Workspace) error {
	if ws.Pod.DeletionTimestamp == nil {
		return nil
	}

	err := func() error {
		c.mu.Lock()
		defer c.mu.Unlock()

		if c.handled == nil {
			c.handled = make(map[string]struct{})
		}
		if _, exists := c.handled[ws.InstanceID]; exists {
			return nil
		}
		c.handled[ws.InstanceID] = struct{}{}
		return nil
	}()
	if err != nil {
		return err
	}

	var gracePeriod int64
	if ws.Pod.DeletionGracePeriodSeconds != nil {
		gracePeriod = *ws.Pod.DeletionGracePeriodSeconds
	} else {
		gracePeriod = 30
	}
	ttl := time.Duration(gracePeriod)*time.Second + propagationGracePeriod

	dispatch.GetDispatchWaitGroup(ctx).Add(1)
	go func() {
		defer dispatch.GetDispatchWaitGroup(ctx).Done()

		defer func() {
			// We expect the container to be gone now. Don't keep its reference in memory.
			c.mu.Lock()
			delete(c.handled, ws.InstanceID)
			c.mu.Unlock()
		}()

		wait := time.NewTicker(ttl)
		defer wait.Stop()
		select {
		case <-ctx.Done():
			return
		case <-wait.C:
		}

		dsp := dispatch.GetFromContext(ctx)
		if !dsp.WorkspaceExistsOnNode(ws.InstanceID) {
			// container is already gone - all is well
			return
		}

		err := unmountMark(ws.InstanceID)
		if err != nil && !errors.Is(err, context.Canceled) {
			log.WithFields(ws.OWI()).WithError(err).Error("cannot unmount mark mount from within ws-daemon")
			c.activityCounter.WithLabelValues("false").Inc()
		} else {
			c.activityCounter.WithLabelValues("true").Inc()
		}
	}()

	return nil
}

// unmountMark unmounts the mark mount of a workspace. If the mark mount still exists in /proc/mounts
// it means we failed to unmount it and we cannot remove the content. As a side effect the pod will
// stay in Terminating state.
func unmountMark(instanceID string) error {
	mounts, err := ioutil.ReadFile("/proc/mounts")
	if err != nil {
		return xerrors.Errorf("cannot read /proc/mounts: %w", err)
	}

	dir := instanceID + "-daemon"
	path := fromPartialMount(filepath.Join(dir, "mark"), mounts)
	// an empty result means no mount was found
	if len(path) == 0 {
		return nil
	}

	// in some scenarios we need to wait for the unmount
	var canRetryFn = func(err error) bool {
		if !strings.Contains(err.Error(), "device or resource busy") {
			log.WithError(err).WithFields(log.OWI("", "", instanceID)).Info("Will not retry unmount mark")
		}
		return strings.Contains(err.Error(), "device or resource busy")
	}

	var eg errgroup.Group
	for _, p := range path {
		// rebind p so that the goroutine closes over the current loop value
		p := p
		eg.Go(func() error {
			return retry.OnError(wait.Backoff{
				Steps:    5,
				Duration: 1 * time.Second,
				Factor:   5.0,
				Jitter:   0.1,
			}, canRetryFn, func() error {
				return unix.Unmount(p, 0)
			})
		})
	}
	return eg.Wait()
}

// fromPartialMount returns all mount points from the given /proc/mounts content whose path contains path.
func fromPartialMount(path string, info []byte) (res []string) {
	scanner := bufio.NewScanner(bytes.NewReader(info))
	for scanner.Scan() {
		mount := strings.Split(scanner.Text(), " ")
		if len(mount) < 2 {
			continue
		}

		if strings.Contains(mount[1], path) {
			res = append(res, mount[1])
		}
	}

	return res
}
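
For reference, a minimal sketch of how fromPartialMount behaves: given /proc/mounts content, it splits each line on spaces and returns every mount point (second field) that contains the partial path, here "<instanceID>-daemon/mark". The test below is illustrative only and not part of the upstream file; the file name, the sample mount table, and the workspace paths are made up for demonstration, but it compiles against the package above.

// markunmount_example_test.go (illustrative sketch; not part of the upstream repository)
package daemon

import "testing"

// TestFromPartialMountSketch shows fromPartialMount filtering a hypothetical
// /proc/mounts excerpt: only the entry whose mount point contains
// "abc123-daemon/mark" is returned.
func TestFromPartialMountSketch(t *testing.T) {
	// Hypothetical mount table; paths and filesystem types are made up.
	sample := []byte("overlay / overlay rw,relatime 0 0\n" +
		"shiftfs /some/node/path/abc123-daemon/mark shiftfs rw,relatime 0 0\n")

	got := fromPartialMount("abc123-daemon/mark", sample)
	if len(got) != 1 || got[0] != "/some/node/path/abc123-daemon/mark" {
		t.Fatalf("unexpected mount points: %v", got)
	}
}

Because the match is by substring containment, several mount points can match a single workspace, which is why unmountMark unmounts every returned path concurrently via the errgroup.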