Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
gitpod-io
GitHub Repository: gitpod-io/gitpod
Path: blob/main/components/workspacekit/cmd/rings.go
2498 views
1
// Copyright (c) 2020 Gitpod GmbH. All rights reserved.
2
// Licensed under the GNU Affero General Public License (AGPL).
3
// See License.AGPL.txt in the project root for license information.
4
5
package cmd
6
7
import (
8
"bufio"
9
"bytes"
10
"context"
11
"encoding/json"
12
"errors"
13
"fmt"
14
"io"
15
"io/fs"
16
"io/ioutil"
17
"net"
18
"os"
19
"os/exec"
20
"os/signal"
21
"path/filepath"
22
"runtime"
23
"strconv"
24
"strings"
25
"syscall"
26
"time"
27
28
"github.com/rootless-containers/rootlesskit/pkg/msgutil"
29
"github.com/rootless-containers/rootlesskit/pkg/sigproxy"
30
sigproxysignal "github.com/rootless-containers/rootlesskit/pkg/sigproxy/signal"
31
libseccomp "github.com/seccomp/libseccomp-golang"
32
"github.com/spf13/cobra"
33
"golang.org/x/sys/unix"
34
"golang.org/x/xerrors"
35
"google.golang.org/grpc"
36
"google.golang.org/grpc/codes"
37
"google.golang.org/grpc/credentials/insecure"
38
"google.golang.org/grpc/status"
39
40
common_grpc "github.com/gitpod-io/gitpod/common-go/grpc"
41
"github.com/gitpod-io/gitpod/common-go/log"
42
"github.com/gitpod-io/gitpod/workspacekit/pkg/lift"
43
"github.com/gitpod-io/gitpod/workspacekit/pkg/seccomp"
44
"github.com/gitpod-io/gitpod/ws-daemon/api"
45
daemonapi "github.com/gitpod-io/gitpod/ws-daemon/api"
46
)
47
48
// Timeouts governing the hand-over between the rings.
const (
	// ring1ShutdownTimeout is how long ring1 may keep running after it was sent
	// SIGTERM before ring0 escalates to SIGKILL. Bounding it leaves ring0 time to
	// clean up before the kubelet sends its own SIGKILL.
	//
	// The value has to cover ring1's own shutdown (see the time budgets in
	// supervisor.go) plus the conversation with ws-daemon, all within the
	// terminationGracePeriod of the workspace pod.
	ring1ShutdownTimeout = 20 * time.Second

	// ring2StartupTimeout bounds the wait between starting ring2 and ring2
	// connecting back to the parent socket.
	ring2StartupTimeout = 5 * time.Second
)
61
62
var ring0Cmd = &cobra.Command{
63
Use: "ring0",
64
Short: "starts ring0 - enter here",
65
Run: func(_ *cobra.Command, args []string) {
66
log.Init(ServiceName, Version, true, false)
67
68
wsid := os.Getenv("GITPOD_WORKSPACE_ID")
69
if wsid == "" {
70
log.Error("cannot find GITPOD_WORKSPACE_ID")
71
return
72
}
73
74
log := log.WithField("ring", 0).WithField("workspaceId", wsid)
75
76
common_grpc.SetupLogging()
77
78
exitCode := 1
79
defer handleExit(&exitCode)
80
81
defer log.Info("ring0 stopped")
82
83
ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
84
defer cancel()
85
86
client, err := connectToInWorkspaceDaemonService(ctx)
87
if err != nil {
88
log.WithError(err).Error("cannot connect to daemon from ring0")
89
return
90
}
91
92
prep, err := client.PrepareForUserNS(ctx, &daemonapi.PrepareForUserNSRequest{})
93
if err != nil {
94
log.WithError(err).Fatal("cannot prepare for user namespaces")
95
return
96
}
97
client.Close()
98
99
defer func() {
100
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
101
defer cancel()
102
103
client, err := connectToInWorkspaceDaemonService(ctx)
104
if err != nil {
105
log.WithError(err).Error("cannot connect to daemon from ring0 in defer")
106
return
107
}
108
defer client.Close()
109
110
_, err = client.Teardown(ctx, &daemonapi.TeardownRequest{})
111
if err != nil {
112
log.WithError(err).Error("cannot trigger teardown")
113
}
114
}()
115
116
cmd := exec.Command("/proc/self/exe", "ring1")
117
cmd.SysProcAttr = &syscall.SysProcAttr{
118
Pdeathsig: syscall.SIGKILL,
119
Cloneflags: syscall.CLONE_NEWUSER | syscall.CLONE_NEWNS | unix.CLONE_NEWCGROUP,
120
}
121
cmd.Stdin = os.Stdin
122
cmd.Stdout = os.Stdout
123
cmd.Stderr = os.Stderr
124
cmd.Env = append(os.Environ(),
125
"WORKSPACEKIT_FSSHIFT="+prep.FsShift.String(),
126
)
127
128
if err := cmd.Start(); err != nil {
129
log.WithError(err).Error("failed to start ring0")
130
return
131
}
132
133
sigc := make(chan os.Signal, 128)
134
signal.Notify(sigc)
135
go func() {
136
defer func() {
137
// This is a 'just in case' fallback, in case we're racing the cmd.Process and it's become
138
// nil in the time since we checked.
139
err := recover()
140
if err != nil {
141
log.WithField("recovered", err).Error("recovered from panic")
142
}
143
}()
144
145
for {
146
sig := <-sigc
147
if sig != unix.SIGTERM {
148
_ = cmd.Process.Signal(sig)
149
continue
150
}
151
152
_ = cmd.Process.Signal(unix.SIGTERM)
153
time.Sleep(ring1ShutdownTimeout)
154
if cmd.Process == nil || cmd.ProcessState.Exited() {
155
return
156
}
157
158
log.Warn("ring1 did not shut down in time - sending sigkill")
159
err = cmd.Process.Kill()
160
if err != nil {
161
if isProcessAlreadyFinished(err) {
162
err = nil
163
return
164
}
165
166
log.WithError(err).Error("cannot kill ring1")
167
}
168
return
169
}
170
}()
171
172
err = cmd.Wait()
173
if eerr, ok := err.(*exec.ExitError); ok {
174
state, ok := eerr.ProcessState.Sys().(syscall.WaitStatus)
175
if ok && state.Signal() == syscall.SIGKILL {
176
log.Warn("ring1 was killed")
177
return
178
}
179
}
180
if err != nil {
181
if eerr, ok := err.(*exec.ExitError); ok {
182
exitCode = eerr.ExitCode()
183
}
184
log.WithError(err).Error("unexpected exit")
185
return
186
}
187
exitCode = 0 // once we get here everythings good
188
},
189
}
190
191
// ring1Opts holds the command line options of the ring1 command.
var ring1Opts struct {
	// MappingEstablished is bound to the --mapping-established flag. ring1
	// re-executes itself with that flag once ws-daemon has written the UID/GID
	// mappings.
	MappingEstablished bool
}
194
var ring1Cmd = &cobra.Command{
195
Use: "ring1",
196
Short: "starts ring1",
197
Run: func(_cmd *cobra.Command, args []string) {
198
log.Init(ServiceName, Version, true, false)
199
200
wsid := os.Getenv("GITPOD_WORKSPACE_ID")
201
if wsid == "" {
202
log.Error("cannot find GITPOD_WORKSPACE_ID")
203
return
204
}
205
log := log.WithField("ring", 1).WithField("workspaceId", wsid)
206
207
common_grpc.SetupLogging()
208
209
exitCode := 1
210
defer handleExit(&exitCode)
211
212
defer log.Info("ring1 stopped")
213
214
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
215
defer cancel()
216
217
mapping := []*daemonapi.WriteIDMappingRequest_Mapping{
218
{ContainerId: 0, HostId: 33333, Size: 1},
219
{ContainerId: 1, HostId: 100000, Size: 65534},
220
}
221
if !ring1Opts.MappingEstablished {
222
client, err := connectToInWorkspaceDaemonService(ctx)
223
if err != nil {
224
log.WithError(err).Error("cannot connect to daemon from ring1 when mappings not established")
225
return
226
}
227
defer client.Close()
228
229
_, err = client.WriteIDMapping(ctx, &daemonapi.WriteIDMappingRequest{Pid: int64(os.Getpid()), Gid: false, Mapping: mapping})
230
if err != nil {
231
log.WithError(err).Error("cannot establish UID mapping")
232
return
233
}
234
_, err = client.WriteIDMapping(ctx, &daemonapi.WriteIDMappingRequest{Pid: int64(os.Getpid()), Gid: true, Mapping: mapping})
235
if err != nil {
236
log.WithError(err).Error("cannot establish GID mapping")
237
return
238
}
239
err = syscall.Exec("/proc/self/exe", append(os.Args, "--mapping-established"), os.Environ())
240
if err != nil {
241
log.WithError(err).Error("cannot exec /proc/self/exe")
242
return
243
}
244
245
return
246
}
247
248
// The parent calls child with Pdeathsig, but it is cleared when the UID/GID mapping is written.
249
// (see also https://github.com/rootless-containers/rootlesskit/issues/65#issuecomment-492343646).
250
//
251
// (cw) I have been able to reproduce this issue without newuidmap/newgidmap.
252
// See https://gist.github.com/csweichel/3fc9d4b0752367d4a436f969c8685c06
253
runtime.LockOSThread()
254
_ = unix.Prctl(unix.PR_SET_PDEATHSIG, uintptr(unix.SIGKILL), 0, 0, 0)
255
runtime.UnlockOSThread()
256
257
ring2Root, err := os.MkdirTemp("", "supervisor")
258
if err != nil {
259
log.WithError(err).Fatal("cannot create tempdir")
260
}
261
262
var fsshift api.FSShiftMethod
263
if v, ok := api.FSShiftMethod_value[os.Getenv("WORKSPACEKIT_FSSHIFT")]; !ok {
264
log.WithField("fsshift", os.Getenv("WORKSPACEKIT_FSSHIFT")).Fatal("unknown FS shift method")
265
} else {
266
fsshift = api.FSShiftMethod(v)
267
}
268
269
type mnte struct {
270
Target string
271
Source string
272
FSType string
273
Flags uintptr
274
}
275
276
var mnts []mnte
277
switch fsshift {
278
case api.FSShiftMethod_SHIFTFS:
279
mnts = append(mnts,
280
mnte{Target: "/", Source: "/.workspace/mark", FSType: "shiftfs"},
281
)
282
default:
283
log.WithField("fsshift", fsshift).Fatal("unknown FS shift method")
284
}
285
286
procMounts, err := ioutil.ReadFile("/proc/mounts")
287
if err != nil {
288
log.WithError(err).Fatal("cannot read /proc/mounts")
289
}
290
291
candidates, err := findBindMountCandidates(bytes.NewReader(procMounts), os.Readlink)
292
if err != nil {
293
log.WithError(err).Fatal("cannot detect mount candidates")
294
}
295
for _, c := range candidates {
296
mnts = append(mnts, mnte{Target: c, Flags: unix.MS_BIND | unix.MS_REC})
297
}
298
mnts = append(mnts, mnte{Target: "/tmp", Source: "tmpfs", FSType: "tmpfs"})
299
300
// If this is a cgroupv2 machine, we'll want to mount the cgroup2 FS ourselves
301
if _, err := os.Stat("/sys/fs/cgroup/cgroup.controllers"); err == nil {
302
mnts = append(mnts, mnte{Target: "/sys/fs/cgroup", Source: "tmpfs", FSType: "tmpfs"})
303
mnts = append(mnts, mnte{Target: "/sys/fs/cgroup", Source: "cgroup", FSType: "cgroup2"})
304
}
305
306
if adds := os.Getenv("GITPOD_WORKSPACEKIT_BIND_MOUNTS"); adds != "" {
307
var additionalMounts []string
308
err = json.Unmarshal([]byte(adds), &additionalMounts)
309
if err != nil {
310
log.WithError(err).Fatal("cannot unmarshal GITPOD_WORKSPACEKIT_BIND_MOUNTS")
311
}
312
for _, c := range additionalMounts {
313
mnts = append(mnts, mnte{Target: c, Flags: unix.MS_BIND | unix.MS_REC})
314
}
315
}
316
317
mnts = append(mnts,
318
mnte{Target: "/workspace", Flags: unix.MS_BIND | unix.MS_REC},
319
)
320
321
for _, m := range mnts {
322
dst := filepath.Join(ring2Root, m.Target)
323
_ = os.MkdirAll(dst, 0644)
324
325
if m.Source == "" {
326
m.Source = m.Target
327
}
328
if m.FSType == "" {
329
m.FSType = "none"
330
}
331
332
log.WithFields(map[string]interface{}{
333
"source": m.Source,
334
"target": dst,
335
"fstype": m.FSType,
336
"flags": m.Flags,
337
}).Debug("mounting new rootfs")
338
err = unix.Mount(m.Source, dst, m.FSType, m.Flags, "")
339
if err != nil {
340
log.WithError(err).WithField("dest", dst).WithField("fsType", m.FSType).Error("cannot establish mount")
341
return
342
}
343
}
344
345
// We deliberately do not bind mount `/etc/resolv.conf` and `/etc/hosts`, but instead place a copy
346
// so that users in the workspace can modify the file.
347
copyPaths := []string{"/etc/resolv.conf", "/etc/hosts"}
348
for _, fn := range copyPaths {
349
err = copyRing2Root(ring2Root, fn)
350
if err != nil {
351
log.WithError(err).Warn("cannot copy " + fn)
352
}
353
}
354
355
err = makeHostnameLocal(ring2Root)
356
if err != nil {
357
log.WithError(err).Warn("cannot make /etc/hosts hostname local")
358
}
359
360
env := make([]string, 0, len(os.Environ()))
361
for _, e := range os.Environ() {
362
if strings.HasPrefix(e, "WORKSPACEKIT_") {
363
continue
364
}
365
env = append(env, e)
366
}
367
368
env = append(env, "WORKSPACEKIT_WRAP_NETNS=true")
369
370
socketFN := filepath.Join(os.TempDir(), fmt.Sprintf("workspacekit-ring1-%d.unix", time.Now().UnixNano()))
371
skt, err := net.Listen("unix", socketFN)
372
if err != nil {
373
log.WithError(err).Error("cannot create socket for ring2")
374
return
375
}
376
defer skt.Close()
377
378
var (
379
cloneFlags uintptr = syscall.CLONE_NEWNS | syscall.CLONE_NEWPID | syscall.CLONE_NEWNET
380
)
381
382
cmd := exec.Command("/proc/self/exe", "ring2", socketFN)
383
cmd.SysProcAttr = &syscall.SysProcAttr{
384
Pdeathsig: syscall.SIGKILL,
385
Cloneflags: cloneFlags,
386
}
387
cmd.Dir = ring2Root
388
cmd.Stdin = os.Stdin
389
cmd.Stdout = os.Stdout
390
cmd.Stderr = os.Stderr
391
cmd.Env = env
392
if err := cmd.Start(); err != nil {
393
log.WithError(err).Error("failed to start the child process")
394
return
395
}
396
sigc := sigproxy.ForwardAllSignals(context.Background(), cmd.Process.Pid)
397
defer sigproxysignal.StopCatch(sigc)
398
399
procLoc := filepath.Join(ring2Root, "proc")
400
err = os.MkdirAll(procLoc, 0755)
401
if err != nil {
402
log.WithError(err).Error("cannot create directory for mounting proc")
403
return
404
}
405
406
client, err := connectToInWorkspaceDaemonService(ctx)
407
if err != nil {
408
log.WithError(err).Error("cannot connect to daemon from ring1")
409
return
410
}
411
_, err = client.MountProc(ctx, &daemonapi.MountProcRequest{
412
Target: procLoc,
413
Pid: int64(cmd.Process.Pid),
414
})
415
if err != nil {
416
client.Close()
417
log.WithError(err).Error("cannot mount proc")
418
return
419
}
420
421
_, err = client.EvacuateCGroup(ctx, &daemonapi.EvacuateCGroupRequest{})
422
if err != nil {
423
client.Close()
424
log.WithError(err).Error("cannot evacuate cgroup")
425
return
426
}
427
client.Close()
428
429
// We have to wait for ring2 to come back to us and connect to the socket we've passed along.
430
// There's a chance that ring2 crashes or misbehaves, so we don't want to wait forever, hence
431
// the someone complicated "accept" logic below.
432
// If there's a deadline that can be set somewhere that we've missed, we should be using that
433
// one instead.
434
incoming := make(chan net.Conn, 1)
435
errc := make(chan error, 1)
436
go func() {
437
defer close(incoming)
438
defer close(errc)
439
440
// Accept stops the latest when we close the socket.
441
c, err := skt.Accept()
442
if err != nil {
443
errc <- err
444
return
445
}
446
incoming <- c
447
}()
448
var ring2Conn *net.UnixConn
449
for {
450
var brek bool
451
select {
452
case err = <-errc:
453
if err != nil {
454
brek = true
455
}
456
case c := <-incoming:
457
if c == nil {
458
continue
459
}
460
ring2Conn = c.(*net.UnixConn)
461
brek = true
462
case <-time.After(ring2StartupTimeout):
463
err = xerrors.Errorf("ring2 did not connect in time")
464
brek = true
465
}
466
if brek {
467
break
468
}
469
}
470
if err != nil {
471
log.WithError(err).Error("ring2 did not connect successfully")
472
return
473
}
474
475
client, err = connectToInWorkspaceDaemonService(ctx)
476
if err != nil {
477
log.WithError(err).Error("cannot connect to daemon from ring1 after ring2")
478
return
479
}
480
_, err = client.SetupPairVeths(ctx, &daemonapi.SetupPairVethsRequest{Pid: int64(cmd.Process.Pid)})
481
if err != nil {
482
log.WithError(err).Error("cannot setup pair of veths")
483
return
484
}
485
client.Close()
486
487
log.Info("signaling to child process")
488
_, err = msgutil.MarshalToWriter(ring2Conn, ringSyncMsg{
489
Stage: 1,
490
Rootfs: ring2Root,
491
FSShift: fsshift,
492
})
493
if err != nil {
494
log.WithError(err).Error("cannot send ring sync msg to ring2")
495
return
496
}
497
498
log.Info("awaiting seccomp fd")
499
scmpfd, err := receiveSeccmpFd(ring2Conn)
500
if err != nil {
501
log.WithError(err).Error("did not receive seccomp fd from ring2")
502
return
503
}
504
505
if scmpfd == 0 {
506
log.Warn("received 0 as ring2 seccomp fd - syscall handling is broken")
507
} else {
508
handler := &seccomp.InWorkspaceHandler{
509
FD: scmpfd,
510
Daemon: func(ctx context.Context) (seccomp.InWorkspaceServiceClient, error) {
511
return connectToInWorkspaceDaemonService(ctx)
512
},
513
Ring2PID: cmd.Process.Pid,
514
Ring2Rootfs: ring2Root,
515
BindEvents: make(chan seccomp.BindEvent),
516
WorkspaceId: wsid,
517
}
518
519
stp, errchan := seccomp.Handle(scmpfd, handler, wsid)
520
defer close(stp)
521
go func() {
522
t := time.NewTicker(100 * time.Microsecond)
523
defer t.Stop()
524
for {
525
// We use the ticker to rate-limit the errors from the syscall handler.
526
// We're only handling low-frequency syscalls (e.g. mount), and don't want
527
// the handler to hog the CPU because it fails on its fd.
528
<-t.C
529
err := <-errchan
530
if err == nil {
531
return
532
}
533
log.WithError(err).Warn("syscall handler error")
534
}
535
}()
536
}
537
538
if enclave := os.Getenv("WORKSPACEKIT_RING2_ENCLAVE"); enclave != "" {
539
ecmd := exec.Command("/proc/self/exe", append([]string{"nsenter", "--target", strconv.Itoa(cmd.Process.Pid), "--mount", "--net"}, strings.Fields(enclave)...)...)
540
ecmd.Stdout = os.Stdout
541
ecmd.Stderr = os.Stderr
542
543
err := ecmd.Start()
544
if err != nil {
545
log.WithError(err).WithField("cmd", enclave).Error("cannot run enclave")
546
return
547
}
548
}
549
550
go func() {
551
err := lift.ServeLift(ctx, lift.DefaultSocketPath)
552
if err != nil {
553
log.WithError(err).Error("failed to serve ring1 command lift")
554
}
555
}()
556
557
socketPath := filepath.Join(ring2Root, ".supervisor")
558
if _, err = os.Stat(socketPath); errors.Is(err, fs.ErrNotExist) {
559
if err := os.MkdirAll(socketPath, 0644); err != nil {
560
log.Errorf("failed to create dir %v", err)
561
}
562
}
563
564
stopHook, err := startInfoService(socketPath)
565
if err != nil {
566
// workspace info is not critical, so we will not fail workspace start
567
log.Error("failed to start workspace info service")
568
}
569
defer stopHook()
570
571
err = cmd.Wait()
572
if err != nil {
573
if eerr, ok := err.(*exec.ExitError); ok {
574
exitCode = eerr.ExitCode()
575
}
576
log.WithError(err).Error("unexpected exit")
577
return
578
}
579
exitCode = 0 // once we get here everythings good
580
},
581
}
582
583
var (
	// knownMountCandidatePaths lists mount points that are always accepted as
	// bind mount candidates.
	knownMountCandidatePaths = []string{
		"/workspace",
		"/sys",
		"/dev",
		"/etc/hostname",
		"/etc/ssl/certs/gitpod-ca.crt",
	}
	// rejectMountPaths lists mount points that are never considered as bind
	// mount candidates.
	rejectMountPaths = map[string]struct{}{
		"/etc/resolv.conf": {},
		"/etc/hosts":       {},
	}
)

// findBindMountCandidates scans a /proc/mounts style listing and returns the mount
// points that should be bind mounted.
//
// Every line with at least four fields is classified, in this order:
//  1. mount points listed in knownMountCandidatePaths are accepted,
//  2. lines whose first field is one of cgroup, devpts, mqueue, shm, proc, sysfs
//     or cgroup2 are skipped,
//  3. mount points listed in rejectMountPaths are skipped,
//  4. the remaining mount points are accepted if they contain a `..data` symlink
//     whose destination starts with `..`. That's how configMaps and secrets
//     behave in Kubernetes.
//
// Note/Caveat: configMap or secret volumes with a subPath do not behave as described
// above and will not be recognised by this function. In those cases you'll want to
// use GITPOD_WORKSPACEKIT_BIND_MOUNTS to explicitly list those paths.
//
// readlink resolves the `..data` symlink and is injectable for testing. The returned
// error is the scanner's error, if any; readlink failures merely skip the entry.
func findBindMountCandidates(procMounts io.Reader, readlink func(path string) (dest string, err error)) (mounts []string, err error) {
	isKnownPath := func(target string) bool {
		for _, known := range knownMountCandidatePaths {
			if known == target {
				return true
			}
		}
		return false
	}
	isRejectedSource := func(source string) bool {
		switch source {
		case "cgroup", "devpts", "mqueue", "shm", "proc", "sysfs", "cgroup2":
			return true
		}
		return false
	}

	lines := bufio.NewScanner(procMounts)
	for lines.Scan() {
		entry := strings.Fields(lines.Text())
		if len(entry) < 4 {
			continue
		}
		source, target := entry[0], entry[1]

		// accept known paths
		if isKnownPath(target) {
			mounts = append(mounts, target)
			continue
		}

		// reject known filesystems
		if isRejectedSource(source) {
			continue
		}

		// reject known paths
		if _, rejected := rejectMountPaths[target]; rejected {
			continue
		}

		// test remaining candidates if they're a Kubernetes configMap or secret
		link, linkErr := readlink(filepath.Join(target, "..data"))
		if linkErr != nil || !strings.HasPrefix(link, "..") {
			continue
		}

		mounts = append(mounts, target)
	}
	return mounts, lines.Err()
}
660
661
// copyRing2Root copies the file <fn> to <ring2root>/<fn>, creating or truncating
// the destination with the source file's mode. The destination's parent directory
// must already exist.
//
// It returns the first error encountered. That includes an error from closing the
// destination, so that a failed flush of the written data does not go unnoticed.
func copyRing2Root(ring2root string, fn string) (err error) {
	src, err := os.Open(fn)
	if err != nil {
		return err
	}
	defer src.Close()

	info, err := src.Stat()
	if err != nil {
		return err
	}

	dst, err := os.OpenFile(filepath.Join(ring2root, fn), os.O_CREATE|os.O_TRUNC|os.O_WRONLY, info.Mode())
	if err != nil {
		return err
	}
	defer func() {
		// surface the close error unless an earlier error is already being returned
		if cerr := dst.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	_, err = io.Copy(dst, src)
	return err
}
687
688
// makeHostnameLocal rewrites <ring2root>/etc/hosts so that the entry for this
// machine's hostname points to 127.0.0.1.
//
// Line by line:
//   - blank lines and comment lines are kept as they are,
//   - lines whose first field is not an IPv4 address are dropped
//     (NOTE(review): this removes e.g. IPv6 entries - confirm that is intended),
//   - IPv4 lines without a host name are kept as they are,
//   - an IPv4 line whose first host name equals the hostname is replaced by
//     "127.0.0.1 <hostname>"; all other IPv4 lines are kept as they are.
//
// The file is written back with its original mode. Errors from determining the
// hostname or from accessing the file are returned unchanged.
func makeHostnameLocal(ring2root string) error {
	hostname, err := os.Hostname()
	if err != nil {
		return err
	}
	path := filepath.Join(ring2root, "/etc/hosts")
	stat, err := os.Stat(path)
	if err != nil {
		return err
	}
	b, err := os.ReadFile(path)
	if err != nil {
		return err
	}

	lines := strings.Split(string(b), "\n")
	newLines := make([]string, 0, len(lines))
	for _, line := range lines {
		fields := strings.Fields(line)
		if len(fields) == 0 || strings.HasPrefix(fields[0], "#") {
			newLines = append(newLines, line)
			continue
		}
		if net.ParseIP(fields[0]).To4() == nil {
			continue
		}
		if len(fields) < 2 {
			// an address without a host name: nothing to compare, and indexing
			// fields[1] would panic
			newLines = append(newLines, line)
			continue
		}
		if fields[1] == hostname {
			newLines = append(newLines, "127.0.0.1 "+hostname)
		} else {
			newLines = append(newLines, line)
		}
	}
	return os.WriteFile(path, []byte(strings.Join(newLines, "\n")), stat.Mode())
}
726
727
// receiveSeccmpFd reads one socket control message from conn and returns the
// first file descriptor it carries as a seccomp fd.
//
// A five second deadline is set on conn before reading. An error is returned if
// the message cannot be received or parsed, if there is not exactly one control
// message, or if that message carries no file descriptor.
func receiveSeccmpFd(conn *net.UnixConn) (libseccomp.ScmpFd, error) {
	if err := conn.SetDeadline(time.Now().Add(5 * time.Second)); err != nil {
		return 0, xerrors.Errorf("cannot setdeadline: %v", err)
	}

	sockFile, err := conn.File()
	if err != nil {
		return 0, xerrors.Errorf("cannot open socket: %v", err)
	}
	defer sockFile.Close()
	rawFD := int(sockFile.Fd())

	// room for exactly one control message with a 4 byte payload
	oob := make([]byte, unix.CmsgSpace(4))
	if _, _, _, _, err = unix.Recvmsg(rawFD, nil, oob, 0); err != nil {
		return 0, xerrors.Errorf("cannot recvmsg from fd '%d': %v", rawFD, err)
	}

	ctrlMsgs, err := unix.ParseSocketControlMessage(oob)
	if err != nil {
		return 0, xerrors.Errorf("cannot parse socket control message: %v", err)
	}
	if len(ctrlMsgs) != 1 {
		return 0, xerrors.Errorf("expected a single socket control message")
	}

	rights, err := unix.ParseUnixRights(&ctrlMsgs[0])
	if err != nil {
		return 0, xerrors.Errorf("cannot parse unix rights: %v", err)
	}
	if len(rights) == 0 {
		return 0, xerrors.Errorf("expected a single socket FD")
	}

	return libseccomp.ScmpFd(rights[0]), nil
}
765
766
// ring2Opts holds the command line options of the ring2 command.
var ring2Opts struct {
	// SupervisorPath is bound to the --supervisor-path flag and names the
	// binary ring2 finally execs.
	SupervisorPath string
}
769
var ring2Cmd = &cobra.Command{
770
Use: "ring2 <ring1Socket>",
771
Short: "starts ring2",
772
Args: cobra.ExactArgs(1),
773
Run: func(_cmd *cobra.Command, args []string) {
774
log.Init(ServiceName, Version, true, false)
775
776
wsid := os.Getenv("GITPOD_WORKSPACE_ID")
777
if wsid == "" {
778
log.Error("cannot find GITPOD_WORKSPACE_ID")
779
return
780
}
781
log := log.WithField("ring", 2).WithField("workspaceId", wsid)
782
783
common_grpc.SetupLogging()
784
785
exitCode := 1
786
defer handleExit(&exitCode)
787
788
defer log.Info("ring2 stopped")
789
790
// we talk to ring1 using a Unix socket, so that we can send the seccomp fd across.
791
rconn, err := net.Dial("unix", args[0])
792
if err != nil {
793
log.WithError(err).Error("cannot connect to parent")
794
return
795
}
796
conn := rconn.(*net.UnixConn)
797
defer conn.Close()
798
799
log.Info("connected to parent socket")
800
801
// Before we do anything, we wait for the parent to make /proc available to us.
802
var msg ringSyncMsg
803
_, err = msgutil.UnmarshalFromReader(conn, &msg)
804
if err != nil {
805
log.WithError(err).Error("cannot read parent message")
806
return
807
}
808
if msg.Stage != 1 {
809
log.WithError(err).WithField("msg", fmt.Sprintf("%+q", msg)).Error("expected stage 1 sync message")
810
return
811
}
812
813
err = pivotRoot(msg.Rootfs, msg.FSShift)
814
if err != nil {
815
log.WithError(err).Error("cannot pivot root")
816
return
817
}
818
819
type fakeRlimit struct {
820
Cur uint64 `json:"softLimit"`
821
Max uint64 `json:"hardLimit"`
822
}
823
824
var rLimitCore fakeRlimit
825
826
rLimitValue := os.Getenv("GITPOD_RLIMIT_CORE")
827
if len(rLimitValue) != 0 {
828
err = json.Unmarshal([]byte(rLimitValue), &rLimitCore)
829
if err != nil {
830
log.WithError(err).WithField("data", rLimitValue).Error("cannot deserialize GITPOD_RLIMIT_CORE")
831
}
832
}
833
834
// we either set a limit or explicitly disable core dumps by setting 0 as values
835
err = unix.Setrlimit(unix.RLIMIT_CORE, &unix.Rlimit{
836
Cur: rLimitCore.Cur,
837
Max: rLimitCore.Max,
838
})
839
if err != nil {
840
log.WithError(err).WithField("rlimit", rLimitCore).Error("cannot configure core dumps")
841
}
842
843
// Now that we're in our new root filesystem, including proc and all, we can load
844
// our seccomp filter, and tell our parent about it.
845
scmpFd, err := seccomp.LoadFilter()
846
if err != nil {
847
log.WithError(err).Error("cannot load seccomp filter - syscall handling would be broken")
848
return
849
}
850
connf, err := conn.File()
851
if err != nil {
852
log.WithError(err).Error("cannot get parent socket fd")
853
return
854
}
855
defer connf.Close()
856
857
sktfd := int(connf.Fd())
858
err = unix.Sendmsg(sktfd, nil, unix.UnixRights(int(scmpFd)), nil, 0)
859
if err != nil {
860
log.WithError(err).Error("cannot send seccomp fd")
861
return
862
}
863
864
err = unix.Exec(ring2Opts.SupervisorPath, []string{"supervisor", "init"}, os.Environ())
865
if err != nil {
866
if eerr, ok := err.(*exec.ExitError); ok {
867
exitCode = eerr.ExitCode()
868
}
869
log.WithError(err).WithField("cmd", ring2Opts.SupervisorPath).Error("cannot exec")
870
return
871
}
872
exitCode = 0 // once we get here everythings good
873
},
874
}
875
876
// pivotRoot will call pivot_root such that rootfs becomes the new root
877
// filesystem, and everything else is cleaned up.
878
//
879
// copied from runc: https://github.com/opencontainers/runc/blob/cf6c074115d00c932ef01dedb3e13ba8b8f964c3/libcontainer/rootfs_linux.go#L760
880
func pivotRoot(rootfs string, fsshift api.FSShiftMethod) error {
881
// While the documentation may claim otherwise, pivot_root(".", ".") is
882
// actually valid. What this results in is / being the new root but
883
// /proc/self/cwd being the old root. Since we can play around with the cwd
884
// with pivot_root this allows us to pivot without creating directories in
885
// the rootfs. Shout-outs to the LXC developers for giving us this idea.
886
887
oldroot, err := unix.Open("/", unix.O_DIRECTORY|unix.O_RDONLY, 0)
888
if err != nil {
889
return err
890
}
891
defer unix.Close(oldroot)
892
893
newroot, err := unix.Open(rootfs, unix.O_DIRECTORY|unix.O_RDONLY, 0)
894
if err != nil {
895
return err
896
}
897
defer unix.Close(newroot)
898
899
// Change to the new root so that the pivot_root actually acts on it.
900
if err := unix.Fchdir(newroot); err != nil {
901
return err
902
}
903
904
if err := unix.PivotRoot(".", "."); err != nil {
905
return xerrors.Errorf("pivot_root %s", err)
906
}
907
908
// Currently our "." is oldroot (according to the current kernel code).
909
// However, purely for safety, we will fchdir(oldroot) since there isn't
910
// really any guarantee from the kernel what /proc/self/cwd will be after a
911
// pivot_root(2).
912
913
if err := unix.Fchdir(oldroot); err != nil {
914
return err
915
}
916
917
// Make oldroot rslave to make sure our unmounts don't propagate to the
918
// host (and thus bork the machine). We don't use rprivate because this is
919
// known to cause issues due to races where we still have a reference to a
920
// mount while a process in the host namespace are trying to operate on
921
// something they think has no mounts (devicemapper in particular).
922
if err := unix.Mount("", ".", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
923
return err
924
}
925
// Preform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd.
926
if err := unix.Unmount(".", unix.MNT_DETACH); err != nil {
927
return err
928
}
929
930
// Switch back to our shiny new root.
931
if err := unix.Chdir("/"); err != nil {
932
return xerrors.Errorf("chdir / %s", err)
933
}
934
935
return nil
936
}
937
938
func handleExit(ec *int) {
939
exitCode := *ec
940
if exitCode != 0 {
941
sleepForDebugging()
942
}
943
os.Exit(exitCode)
944
}
945
946
func sleepForDebugging() {
947
if os.Getenv("GITPOD_WORKSPACEKIT_SLEEP_FOR_DEBUGGING") != "true" {
948
return
949
}
950
951
log.Info("sleeping five minutes to allow debugging")
952
sigChan := make(chan os.Signal, 1)
953
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
954
select {
955
case <-sigChan:
956
case <-time.After(5 * time.Minute):
957
}
958
}
959
960
type ringSyncMsg struct {
961
Stage int `json:"stage"`
962
Rootfs string `json:"rootfs"`
963
FSShift api.FSShiftMethod `json:"fsshift"`
964
}
965
966
type inWorkspaceServiceClient struct {
967
daemonapi.InWorkspaceServiceClient
968
969
conn *grpc.ClientConn
970
}
971
972
// Close closes the underlying gRPC connection and returns its error. It is a
// no-op returning nil when the client holds no connection.
func (iwsc *inWorkspaceServiceClient) Close() error {
	if conn := iwsc.conn; conn != nil {
		return conn.Close()
	}

	return nil
}
979
980
// ConnectToInWorkspaceDaemonService attempts to connect to the InWorkspaceService offered by the ws-daemon.
981
func connectToInWorkspaceDaemonService(ctx context.Context) (*inWorkspaceServiceClient, error) {
982
const socketFN = "/.workspace/daemon.sock"
983
984
t := time.NewTicker(500 * time.Millisecond)
985
errs := errors.New("errors of connect to ws-daemon")
986
defer t.Stop()
987
for {
988
if _, err := os.Stat(socketFN); err == nil {
989
break
990
} else if !errors.Is(err, fs.ErrNotExist) {
991
errs = fmt.Errorf("%v: %w", errs, err)
992
}
993
994
select {
995
case <-t.C:
996
continue
997
case <-ctx.Done():
998
return nil, fmt.Errorf("socket did not appear before context was canceled: %v", errs)
999
}
1000
}
1001
1002
conn, err := grpc.DialContext(ctx, "unix://"+socketFN, grpc.WithTransportCredentials(insecure.NewCredentials()))
1003
if err != nil {
1004
return nil, err
1005
}
1006
1007
return &inWorkspaceServiceClient{
1008
InWorkspaceServiceClient: daemonapi.NewInWorkspaceServiceClient(conn),
1009
conn: conn,
1010
}, nil
1011
}
1012
1013
type workspaceInfoService struct {
1014
socket net.Listener
1015
server *grpc.Server
1016
api.UnimplementedWorkspaceInfoServiceServer
1017
}
1018
1019
func startInfoService(socketDir string) (func(), error) {
1020
socketFN := filepath.Join(socketDir, "info.sock")
1021
if _, err := os.Stat(socketFN); err == nil {
1022
_ = os.Remove(socketFN)
1023
}
1024
1025
sckt, err := net.Listen("unix", socketFN)
1026
if err != nil {
1027
return nil, xerrors.Errorf("cannot create info socket: %w", err)
1028
}
1029
1030
err = os.Chmod(socketFN, 0777)
1031
if err != nil {
1032
return nil, xerrors.Errorf("cannot chmod info socket: %w", err)
1033
}
1034
1035
infoSvc := workspaceInfoService{
1036
socket: sckt,
1037
}
1038
1039
limiter := common_grpc.NewRatelimitingInterceptor(
1040
map[string]common_grpc.RateLimit{
1041
"iws.WorkspaceInfoService/WorkspaceInfo": {
1042
RefillInterval: 1500,
1043
BucketSize: 4,
1044
},
1045
})
1046
1047
infoSvc.server = grpc.NewServer(grpc.ChainUnaryInterceptor(limiter.UnaryInterceptor()))
1048
api.RegisterWorkspaceInfoServiceServer(infoSvc.server, &infoSvc)
1049
go func() {
1050
err := infoSvc.server.Serve(sckt)
1051
if err != nil {
1052
log.WithError(err).Error("workspace info server failed")
1053
}
1054
}()
1055
1056
return func() {
1057
infoSvc.server.Stop()
1058
os.Remove(socketFN)
1059
}, nil
1060
}
1061
1062
var lastWorkspaceInfo *api.WorkspaceInfoResponse
1063
1064
func (svc *workspaceInfoService) WorkspaceInfo(ctx context.Context, req *api.WorkspaceInfoRequest) (*api.WorkspaceInfoResponse, error) {
1065
client, err := connectToInWorkspaceDaemonService(ctx)
1066
if err != nil {
1067
log.WithError(err).Error("could not connect to workspace daemon")
1068
return nil, status.Error(codes.Internal, "could not resolve workspace info")
1069
}
1070
defer client.Close()
1071
1072
resp, err := client.WorkspaceInfo(ctx, &api.WorkspaceInfoRequest{})
1073
if err != nil {
1074
e, ok := status.FromError(err)
1075
if ok && e.Code() == codes.ResourceExhausted {
1076
return lastWorkspaceInfo, nil
1077
}
1078
log.WithError(err).Error("could not resolve workspace info")
1079
return nil, status.Error(codes.Internal, "could not resolve workspace info")
1080
} else {
1081
lastWorkspaceInfo = resp
1082
}
1083
return resp, nil
1084
}
1085
1086
func init() {
1087
rootCmd.AddCommand(ring0Cmd)
1088
rootCmd.AddCommand(ring1Cmd)
1089
rootCmd.AddCommand(ring2Cmd)
1090
1091
supervisorPath := os.Getenv("GITPOD_WORKSPACEKIT_SUPERVISOR_PATH")
1092
if supervisorPath == "" {
1093
wd, err := os.Executable()
1094
if err == nil {
1095
wd = filepath.Dir(wd)
1096
supervisorPath = filepath.Join(wd, "supervisor")
1097
} else {
1098
supervisorPath = "/.supervisor/supervisor"
1099
}
1100
}
1101
1102
ring1Cmd.Flags().BoolVar(&ring1Opts.MappingEstablished, "mapping-established", false, "true if the UID/GID mapping has already been established")
1103
ring2Cmd.Flags().StringVar(&ring2Opts.SupervisorPath, "supervisor-path", supervisorPath, "path to the supervisor binary (taken from $GITPOD_WORKSPACEKIT_SUPERVISOR_PATH, defaults to '$PWD/supervisor')")
1104
}
1105
1106
// isProcessAlreadyFinished reports whether err says that the process which was
// to be signalled has already exited. A nil error yields false.
//
// Besides os.ErrProcessDone anywhere in err's chain, the error text is matched
// as well, so that errors which only carry the message are still recognised.
func isProcessAlreadyFinished(err error) bool {
	if err == nil {
		return false
	}
	if errors.Is(err, os.ErrProcessDone) {
		return true
	}
	return strings.Contains(err.Error(), "os: process already finished")
}
1109
1110