Path: blob/main/components/ws-daemon/pkg/content/initializer.go
2500 views
// Copyright (c) 2020 Gitpod GmbH. All rights reserved.1// Licensed under the GNU Affero General Public License (AGPL).2// See License.AGPL.txt in the project root for license information.34package content56import (7"bytes"8"context"9"encoding/json"10"errors"11"io"12"os"13"os/exec"14"path/filepath"15"strconv"16"strings"17"syscall"18"time"1920"github.com/google/uuid"21"github.com/opencontainers/runtime-spec/specs-go"22"github.com/opentracing/opentracing-go"23"github.com/sirupsen/logrus"24"golang.org/x/xerrors"25"google.golang.org/protobuf/proto"2627"github.com/gitpod-io/gitpod/common-go/log"28"github.com/gitpod-io/gitpod/common-go/tracing"29csapi "github.com/gitpod-io/gitpod/content-service/api"30"github.com/gitpod-io/gitpod/content-service/pkg/archive"31wsinit "github.com/gitpod-io/gitpod/content-service/pkg/initializer"32"github.com/gitpod-io/gitpod/content-service/pkg/storage"33"github.com/gitpod-io/gitpod/ws-daemon/pkg/libcontainer/specconv"34)3536// RunInitializerOpts configure RunInitializer37type RunInitializerOpts struct {38// Command is the path to the initializer executable we'll run39Command string40// Args is a set of additional arguments to pass to the initializer executable41Args []string42// Options to use on untar43IdMappings []archive.IDMapping4445UID uint3246GID uint324748OWI OWI49}5051type OWI struct {52Owner string53WorkspaceID string54InstanceID string55}5657func (o OWI) Fields() map[string]interface{} {58return log.OWI(o.Owner, o.WorkspaceID, o.InstanceID)59}6061// errors to be tested with errors.Is62var (63// cannot find snapshot64errCannotFindSnapshot = errors.New("cannot find snapshot")65)6667func CollectRemoteContent(ctx context.Context, rs storage.DirectAccess, ps storage.PresignedAccess, workspaceOwner string, initializer *csapi.WorkspaceInitializer) (rc map[string]storage.DownloadInfo, err error) {68rc = make(map[string]storage.DownloadInfo)6970backup, err := ps.SignDownload(ctx, rs.Bucket(workspaceOwner), rs.BackupObject(storage.DefaultBackup), &storage.SignedURLOptions{})71if err == storage.ErrNotFound {72// no backup found - that's fine73} else if err != nil {74return nil, err75} else {76rc[storage.DefaultBackup] = *backup77}7879si := initializer.GetSnapshot()80pi := initializer.GetPrebuild()81if ci := initializer.GetComposite(); ci != nil {82for _, c := range ci.Initializer {83if c.GetSnapshot() != nil {84si = c.GetSnapshot()85}86if c.GetPrebuild() != nil {87pi = c.GetPrebuild()88}89}90}91if si != nil {92bkt, obj, err := storage.ParseSnapshotName(si.Snapshot)93if err != nil {94return nil, err95}96info, err := ps.SignDownload(ctx, bkt, obj, &storage.SignedURLOptions{})97if err == storage.ErrNotFound {98return nil, errCannotFindSnapshot99}100if err != nil {101return nil, xerrors.Errorf("cannot find snapshot: %w", err)102}103104rc[si.Snapshot] = *info105}106if pi != nil && pi.Prebuild != nil && pi.Prebuild.Snapshot != "" {107bkt, obj, err := storage.ParseSnapshotName(pi.Prebuild.Snapshot)108if err != nil {109return nil, err110}111info, err := ps.SignDownload(ctx, bkt, obj, &storage.SignedURLOptions{})112if err == storage.ErrNotFound {113// no prebuild found - that's fine114} else if err != nil {115return nil, xerrors.Errorf("cannot find prebuild: %w", err)116} else {117rc[pi.Prebuild.Snapshot] = *info118}119}120121return rc, nil122}123124// RunInitializer runs a content initializer in a user, PID and mount namespace to isolate it from ws-daemon125func RunInitializer(ctx context.Context, destination string, initializer *csapi.WorkspaceInitializer, remoteContent map[string]storage.DownloadInfo, opts RunInitializerOpts) (*csapi.InitializerMetrics, error) {126//nolint:ineffassign,staticcheck127span, ctx := opentracing.StartSpanFromContext(ctx, "RunInitializer")128var err error129defer tracing.FinishSpan(span, &err)130131// it's possible the destination folder doesn't exist yet, because the kubelet hasn't created it yet.132// If we fail to create the folder, it either already exists, or we'll fail when we try and mount it.133err = os.MkdirAll(destination, 0755)134if err != nil && !os.IsExist(err) {135return nil, xerrors.Errorf("cannot mkdir destination: %w", err)136}137138init, err := proto.Marshal(initializer)139if err != nil {140return nil, err141}142143if opts.GID == 0 {144opts.GID = wsinit.GitpodGID145}146if opts.UID == 0 {147opts.UID = wsinit.GitpodUID148}149150tmpdir, err := os.MkdirTemp("", "content-init")151if err != nil {152return nil, err153}154defer os.RemoveAll(tmpdir)155156err = os.MkdirAll(filepath.Join(tmpdir, "rootfs"), 0755)157if err != nil {158return nil, err159}160161msg := msgInitContent{162Destination: "/dst",163Initializer: init,164RemoteContent: remoteContent,165TraceInfo: tracing.GetTraceID(span),166IDMappings: opts.IdMappings,167GID: int(opts.GID),168UID: int(opts.UID),169OWI: opts.OWI.Fields(),170}171fc, err := json.MarshalIndent(msg, "", " ")172if err != nil {173return nil, err174}175err = os.WriteFile(filepath.Join(tmpdir, "rootfs", "content.json"), fc, 0644)176if err != nil {177return nil, err178}179180spec := specconv.Example()181182// we assemble the root filesystem from the ws-daemon container183for _, d := range []string{"app", "bin", "dev", "etc", "lib", "opt", "sbin", "sys", "usr", "var", "lib32", "lib64", "tmp"} {184spec.Mounts = append(spec.Mounts, specs.Mount{185Destination: "/" + d,186Source: "/" + d,187Type: "bind",188Options: []string{"rbind", "rprivate"},189})190}191spec.Mounts = append(spec.Mounts, specs.Mount{192Destination: "/dst",193Source: destination,194Type: "bind",195Options: []string{"bind", "rprivate"},196})197198spec.Hostname = "content-init"199spec.Process.Terminal = false200spec.Process.NoNewPrivileges = true201spec.Process.User.UID = opts.UID202spec.Process.User.GID = opts.GID203spec.Process.Args = []string{"/app/content-initializer"}204for _, e := range os.Environ() {205if strings.HasPrefix(e, "JAEGER_") || strings.HasPrefix(e, "GIT_SSL_CAPATH=") || strings.HasPrefix(e, "GIT_SSL_CAINFO=") {206spec.Process.Env = append(spec.Process.Env, e)207}208}209210// TODO(cw): make the initializer work without chown211spec.Process.Capabilities.Ambient = append(spec.Process.Capabilities.Ambient, "CAP_CHOWN", "CAP_FOWNER", "CAP_MKNOD", "CAP_SETFCAP")212spec.Process.Capabilities.Bounding = append(spec.Process.Capabilities.Bounding, "CAP_CHOWN", "CAP_FOWNER", "CAP_MKNOD", "CAP_SETFCAP")213spec.Process.Capabilities.Effective = append(spec.Process.Capabilities.Effective, "CAP_CHOWN", "CAP_FOWNER", "CAP_MKNOD", "CAP_SETFCAP")214spec.Process.Capabilities.Inheritable = append(spec.Process.Capabilities.Inheritable, "CAP_CHOWN", "CAP_FOWNER", "CAP_MKNOD", "CAP_SETFCAP")215spec.Process.Capabilities.Permitted = append(spec.Process.Capabilities.Permitted, "CAP_CHOWN", "CAP_FOWNER", "CAP_MKNOD", "CAP_SETFCAP")216// TODO(cw): setup proper networking in a netns, rather than relying on ws-daemons network217n := 0218for _, x := range spec.Linux.Namespaces {219if x.Type == specs.NetworkNamespace {220continue221}222223spec.Linux.Namespaces[n] = x224n++225}226spec.Linux.Namespaces = spec.Linux.Namespaces[:n]227228fc, err = json.MarshalIndent(spec, "", " ")229if err != nil {230return nil, err231}232err = os.WriteFile(filepath.Join(tmpdir, "config.json"), fc, 0644)233if err != nil {234return nil, err235}236237args := []string{"--root", "state"}238239if log.Log.Logger.IsLevelEnabled(logrus.DebugLevel) {240args = append(args, "--debug")241}242243var name string244if opts.OWI.InstanceID == "" {245id, err := uuid.NewRandom()246if err != nil {247return nil, err248}249name = "init-rnd-" + id.String()250} else {251name = "init-ws-" + opts.OWI.InstanceID252}253254// pass a pipe "file" to the content init process as fd 3 to capture the error output255errIn, errOut, err := os.Pipe()256if err != nil {257return nil, err258}259260// pass a pipe "file" to the content init process as fd 4 to capture the metrics output261statsIn, statsOut, err := os.Pipe()262if err != nil {263return nil, err264}265266args = append(args, "--log-format", "json", "run")267extraFiles := []*os.File{errOut, statsOut}268args = append(args, "--preserve-fds", strconv.Itoa(len(extraFiles)))269args = append(args, name)270271var cmdOut bytes.Buffer272cmd := exec.Command("runc", args...)273cmd.Dir = tmpdir274cmd.Stdout = &cmdOut275cmd.Stderr = os.Stderr276cmd.Stdin = os.Stdin277cmd.ExtraFiles = extraFiles278err = cmd.Run()279log.FromBuffer(&cmdOut, log.WithFields(opts.OWI.Fields()))280281// read contents of the extra files282errOut.Close()283statsOut.Close()284errmsg, statsBytes := waitForAndReadExtraFiles(errIn, statsIn)285if err != nil {286if exiterr, ok := err.(*exec.ExitError); ok {287// The program has exited with an exit code != 0. If it's FAIL_CONTENT_INITIALIZER_EXIT_CODE, it was deliberate.288if status, ok := exiterr.Sys().(syscall.WaitStatus); ok && status.ExitStatus() == FAIL_CONTENT_INITIALIZER_EXIT_CODE {289log.WithError(err).WithFields(opts.OWI.Fields()).WithField("errmsgsize", len(errmsg)).WithField("exitCode", status.ExitStatus()).WithField("args", args).Error("content init failed")290return nil, xerrors.Errorf(string(errmsg))291}292}293294return nil, err295}296297stats := parseStats(statsBytes)298return stats, nil299}300301// waitForAndReadExtraFiles tries to read the content of the extra files passed to the content initializer, and waits up to 1s to do so302func waitForAndReadExtraFiles(errIn *os.File, statsIn *os.File) (errmsg []byte, statsBytes []byte) {303// read err304errch := make(chan []byte, 1)305go func() {306errmsg, _ := io.ReadAll(errIn)307errch <- errmsg308}()309310// read stats311statsCh := make(chan []byte, 1)312go func() {313statsBytes, readErr := io.ReadAll(statsIn)314if readErr != nil {315log.WithError(readErr).Warn("cannot read stats")316}317log.WithField("statsBytes", log.TrustedValueWrap{Value: string(statsBytes)}).Debug("read stats")318statsCh <- statsBytes319}()320321readFiles := 0322for {323select {324case errmsg = <-errch:325readFiles += 1326case statsBytes = <-statsCh:327readFiles += 1328case <-time.After(1 * time.Second):329if errmsg == nil {330errmsg = []byte("failed to read content initializer response")331}332return333}334if readFiles == 2 {335return336}337}338}339340func parseStats(statsBytes []byte) *csapi.InitializerMetrics {341var stats csapi.InitializerMetrics342err := json.Unmarshal(statsBytes, &stats)343if err != nil {344log.WithError(err).WithField("bytes", log.TrustedValueWrap{Value: statsBytes}).Warn("cannot unmarshal stats")345return nil346}347return &stats348}349350// RUN_INITIALIZER_CHILD_ERROUT_FD is the fileDescriptor of the "errout" file descriptor passed to the content initializer351const RUN_INITIALIZER_CHILD_ERROUT_FD = 3352353// RUN_INITIALIZER_CHILD_STATS_FD is the fileDescriptor of the "stats" file descriptor passed to the content initializer354const RUN_INITIALIZER_CHILD_STATS_FD = 4355356// RunInitializerChild is the function that's expected to run when we call `/proc/self/exe content-initializer`357func RunInitializerChild(statsFd *os.File) (err error) {358fc, err := os.ReadFile("/content.json")359if err != nil {360return err361}362363var initmsg msgInitContent364err = json.Unmarshal(fc, &initmsg)365if err != nil {366return err367}368log.Log = logrus.WithFields(initmsg.OWI)369370defer func() {371if err != nil {372log.WithError(err).WithFields(initmsg.OWI).Error("content init failed")373}374}()375376span := opentracing.StartSpan("RunInitializerChild", opentracing.ChildOf(tracing.FromTraceID(initmsg.TraceInfo)))377defer tracing.FinishSpan(span, &err)378ctx := opentracing.ContextWithSpan(context.Background(), span)379380var req csapi.WorkspaceInitializer381err = proto.Unmarshal(initmsg.Initializer, &req)382if err != nil {383return err384}385386rs := &remoteContentStorage{RemoteContent: initmsg.RemoteContent}387388dst := initmsg.Destination389initializer, err := wsinit.NewFromRequest(ctx, dst, rs, &req, wsinit.NewFromRequestOpts{ForceGitpodUserForGit: false})390if err != nil {391return err392}393394initSource, stats, err := wsinit.InitializeWorkspace(ctx, dst, rs,395wsinit.WithInitializer(initializer),396wsinit.WithMappings(initmsg.IDMappings),397wsinit.WithChown(initmsg.UID, initmsg.GID),398wsinit.WithCleanSlate,399)400if err != nil {401return err402}403404// some workspace content may have a `/dst/.gitpod` file or directory. That would break405// the workspace ready file placement (see https://github.com/gitpod-io/gitpod/issues/7694).406err = wsinit.EnsureCleanDotGitpodDirectory(ctx, dst)407if err != nil {408return err409}410411// Place the ready file to make Theia "open its gates"412err = wsinit.PlaceWorkspaceReadyFile(ctx, dst, initSource, stats, initmsg.UID, initmsg.GID)413if err != nil {414return err415}416417// Serialize metrics, so we can pass them back to the caller418if statsFd != nil {419defer statsFd.Close()420writeInitializerStats(statsFd, &stats)421} else {422log.Warn("no stats file descriptor provided")423}424425return nil426}427428func writeInitializerStats(statsFd *os.File, stats *csapi.InitializerMetrics) {429serializedStats, err := json.Marshal(stats)430if err != nil {431log.WithError(err).Warn("cannot serialize initializer stats")432return433}434435log.WithField("stats", log.TrustedValueWrap{Value: string(serializedStats)}).Debug("writing initializer stats to fd")436_, writeErr := statsFd.Write(serializedStats)437if writeErr != nil {438log.WithError(writeErr).Warn("error writing initializer stats to fd")439return440}441}442443var _ storage.DirectAccess = &remoteContentStorage{}444445type remoteContentStorage struct {446RemoteContent map[string]storage.DownloadInfo447}448449// Init does nothing450func (rs *remoteContentStorage) Init(ctx context.Context, owner, workspace, instance string) error {451return nil452}453454// EnsureExists does nothing455func (rs *remoteContentStorage) EnsureExists(ctx context.Context) error {456return nil457}458459// Download always returns false and does nothing460func (rs *remoteContentStorage) Download(ctx context.Context, destination string, name string, mappings []archive.IDMapping) (exists bool, err error) {461span, ctx := opentracing.StartSpanFromContext(ctx, "remoteContentStorage.Download")462span.SetTag("destination", destination)463span.SetTag("name", name)464defer tracing.FinishSpan(span, &err)465466info, exists := rs.RemoteContent[name]467if !exists {468return false, nil469}470471span.SetTag("URL", info.URL)472473// create a temporal file to download the content474tempFile, err := os.CreateTemp("", "remote-content-*")475if err != nil {476return true, xerrors.Errorf("cannot create temporal file: %w", err)477}478tempFile.Close()479480args := []string{481"-s10", "-x16", "-j12",482"--retry-wait=5",483"--log-level=error",484"--allow-overwrite=true", // rewrite temporal empty file485info.URL,486"-o", tempFile.Name(),487}488489downloadStart := time.Now()490cmd := exec.Command("aria2c", args...)491out, err := cmd.CombinedOutput()492if err != nil {493log.WithError(err).WithField("out", string(out)).Error("unexpected error downloading file")494return true, xerrors.Errorf("unexpected error downloading file")495}496downloadDuration := time.Since(downloadStart)497log.WithField("downloadDuration", downloadDuration.String()).Info("aria2c download duration")498499tempFile, err = os.Open(tempFile.Name())500if err != nil {501return true, xerrors.Errorf("unexpected error downloading file")502}503504defer os.Remove(tempFile.Name())505defer tempFile.Close()506507extractStart := time.Now()508err = archive.ExtractTarbal(ctx, tempFile, destination, archive.WithUIDMapping(mappings), archive.WithGIDMapping(mappings))509if err != nil {510return true, xerrors.Errorf("tar %s: %s", destination, err.Error())511}512extractDuration := time.Since(extractStart)513log.WithField("extractDuration", extractDuration.String()).Info("extract tarbal duration")514515return true, nil516}517518// DownloadSnapshot always returns false and does nothing519func (rs *remoteContentStorage) DownloadSnapshot(ctx context.Context, destination string, name string, mappings []archive.IDMapping) (bool, error) {520return rs.Download(ctx, destination, name, mappings)521}522523// ListObjects returns all objects found with the given prefix. Returns an empty list if the bucket does not exuist (yet).524func (rs *remoteContentStorage) ListObjects(ctx context.Context, prefix string) (objects []string, err error) {525return []string{}, nil526}527528// Qualify just returns the name529func (rs *remoteContentStorage) Qualify(name string) string {530return name531}532533// Upload does nothing534func (rs *remoteContentStorage) Upload(ctx context.Context, source string, name string, opts ...storage.UploadOption) (string, string, error) {535return "", "", xerrors.Errorf("not implemented")536}537538// UploadInstance takes all files from a local location and uploads it to the remote storage539func (rs *remoteContentStorage) UploadInstance(ctx context.Context, source string, name string, options ...storage.UploadOption) (bucket, obj string, err error) {540return "", "", xerrors.Errorf("not implemented")541}542543// Bucket returns an empty string544func (rs *remoteContentStorage) Bucket(string) string {545return ""546}547548// BackupObject returns a backup's object name that a direct downloader would download549func (rs *remoteContentStorage) BackupObject(name string) string {550return ""551}552553// InstanceObject returns a instance's object name that a direct downloader would download554func (rs *remoteContentStorage) InstanceObject(workspaceID string, instanceID string, name string) string {555return ""556}557558// SnapshotObject returns a snapshot's object name that a direct downloer would download559func (rs *remoteContentStorage) SnapshotObject(name string) string {560return ""561}562563type msgInitContent struct {564Destination string565RemoteContent map[string]storage.DownloadInfo566Initializer []byte567UID, GID int568IDMappings []archive.IDMapping569570TraceInfo string571OWI map[string]interface{}572}573574575