Path: blob/main/components/content-service/pkg/initializer/initializer.go
2499 views
// Copyright (c) 2020 Gitpod GmbH. All rights reserved.1// Licensed under the GNU Affero General Public License (AGPL).2// See License.AGPL.txt in the project root for license information.34package initializer56import (7"context"8"encoding/json"9"errors"10"fmt"11"io"12"io/fs"13"net/http"14"os"15"path/filepath"16"strings"17"syscall"18"time"1920"github.com/opencontainers/go-digest"21"github.com/opentracing/opentracing-go"22"golang.org/x/xerrors"23"google.golang.org/grpc/codes"24"google.golang.org/grpc/status"2526"github.com/gitpod-io/gitpod/common-go/log"27"github.com/gitpod-io/gitpod/common-go/tracing"28csapi "github.com/gitpod-io/gitpod/content-service/api"29"github.com/gitpod-io/gitpod/content-service/pkg/archive"30"github.com/gitpod-io/gitpod/content-service/pkg/git"31"github.com/gitpod-io/gitpod/content-service/pkg/storage"32)3334const (35// WorkspaceReadyFile is the name of the ready file we're placing in a workspace36WorkspaceReadyFile = ".gitpod/ready"3738// GitpodUID is the user ID of the gitpod user39GitpodUID = 333334041// GitpodGID is the group ID of the gitpod user group42GitpodGID = 333334344// otsDownloadAttempts is the number of times we'll attempt to download the one-time secret45otsDownloadAttempts = 2046)4748// Initializer can initialize a workspace with content49type Initializer interface {50Run(ctx context.Context, mappings []archive.IDMapping) (csapi.WorkspaceInitSource, csapi.InitializerMetrics, error)51}5253// EmptyInitializer does nothing54type EmptyInitializer struct{}5556// Run does nothing57func (e *EmptyInitializer) Run(ctx context.Context, mappings []archive.IDMapping) (csapi.WorkspaceInitSource, csapi.InitializerMetrics, error) {58return csapi.WorkspaceInitFromOther, nil, nil59}6061// CompositeInitializer does nothing62type CompositeInitializer []Initializer6364// Run calls run on all child initializers65func (e CompositeInitializer) Run(ctx context.Context, mappings []archive.IDMapping) (_ csapi.WorkspaceInitSource, _ csapi.InitializerMetrics, err error) {66span, ctx := opentracing.StartSpanFromContext(ctx, "CompositeInitializer.Run")67defer tracing.FinishSpan(span, &err)68start := time.Now()69initialSize, fsErr := getFsUsage()70if fsErr != nil {71log.WithError(fsErr).Error("could not get disk usage")72}7374total := []csapi.InitializerMetric{}75for _, init := range e {76_, stats, err := init.Run(ctx, mappings)77if err != nil {78return csapi.WorkspaceInitFromOther, nil, err79}80total = append(total, stats...)81}8283if fsErr == nil {84currentSize, fsErr := getFsUsage()85if fsErr != nil {86log.WithError(fsErr).Error("could not get disk usage")87}8889total = append(total, csapi.InitializerMetric{90Type: "composite",91Duration: time.Since(start),92Size: currentSize - initialSize,93})94}9596return csapi.WorkspaceInitFromOther, total, nil97}9899// NewFromRequestOpts configures the initializer produced from a content init request100type NewFromRequestOpts struct {101// ForceGitpodUserForGit forces gitpod:gitpod ownership on all files produced by the Git initializer.102// For FWB workspaces the content init is run from supervisor which runs as UID 0. Using this flag, the103// Git content is forced to the Gitpod user. All other content (backup, prebuild, snapshot) will already104// have the correct user.105ForceGitpodUserForGit bool106}107108// NewFromRequest picks the initializer from the request but does not execute it.109// Returns gRPC errors.110func NewFromRequest(ctx context.Context, loc string, rs storage.DirectDownloader, req *csapi.WorkspaceInitializer, opts NewFromRequestOpts) (i Initializer, err error) {111//nolint:ineffassign,staticcheck112span, ctx := opentracing.StartSpanFromContext(ctx, "NewFromRequest")113defer tracing.FinishSpan(span, &err)114span.LogKV("opts", opts)115116spec := req.Spec117var initializer Initializer118if _, ok := spec.(*csapi.WorkspaceInitializer_Empty); ok {119initializer = &EmptyInitializer{}120} else if ir, ok := spec.(*csapi.WorkspaceInitializer_Composite); ok {121initializers := make([]Initializer, len(ir.Composite.Initializer))122for i, init := range ir.Composite.Initializer {123initializers[i], err = NewFromRequest(ctx, loc, rs, init, opts)124if err != nil {125return nil, err126}127}128initializer = CompositeInitializer(initializers)129} else if ir, ok := spec.(*csapi.WorkspaceInitializer_Git); ok {130if ir.Git == nil {131return nil, status.Error(codes.InvalidArgument, "missing Git initializer spec")132}133134initializer, err = newGitInitializer(ctx, loc, ir.Git, opts.ForceGitpodUserForGit)135} else if ir, ok := spec.(*csapi.WorkspaceInitializer_Prebuild); ok {136if ir.Prebuild == nil {137return nil, status.Error(codes.InvalidArgument, "missing prebuild initializer spec")138}139var snapshot *SnapshotInitializer140if ir.Prebuild.Prebuild != nil {141snapshot, err = newSnapshotInitializer(loc, rs, ir.Prebuild.Prebuild)142if err != nil {143return nil, status.Error(codes.Internal, fmt.Sprintf("cannot setup prebuild init: %v", err))144}145}146var gits []*GitInitializer147for _, gi := range ir.Prebuild.Git {148gitinit, err := newGitInitializer(ctx, loc, gi, opts.ForceGitpodUserForGit)149if err != nil {150return nil, err151}152gits = append(gits, gitinit)153}154initializer = &PrebuildInitializer{155Prebuild: snapshot,156Git: gits,157}158} else if ir, ok := spec.(*csapi.WorkspaceInitializer_Snapshot); ok {159initializer, err = newSnapshotInitializer(loc, rs, ir.Snapshot)160} else if ir, ok := spec.(*csapi.WorkspaceInitializer_Download); ok {161initializer, err = newFileDownloadInitializer(loc, ir.Download)162} else if ir, ok := spec.(*csapi.WorkspaceInitializer_Backup); ok {163initializer, err = newFromBackupInitializer(loc, rs, ir.Backup)164} else {165initializer = &EmptyInitializer{}166}167if err != nil {168return nil, status.Error(codes.InvalidArgument, err.Error())169}170return initializer, nil171}172173// newFileDownloadInitializer creates a download initializer for a request174func newFileDownloadInitializer(loc string, req *csapi.FileDownloadInitializer) (*fileDownloadInitializer, error) {175fileInfos := make([]fileInfo, len(req.Files))176for i, f := range req.Files {177dgst, err := digest.Parse(f.Digest)178if err != nil {179return nil, xerrors.Errorf("invalid digest %s: %w", f.Digest, err)180}181fileInfos[i] = fileInfo{182URL: f.Url,183Path: f.FilePath,184Digest: dgst,185}186}187initializer := &fileDownloadInitializer{188FilesInfos: fileInfos,189TargetLocation: filepath.Join(loc, req.TargetLocation),190HTTPClient: http.DefaultClient,191RetryTimeout: 1 * time.Second,192}193return initializer, nil194}195196// newFromBackupInitializer creates a backup restoration initializer for a request197func newFromBackupInitializer(loc string, rs storage.DirectDownloader, req *csapi.FromBackupInitializer) (*fromBackupInitializer, error) {198return &fromBackupInitializer{199Location: loc,200RemoteStorage: rs,201FromVolumeSnapshot: req.FromVolumeSnapshot,202}, nil203}204205type fromBackupInitializer struct {206Location string207RemoteStorage storage.DirectDownloader208FromVolumeSnapshot bool209}210211func (bi *fromBackupInitializer) Run(ctx context.Context, mappings []archive.IDMapping) (src csapi.WorkspaceInitSource, stats csapi.InitializerMetrics, err error) {212if bi.FromVolumeSnapshot {213return csapi.WorkspaceInitFromBackup, nil, nil214}215216start := time.Now()217initialSize, fsErr := getFsUsage()218if fsErr != nil {219log.WithError(fsErr).Error("could not get disk usage")220}221222hasBackup, err := bi.RemoteStorage.Download(ctx, bi.Location, storage.DefaultBackup, mappings)223if !hasBackup {224if err != nil {225return src, nil, xerrors.Errorf("no backup found, error: %w", err)226}227return src, nil, xerrors.Errorf("no backup found")228}229if err != nil {230return src, nil, xerrors.Errorf("cannot restore backup: %w", err)231}232233if fsErr == nil {234currentSize, fsErr := getFsUsage()235if fsErr != nil {236log.WithError(fsErr).Error("could not get disk usage")237}238239stats = csapi.InitializerMetrics{csapi.InitializerMetric{240Type: "fromBackup",241Duration: time.Since(start),242Size: currentSize - initialSize,243}}244}245246return csapi.WorkspaceInitFromBackup, stats, nil247}248249// newGitInitializer creates a Git initializer based on the request.250// Returns gRPC errors.251func newGitInitializer(ctx context.Context, loc string, req *csapi.GitInitializer, forceGitpodUser bool) (*GitInitializer, error) {252if req.Config == nil {253return nil, status.Error(codes.InvalidArgument, "Git initializer misses config")254}255256var targetMode CloneTargetMode257switch req.TargetMode {258case csapi.CloneTargetMode_LOCAL_BRANCH:259targetMode = LocalBranch260case csapi.CloneTargetMode_REMOTE_BRANCH:261targetMode = RemoteBranch262case csapi.CloneTargetMode_REMOTE_COMMIT:263targetMode = RemoteCommit264case csapi.CloneTargetMode_REMOTE_HEAD:265targetMode = RemoteHead266default:267return nil, status.Error(codes.InvalidArgument, fmt.Sprintf("invalid target mode: %v", req.TargetMode))268}269270var authMethod = git.BasicAuth271if req.Config.Authentication == csapi.GitAuthMethod_NO_AUTH {272authMethod = git.NoAuth273}274275// the auth provider must cache the OTS because it may be used several times,276// but can download the one-time-secret only once.277authProvider := git.CachingAuthProvider(func() (user string, pwd string, err error) {278switch req.Config.Authentication {279case csapi.GitAuthMethod_BASIC_AUTH:280user = req.Config.AuthUser281pwd = req.Config.AuthPassword282case csapi.GitAuthMethod_BASIC_AUTH_OTS:283user, pwd, err = downloadOTS(ctx, req.Config.AuthOts)284if err != nil {285log.WithField("location", loc).WithError(err).Error("cannot download Git auth OTS")286return "", "", status.Error(codes.InvalidArgument, "cannot get OTS")287}288case csapi.GitAuthMethod_NO_AUTH:289default:290return "", "", status.Error(codes.InvalidArgument, fmt.Sprintf("invalid Git authentication method: %v", req.Config.Authentication))291}292293return294})295296log.WithField("location", loc).Debug("using Git initializer")297return &GitInitializer{298Client: git.Client{299Location: filepath.Join(loc, req.CheckoutLocation),300RemoteURI: req.RemoteUri,301UpstreamRemoteURI: req.Upstream_RemoteUri,302Config: req.Config.CustomConfig,303AuthMethod: authMethod,304AuthProvider: authProvider,305RunAsGitpodUser: forceGitpodUser,306FullClone: req.FullClone,307},308TargetMode: targetMode,309CloneTarget: req.CloneTaget,310Chown: false,311}, nil312}313314func newSnapshotInitializer(loc string, rs storage.DirectDownloader, req *csapi.SnapshotInitializer) (*SnapshotInitializer, error) {315return &SnapshotInitializer{316Location: loc,317Snapshot: req.Snapshot,318Storage: rs,319FromVolumeSnapshot: req.FromVolumeSnapshot,320}, nil321}322323func downloadOTS(ctx context.Context, url string) (user, pwd string, err error) {324//nolint:ineffassign325span, ctx := opentracing.StartSpanFromContext(ctx, "downloadOTS")326defer tracing.FinishSpan(span, &err)327span.LogKV("url", url)328329dl := func() (user, pwd string, err error) {330req, err := http.NewRequestWithContext(ctx, "GET", url, nil)331if err != nil {332return "", "", err333}334_ = opentracing.GlobalTracer().Inject(span.Context(), opentracing.HTTPHeaders, opentracing.HTTPHeadersCarrier(req.Header))335336resp, err := http.DefaultClient.Do(req)337if err != nil {338return "", "", err339}340defer resp.Body.Close()341if resp.StatusCode != http.StatusOK {342return "", "", xerrors.Errorf("non-OK OTS response: %s", resp.Status)343}344345secret, err := io.ReadAll(resp.Body)346if err != nil {347return "", "", err348}349350pwd = string(secret)351if segs := strings.Split(pwd, ":"); len(segs) >= 2 {352user = segs[0]353pwd = strings.Join(segs[1:], ":")354}355return356}357for i := 0; i < otsDownloadAttempts; i++ {358span.LogKV("attempt", i)359if i > 0 {360time.Sleep(time.Second)361}362363user, pwd, err = dl()364if err == context.Canceled || err == context.DeadlineExceeded {365return366}367if err == nil {368break369}370log.WithError(err).WithField("attempt", i).Warn("cannot download OTS")371}372if err != nil {373log.WithError(err).Warn("failed to download OTS")374return "", "", err375}376377return user, pwd, nil378}379380// InitializeOpt configures the initialisation procedure381type InitializeOpt func(*initializeOpts)382383type initializeOpts struct {384Initializer Initializer385CleanSlate bool386UID int387GID int388mappings []archive.IDMapping389}390391// WithMappings configures the UID mappings that're used during content initialization392func WithMappings(mappings []archive.IDMapping) InitializeOpt {393return func(o *initializeOpts) {394o.mappings = mappings395}396}397398// WithInitializer configures the initializer that's used during content initialization399func WithInitializer(initializer Initializer) InitializeOpt {400return func(o *initializeOpts) {401o.Initializer = initializer402}403}404405// WithCleanSlate ensures there's no prior content in the workspace location406func WithCleanSlate(o *initializeOpts) {407o.CleanSlate = true408}409410// WithChown sets a custom UID/GID the content will have after initialisation411func WithChown(uid, gid int) InitializeOpt {412return func(o *initializeOpts) {413o.UID = uid414o.GID = gid415}416}417418// InitializeWorkspace initializes a workspace from backup or an initializer419func InitializeWorkspace(ctx context.Context, location string, remoteStorage storage.DirectDownloader, opts ...InitializeOpt) (src csapi.WorkspaceInitSource, stats csapi.InitializerMetrics, err error) {420//nolint:ineffassign421span, ctx := opentracing.StartSpanFromContext(ctx, "InitializeWorkspace")422span.SetTag("location", location)423defer tracing.FinishSpan(span, &err)424425cfg := initializeOpts{426Initializer: &EmptyInitializer{},427CleanSlate: false,428GID: GitpodGID,429UID: GitpodUID,430}431for _, o := range opts {432o(&cfg)433}434435src = csapi.WorkspaceInitFromOther436437// Note: it's important that CleanSlate does not remove the location itself, but merely its content.438// If the location were removed that might break the filesystem quota we have put in place prior.439if cfg.CleanSlate {440// 1. Clean out the workspace directory441if _, err := os.Stat(location); errors.Is(err, fs.ErrNotExist) {442// in the very unlikely event that the workspace Pod did not mount (and thus create) the workspace directory, create it443err = os.Mkdir(location, 0755)444if os.IsExist(err) {445log.WithError(err).WithField("location", location).Debug("ran into non-atomic workspace location existence check")446span.SetTag("exists", true)447} else if err != nil {448return src, nil, xerrors.Errorf("cannot create workspace: %w", err)449}450}451fs, err := os.ReadDir(location)452if err != nil {453return src, nil, xerrors.Errorf("cannot clean workspace folder: %w", err)454}455for _, f := range fs {456path := filepath.Join(location, f.Name())457err := os.RemoveAll(path)458if err != nil {459return src, nil, xerrors.Errorf("cannot clean workspace folder: %w", err)460}461}462463// Chown the workspace directory464err = os.Chown(location, cfg.UID, cfg.GID)465if err != nil {466return src, nil, xerrors.Errorf("cannot create workspace: %w", err)467}468}469470// Try to download a backup first471initialSize, fsErr := getFsUsage()472if fsErr != nil {473log.WithError(fsErr).Error("could not get disk usage")474}475downloadStart := time.Now()476hasBackup, err := remoteStorage.Download(ctx, location, storage.DefaultBackup, cfg.mappings)477if err != nil {478return src, nil, xerrors.Errorf("cannot restore backup: %w", err)479}480downloadDuration := time.Since(downloadStart)481482span.SetTag("hasBackup", hasBackup)483if hasBackup {484src = csapi.WorkspaceInitFromBackup485486currentSize, fsErr := getFsUsage()487if fsErr != nil {488log.WithError(fsErr).Error("could not get disk usage")489}490stats = []csapi.InitializerMetric{{491Type: "fromBackup",492Duration: downloadDuration,493Size: currentSize - initialSize,494}}495return496}497498// If there is not backup, run the initializer499src, stats, err = cfg.Initializer.Run(ctx, cfg.mappings)500if err != nil {501return src, nil, xerrors.Errorf("cannot initialize workspace: %w", err)502}503504return505}506507// Some workspace content may have a `/dst/.gitpod` file or directory. That would break508// the workspace ready file placement (see https://github.com/gitpod-io/gitpod/issues/7694).509// This function ensures that workspaces do not have a `.gitpod` file or directory present.510func EnsureCleanDotGitpodDirectory(ctx context.Context, wspath string) error {511var mv func(src, dst string) error512if git.IsWorkingCopy(wspath) {513c := &git.Client{514Location: wspath,515}516mv = func(src, dst string) error {517return c.Git(ctx, "mv", src, dst)518}519} else {520mv = os.Rename521}522523dotGitpod := filepath.Join(wspath, ".gitpod")524stat, err := os.Stat(dotGitpod)525if errors.Is(err, fs.ErrNotExist) {526return nil527}528if stat.IsDir() {529// we need this to be a directory, we're probably ok530return nil531}532533candidateFN := filepath.Join(wspath, ".gitpod.yaml")534if _, err := os.Stat(candidateFN); err == nil {535// Our candidate file already exists, hence we cannot just move things.536// As fallback we'll delete the .gitpod entry.537return os.RemoveAll(dotGitpod)538}539540err = mv(dotGitpod, candidateFN)541if err != nil {542return err543}544545return nil546}547548// PlaceWorkspaceReadyFile writes a file in the workspace which indicates that the workspace has been initialized549func PlaceWorkspaceReadyFile(ctx context.Context, wspath string, initsrc csapi.WorkspaceInitSource, metrics csapi.InitializerMetrics, uid, gid int) (err error) {550//nolint:ineffassign,staticcheck551span, ctx := opentracing.StartSpanFromContext(ctx, "placeWorkspaceReadyFile")552span.SetTag("source", initsrc)553defer tracing.FinishSpan(span, &err)554555content := csapi.WorkspaceReadyMessage{556Source: initsrc,557Metrics: metrics,558}559fc, err := json.Marshal(content)560if err != nil {561return xerrors.Errorf("cannot marshal workspace ready message: %w", err)562}563564gitpodDir := filepath.Join(wspath, filepath.Dir(WorkspaceReadyFile))565err = os.MkdirAll(gitpodDir, 0777)566if err != nil {567return xerrors.Errorf("cannot create directory for workspace ready file: %w", err)568}569err = os.Chown(gitpodDir, uid, gid)570if err != nil {571return xerrors.Errorf("cannot chown directory for workspace ready file: %w", err)572}573574tempWorkspaceReadyFile := WorkspaceReadyFile + ".tmp"575fn := filepath.Join(wspath, tempWorkspaceReadyFile)576err = os.WriteFile(fn, []byte(fc), 0644)577if err != nil {578return xerrors.Errorf("cannot write workspace ready file content: %w", err)579}580err = os.Chown(fn, uid, gid)581if err != nil {582return xerrors.Errorf("cannot chown workspace ready file: %w", err)583}584585// Theia will listen for a rename event as trigger to start the tasks. This is a rename event586// because we're writing to the file and this is the most convenient way we can tell Theia that we're done writing.587err = os.Rename(fn, filepath.Join(wspath, WorkspaceReadyFile))588if err != nil {589return xerrors.Errorf("cannot rename workspace ready file: %w", err)590}591592log.WithField("content", string(fc)).WithField("destination", wspath).Info("ready file metrics")593594return nil595}596597func getFsUsage() (uint64, error) {598var stat syscall.Statfs_t599600err := syscall.Statfs("/dst", &stat)601if os.IsNotExist(err) {602err = syscall.Statfs("/workspace", &stat)603}604605if err != nil {606return 0, err607}608609size := uint64(stat.Blocks) * uint64(stat.Bsize)610free := uint64(stat.Bfree) * uint64(stat.Bsize)611612return size - free, nil613}614615616