Path: blob/main/components/content-service/pkg/archive/tar.go
2499 views
// Copyright (c) 2020 Gitpod GmbH. All rights reserved.1// Licensed under the GNU Affero General Public License (AGPL).2// See License.AGPL.txt in the project root for license information.34package archive56import (7"archive/tar"8"context"9"io"10"os"11"os/exec"12"path"13"sort"14"strings"15"syscall"16"time"1718"github.com/opentracing/opentracing-go"19"golang.org/x/sys/unix"20"golang.org/x/xerrors"2122"github.com/gitpod-io/gitpod/common-go/log"23"github.com/gitpod-io/gitpod/common-go/tracing"24)2526// TarConfig configures tarbal creation/extraction27type TarConfig struct {28UIDMaps []IDMapping29GIDMaps []IDMapping30}3132// BuildTarbalOption configures the tarbal creation33type TarOption func(o *TarConfig)3435// IDMapping maps user or group IDs36type IDMapping struct {37ContainerID int38HostID int39Size int40}4142// WithUIDMapping reverses the given user ID mapping during archive creation43func WithUIDMapping(mappings []IDMapping) TarOption {44return func(o *TarConfig) {45o.UIDMaps = mappings46}47}4849// WithGIDMapping reverses the given user ID mapping during archive creation50func WithGIDMapping(mappings []IDMapping) TarOption {51return func(o *TarConfig) {52o.GIDMaps = mappings53}54}5556// ExtractTarbal extracts an OCI compatible tar file src to the folder dst, expecting the overlay whiteout format57func ExtractTarbal(ctx context.Context, src io.Reader, dst string, opts ...TarOption) (err error) {58type Info struct {59UID, GID int60IsSymlink bool61Xattrs map[string]string62}6364//nolint:staticcheck,ineffassign65span, ctx := opentracing.StartSpanFromContext(ctx, "extractTarbal")66span.LogKV("dst", dst)67defer tracing.FinishSpan(span, &err)6869var cfg TarConfig70start := time.Now()71for _, opt := range opts {72opt(&cfg)73}7475pipeReader, pipeWriter := io.Pipe()76teeReader := io.TeeReader(src, pipeWriter)7778tarReader := tar.NewReader(pipeReader)7980finished := make(chan bool)81m := make(map[string]Info)8283unpackSpan := opentracing.StartSpan("unpackTarbal", opentracing.ChildOf(span.Context()))84go func() {85defer close(finished)86for {87hdr, err := tarReader.Next()88if err == io.EOF {89finished <- true90return91}9293if err != nil {94log.WithError(err).Error("error reading tar")95return96}9798m[hdr.Name] = Info{99UID: hdr.Uid,100GID: hdr.Gid,101IsSymlink: (hdr.Linkname != ""),102//nolint:staticcheck103Xattrs: hdr.Xattrs,104}105}106}()107108// Be explicit about the tar flags. We want to restore the exact content without changes109tarcmd := exec.Command(110"tar",111"--extract",112"--preserve-permissions",113)114tarcmd.Dir = dst115tarcmd.Stdin = teeReader116117var msg []byte118msg, err = tarcmd.CombinedOutput()119if err != nil {120return xerrors.Errorf("tar %s: %s", dst, err.Error()+";"+string(msg))121}122123log.WithField("log", string(msg)).Debug("decompressing tar stream log")124125<-finished126tracing.FinishSpan(unpackSpan, &err)127128chownSpan := opentracing.StartSpan("chown", opentracing.ChildOf(span.Context()))129// lets create a sorted list of pathes and chown depth first.130paths := make([]string, 0, len(m))131for path := range m {132paths = append(paths, path)133}134sort.Sort(sort.Reverse(sort.StringSlice(paths)))135136// We need to remap the UID and GID between the host and the container to avoid permission issues.137for _, p := range paths {138v := m[p]139140if v.IsSymlink {141continue142}143144uid := toHostID(v.UID, cfg.UIDMaps)145gid := toHostID(v.GID, cfg.GIDMaps)146147err = remapFile(path.Join(dst, p), uid, gid, v.Xattrs)148if err != nil {149log.WithError(err).WithField("uid", uid).WithField("gid", gid).WithField("path", p).Debug("cannot chown")150}151}152tracing.FinishSpan(chownSpan, &err)153154log.WithField("duration", time.Since(start).Milliseconds()).Debug("untar complete")155return nil156}157158func toHostID(containerID int, idMap []IDMapping) int {159for _, m := range idMap {160if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {161hostID := m.HostID + (containerID - m.ContainerID)162return hostID163}164}165return containerID166}167168// remapFile changes the UID and GID of a file preserving existing file mode bits.169func remapFile(name string, uid, gid int, xattrs map[string]string) error {170// current info of the file before any change171fileInfo, err := os.Stat(name)172if err != nil {173return err174}175176// nothing to do for symlinks177if fileInfo.Mode()&os.ModeSymlink == os.ModeSymlink {178return nil179}180181// changing UID or GID can break files with suid/sgid182err = os.Lchown(name, uid, gid)183if err != nil {184return err185}186187// restore original permissions188err = os.Chmod(name, fileInfo.Mode())189if err != nil {190return err191}192193for key, value := range xattrs {194// do not set trusted attributes195if strings.HasPrefix(key, "trusted.") {196continue197}198199if strings.HasPrefix(key, "user.") {200// This is a marker to match inodes, such as when an upper layer copies a lower layer file in overlayfs.201// However, when restoring a content, the container in the workspace is not always running, so there is no problem ignoring the failure.202if strings.HasSuffix(key, ".overlay.impure") || strings.HasSuffix(key, ".overlay.origin") {203continue204}205}206207if err := unix.Lsetxattr(name, key, []byte(value), 0); err != nil {208if err == syscall.ENOTSUP || err == syscall.EPERM {209continue210}211212log.WithField("name", key).WithField("value", value).WithField("file", name).WithError(err).Warn("restoring extended attributes")213}214}215216// restore file times217fileTime := fileInfo.Sys().(*syscall.Stat_t)218return os.Chtimes(name, timespecToTime(fileTime.Atim), timespecToTime(fileTime.Mtim))219}220221func timespecToTime(ts syscall.Timespec) time.Time {222return time.Unix(int64(ts.Sec), int64(ts.Nsec))223}224225226