Path: blob/main/components/content-service/pkg/storage/storage.go
2501 views
// Copyright (c) 2020 Gitpod GmbH. All rights reserved.1// Licensed under the GNU Affero General Public License (AGPL).2// See License.AGPL.txt in the project root for license information.34//go:generate ./generate-mock.sh56package storage78import (9"context"10"fmt"11"io"12"regexp"1314"golang.org/x/xerrors"1516"github.com/aws/aws-sdk-go-v2/aws"17awsconfig "github.com/aws/aws-sdk-go-v2/config"18"github.com/aws/aws-sdk-go-v2/service/s3"19"github.com/gitpod-io/gitpod/common-go/log"20config "github.com/gitpod-io/gitpod/content-service/api/config"21"github.com/gitpod-io/gitpod/content-service/pkg/archive"22)2324const (25// DefaultBackup is the name of the regular backup we upload to remote storage26DefaultBackup = "full.tar"2728// DefaultBackupManifest is the name of the manifest of the regular default backup we upload29DefaultBackupManifest = "wsfull.json"30)3132var (33// ErrNotFound is returned when an object is not found34ErrNotFound = fmt.Errorf("not found")35)3637// BucketNamer provides names for storage buckets38type BucketNamer interface {39// Bucket provides the bucket name for a particular user40Bucket(userID string) string41}4243// BackupObjectNamer provides names for workspace backup objects44type BackupObjectNamer interface {45// BackupObject returns a backup's object name that a direct downloader would download46BackupObject(name string) string47}4849// InstanceObjectNamer provides names for objects per workspace instance50type InstanceObjectNamer interface {51// InstanceObject returns a instance's object name that a direct downloader would download52InstanceObject(name string) string53}5455// BlobObjectNamer provides names for blob objects56type BlobObjectNamer interface {57// BlobObject returns a blob's object name58BlobObject(userID, name string) (string, error)59}6061// PresignedAccess provides presigned URLs to access remote storage objects62type PresignedAccess interface {63BucketNamer64BlobObjectNamer6566// EnsureExists makes sure that the remote storage location exists and can be up- or downloaded from67EnsureExists(ctx context.Context, bucket string) error6869// DiskUsage gives the total objects size of objects that have the given prefix70DiskUsage(ctx context.Context, bucket string, prefix string) (size int64, err error)7172// SignDownload describes an object for download - if the object is not found, ErrNotFound is returned73SignDownload(ctx context.Context, bucket, obj string, options *SignedURLOptions) (info *DownloadInfo, err error)7475// SignUpload describes an object for upload76SignUpload(ctx context.Context, bucket, obj string, options *SignedURLOptions) (info *UploadInfo, err error)7778// DeleteObject deletes objects in the given bucket specified by the given query79DeleteObject(ctx context.Context, bucket string, query *DeleteObjectQuery) error8081// DeleteBucket deletes a bucket82DeleteBucket(ctx context.Context, userID, bucket string) error8384// ObjectHash gets a hash value of an object85ObjectHash(ctx context.Context, bucket string, obj string) (string, error)8687// ObjectExists tells whether the given object exists or not88ObjectExists(ctx context.Context, bucket string, path string) (bool, error)8990// BackupObject returns a backup's object name that a direct downloader would download91BackupObject(ownerID string, workspaceID string, name string) string9293// InstanceObject returns a instance's object name that a direct downloader would download94InstanceObject(ownerID string, workspaceID string, instanceID string, name string) string95}9697// ObjectMeta describtes the metadata of a remote object98type ObjectMeta struct {99ContentType string100OCIMediaType string101Digest string102UncompressedDigest string103}104105// DownloadInfo describes an object for download106type DownloadInfo struct {107Meta ObjectMeta108URL string109Size int64110}111112// UploadInfo describes an object for upload113type UploadInfo struct {114URL string115}116117// DeleteObjectQuery specifies objects to delete, either by an exact name or prefix118type DeleteObjectQuery struct {119Prefix string120Name string121}122123// SignedURLOptions allows you to restrict the access to the signed URL.124type SignedURLOptions struct {125// ContentType is the content type header the client must provide126// to use the generated signed URL.127// Optional.128ContentType string129}130131// DirectDownloader downloads a snapshot132type DirectDownloader interface {133// Download takes the latest state from the remote storage and downloads it to a local path134Download(ctx context.Context, destination string, name string, mappings []archive.IDMapping) (found bool, err error)135136// Downloads a snapshot. The snapshot name is expected to be one produced by Qualify137DownloadSnapshot(ctx context.Context, destination string, name string, mappings []archive.IDMapping) (found bool, err error)138}139140// DirectAccess represents a remote location where we can store data141type DirectAccess interface {142BucketNamer143BackupObjectNamer144DirectDownloader145146// Init initializes the remote storage - call this before calling anything else on the interface147Init(ctx context.Context, owner, workspace, instance string) error148149// EnsureExists makes sure that the remote storage location exists and can be up- or downloaded from150EnsureExists(ctx context.Context) error151152// ListObjects returns all objects found with the given prefix. Returns an empty list if the bucket does not exuist (yet).153ListObjects(ctx context.Context, prefix string) ([]string, error)154155// Fully qualifies a snapshot name so that it can be downloaded using DownloadSnapshot156Qualify(name string) string157158// Upload takes all files from a local location and uploads it to the remote storage159Upload(ctx context.Context, source string, name string, options ...UploadOption) (bucket, obj string, err error)160161// UploadInstance takes all files from a local location and uploads it to the remote storage162UploadInstance(ctx context.Context, source string, name string, options ...UploadOption) (bucket, obj string, err error)163}164165// UploadOptions configure remote storage upload166type UploadOptions struct {167// Annotations are generic metadata atteched to a storage object168Annotations map[string]string169170ContentType string171}172173// UploadOption configures a particular aspect of remote storage upload174type UploadOption func(*UploadOptions) error175176// WithAnnotations adds arbitrary metadata to a storage object177func WithAnnotations(md map[string]string) UploadOption {178return func(opts *UploadOptions) error {179opts.Annotations = md180return nil181}182}183184// WithContentType sets the content mime type of the object185func WithContentType(ct string) UploadOption {186return func(opts *UploadOptions) error {187opts.ContentType = ct188return nil189}190}191192// GetUploadOptions turns functional opts into a struct193func GetUploadOptions(opts []UploadOption) (*UploadOptions, error) {194res := &UploadOptions{}195for _, o := range opts {196err := o(res)197if err != nil {198return nil, err199}200}201return res, nil202}203204const (205// ObjectAnnotationDigest is the digest of actual object206ObjectAnnotationDigest = "gitpod-digest"207208// ObjectAnnotationUncompressedDigest is the digest of the uncompressed object, if the object is compressed209ObjectAnnotationUncompressedDigest = "gitpod-uncompressedDigest"210211// ObjectAnnotationOCIContentType is the OCI media type of the object212ObjectAnnotationOCIContentType = "gitpod-oci-contentType"213)214215// NewDirectAccess provides direct access to a storage system216func NewDirectAccess(c *config.StorageConfig) (DirectAccess, error) {217stage := c.GetStage()218if stage == "" {219return nil, xerrors.Errorf("missing storage stage")220}221222switch c.Kind {223case config.GCloudStorage:224return newDirectGCPAccess(c.GCloudConfig, stage)225case config.MinIOStorage:226return newDirectMinIOAccess(c.MinIOConfig)227case config.S3Storage:228cfg, err := loadAwsConfig(c.S3Config)229if err != nil {230return nil, err231}232233return newDirectS3Access(s3.NewFromConfig(*cfg), S3Config{234Bucket: c.S3Config.Bucket,235}), nil236default:237return &DirectNoopStorage{}, nil238}239}240241// NewPresignedAccess provides presigned URLs to access a storage system242func NewPresignedAccess(c *config.StorageConfig) (PresignedAccess, error) {243stage := c.GetStage()244if stage == "" {245return nil, xerrors.Errorf("missing storage stage")246}247248switch c.Kind {249case config.GCloudStorage:250return newPresignedGCPAccess(c.GCloudConfig, stage)251case config.MinIOStorage:252return newPresignedMinIOAccess(c.MinIOConfig)253case config.S3Storage:254cfg, err := loadAwsConfig(c.S3Config)255if err != nil {256return nil, err257}258259return NewPresignedS3Access(s3.NewFromConfig(*cfg), S3Config{260Bucket: c.S3Config.Bucket,261}), nil262default:263log.Warnf("falling back to noop presigned storage access. Is this intentional? (storage kind: %s)", c.Kind)264return &PresignedNoopStorage{}, nil265}266}267268func loadAwsConfig(s3config *config.S3Config) (*aws.Config, error) {269var opts []func(*awsconfig.LoadOptions) error270if s3config.CredentialsFile != "" {271opts = append(opts, awsconfig.WithSharedConfigFiles([]string{s3config.CredentialsFile}))272}273274if s3config.Region != "" {275opts = append(opts, awsconfig.WithRegion(s3config.Region))276}277278cfg, err := awsconfig.LoadDefaultConfig(context.TODO(), opts...)279if err != nil {280return nil, err281}282283return &cfg, nil284}285286func extractTarbal(ctx context.Context, dest string, src io.Reader, mappings []archive.IDMapping) error {287err := archive.ExtractTarbal(ctx, src, dest, archive.WithUIDMapping(mappings), archive.WithGIDMapping(mappings))288if err != nil {289return xerrors.Errorf("tar %s: %s", dest, err.Error())290}291292return nil293}294295func blobObjectName(name string) (string, error) {296blobRegex := `^[a-zA-Z0-9._\-\/]+$`297b, err := regexp.MatchString(blobRegex, name)298if err != nil {299return "", err300}301if !b {302return "", xerrors.Errorf("blob name '%s' needs to match regex '%s'", name, blobRegex)303}304return fmt.Sprintf("blobs/%s", name), nil305}306307func InstanceObjectName(instanceID, name string) string {308return fmt.Sprintf("instances/%s/%s", instanceID, name)309}310311312