Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
gitpod-io
GitHub Repository: gitpod-io/gitpod
Path: blob/main/components/content-service/pkg/storage/storage.go
2501 views
1
// Copyright (c) 2020 Gitpod GmbH. All rights reserved.
2
// Licensed under the GNU Affero General Public License (AGPL).
3
// See License.AGPL.txt in the project root for license information.
4
5
//go:generate ./generate-mock.sh
6
7
package storage
8
9
import (
10
"context"
11
"fmt"
12
"io"
13
"regexp"
14
15
"golang.org/x/xerrors"
16
17
"github.com/aws/aws-sdk-go-v2/aws"
18
awsconfig "github.com/aws/aws-sdk-go-v2/config"
19
"github.com/aws/aws-sdk-go-v2/service/s3"
20
"github.com/gitpod-io/gitpod/common-go/log"
21
config "github.com/gitpod-io/gitpod/content-service/api/config"
22
"github.com/gitpod-io/gitpod/content-service/pkg/archive"
23
)
24
25
const (
	// DefaultBackup names the regular backup archive that gets uploaded to
	// remote storage.
	DefaultBackup = "full.tar"

	// DefaultBackupManifest names the manifest that belongs to the regular
	// default backup upload.
	DefaultBackupManifest = "wsfull.json"
)
32
33
// ErrNotFound is the sentinel error reported when a requested object does not
// exist in remote storage.
var ErrNotFound = fmt.Errorf("not found")
37
38
// BucketNamer is implemented by storage backends that can derive the name of
// a storage bucket.
type BucketNamer interface {
	// Bucket returns the bucket name for the user identified by userID.
	Bucket(userID string) string
}
43
44
// BackupObjectNamer is implemented by storage backends that can name the
// objects holding workspace backups.
type BackupObjectNamer interface {
	// BackupObject returns the object name of the backup called name, i.e. the
	// object a direct downloader would download.
	BackupObject(name string) string
}
49
50
// InstanceObjectNamer is implemented by storage backends that can name
// objects which are kept per workspace instance.
type InstanceObjectNamer interface {
	// InstanceObject returns the object name of the instance object called
	// name, i.e. the object a direct downloader would download.
	InstanceObject(name string) string
}
55
56
// BlobObjectNamer is implemented by storage backends that can name blob
// objects.
type BlobObjectNamer interface {
	// BlobObject returns the object name of the blob called name for the given
	// user, or an error if no object name can be produced.
	BlobObject(userID, name string) (string, error)
}
61
62
// PresignedAccess provides presigned URLs to access remote storage objects
63
type PresignedAccess interface {
64
BucketNamer
65
BlobObjectNamer
66
67
// EnsureExists makes sure that the remote storage location exists and can be up- or downloaded from
68
EnsureExists(ctx context.Context, bucket string) error
69
70
// DiskUsage gives the total objects size of objects that have the given prefix
71
DiskUsage(ctx context.Context, bucket string, prefix string) (size int64, err error)
72
73
// SignDownload describes an object for download - if the object is not found, ErrNotFound is returned
74
SignDownload(ctx context.Context, bucket, obj string, options *SignedURLOptions) (info *DownloadInfo, err error)
75
76
// SignUpload describes an object for upload
77
SignUpload(ctx context.Context, bucket, obj string, options *SignedURLOptions) (info *UploadInfo, err error)
78
79
// DeleteObject deletes objects in the given bucket specified by the given query
80
DeleteObject(ctx context.Context, bucket string, query *DeleteObjectQuery) error
81
82
// DeleteBucket deletes a bucket
83
DeleteBucket(ctx context.Context, userID, bucket string) error
84
85
// ObjectHash gets a hash value of an object
86
ObjectHash(ctx context.Context, bucket string, obj string) (string, error)
87
88
// ObjectExists tells whether the given object exists or not
89
ObjectExists(ctx context.Context, bucket string, path string) (bool, error)
90
91
// BackupObject returns a backup's object name that a direct downloader would download
92
BackupObject(ownerID string, workspaceID string, name string) string
93
94
// InstanceObject returns a instance's object name that a direct downloader would download
95
InstanceObject(ownerID string, workspaceID string, instanceID string, name string) string
96
}
97
98
// ObjectMeta describes the metadata of a remote object.
//
// NOTE(review): the field meanings below are inferred from the field names and
// the ObjectAnnotation* constants in this package — confirm against the
// storage implementations that populate this struct.
type ObjectMeta struct {
	// ContentType is presumably the content (MIME) type of the object.
	ContentType string
	// OCIMediaType is presumably the OCI media type of the object.
	OCIMediaType string
	// Digest is presumably the digest of the object as stored.
	Digest string
	// UncompressedDigest is presumably the digest of the uncompressed object.
	UncompressedDigest string
}
105
106
// DownloadInfo describes an object for download
107
type DownloadInfo struct {
108
Meta ObjectMeta
109
URL string
110
Size int64
111
}
112
113
// UploadInfo describes an object for upload. It is the result type of
// PresignedAccess.SignUpload.
type UploadInfo struct {
	// URL is presumably the signed URL the object can be uploaded to —
	// confirm against the SignUpload implementations.
	URL string
}
117
118
// DeleteObjectQuery selects the objects to delete, either by an exact name or
// by a prefix.
type DeleteObjectQuery struct {
	// Prefix selects objects by name prefix.
	Prefix string
	// Name selects a single object by its exact name.
	Name string
}
123
124
// SignedURLOptions restricts how a signed URL may be used.
type SignedURLOptions struct {
	// ContentType is the content type header a client has to send in order to
	// use the generated signed URL. Optional.
	ContentType string
}
131
132
// DirectDownloader downloads a snapshot
133
type DirectDownloader interface {
134
// Download takes the latest state from the remote storage and downloads it to a local path
135
Download(ctx context.Context, destination string, name string, mappings []archive.IDMapping) (found bool, err error)
136
137
// Downloads a snapshot. The snapshot name is expected to be one produced by Qualify
138
DownloadSnapshot(ctx context.Context, destination string, name string, mappings []archive.IDMapping) (found bool, err error)
139
}
140
141
// DirectAccess represents a remote location where we can store data
142
type DirectAccess interface {
143
BucketNamer
144
BackupObjectNamer
145
DirectDownloader
146
147
// Init initializes the remote storage - call this before calling anything else on the interface
148
Init(ctx context.Context, owner, workspace, instance string) error
149
150
// EnsureExists makes sure that the remote storage location exists and can be up- or downloaded from
151
EnsureExists(ctx context.Context) error
152
153
// ListObjects returns all objects found with the given prefix. Returns an empty list if the bucket does not exuist (yet).
154
ListObjects(ctx context.Context, prefix string) ([]string, error)
155
156
// Fully qualifies a snapshot name so that it can be downloaded using DownloadSnapshot
157
Qualify(name string) string
158
159
// Upload takes all files from a local location and uploads it to the remote storage
160
Upload(ctx context.Context, source string, name string, options ...UploadOption) (bucket, obj string, err error)
161
162
// UploadInstance takes all files from a local location and uploads it to the remote storage
163
UploadInstance(ctx context.Context, source string, name string, options ...UploadOption) (bucket, obj string, err error)
164
}
165
166
// UploadOptions is the resolved configuration of a remote storage upload.
type UploadOptions struct {
	// Annotations are generic metadata attached to a storage object.
	Annotations map[string]string

	// ContentType is the content (MIME) type of the object.
	ContentType string
}
173
174
// UploadOption configures a particular aspect of remote storage upload
175
type UploadOption func(*UploadOptions) error
176
177
// WithAnnotations adds arbitrary metadata to a storage object
178
func WithAnnotations(md map[string]string) UploadOption {
179
return func(opts *UploadOptions) error {
180
opts.Annotations = md
181
return nil
182
}
183
}
184
185
// WithContentType sets the content mime type of the object
186
func WithContentType(ct string) UploadOption {
187
return func(opts *UploadOptions) error {
188
opts.ContentType = ct
189
return nil
190
}
191
}
192
193
// GetUploadOptions turns functional opts into a struct
194
func GetUploadOptions(opts []UploadOption) (*UploadOptions, error) {
195
res := &UploadOptions{}
196
for _, o := range opts {
197
err := o(res)
198
if err != nil {
199
return nil, err
200
}
201
}
202
return res, nil
203
}
204
205
const (
	// ObjectAnnotationDigest is the annotation key for the digest of the
	// actual object.
	ObjectAnnotationDigest = "gitpod-digest"

	// ObjectAnnotationUncompressedDigest is the annotation key for the digest
	// of the uncompressed object, if the object is compressed.
	ObjectAnnotationUncompressedDigest = "gitpod-uncompressedDigest"

	// ObjectAnnotationOCIContentType is the annotation key for the OCI media
	// type of the object.
	ObjectAnnotationOCIContentType = "gitpod-oci-contentType"
)
215
216
// NewDirectAccess provides direct access to a storage system
217
func NewDirectAccess(c *config.StorageConfig) (DirectAccess, error) {
218
stage := c.GetStage()
219
if stage == "" {
220
return nil, xerrors.Errorf("missing storage stage")
221
}
222
223
switch c.Kind {
224
case config.GCloudStorage:
225
return newDirectGCPAccess(c.GCloudConfig, stage)
226
case config.MinIOStorage:
227
return newDirectMinIOAccess(c.MinIOConfig)
228
case config.S3Storage:
229
cfg, err := loadAwsConfig(c.S3Config)
230
if err != nil {
231
return nil, err
232
}
233
234
return newDirectS3Access(s3.NewFromConfig(*cfg), S3Config{
235
Bucket: c.S3Config.Bucket,
236
}), nil
237
default:
238
return &DirectNoopStorage{}, nil
239
}
240
}
241
242
// NewPresignedAccess provides presigned URLs to access a storage system
243
func NewPresignedAccess(c *config.StorageConfig) (PresignedAccess, error) {
244
stage := c.GetStage()
245
if stage == "" {
246
return nil, xerrors.Errorf("missing storage stage")
247
}
248
249
switch c.Kind {
250
case config.GCloudStorage:
251
return newPresignedGCPAccess(c.GCloudConfig, stage)
252
case config.MinIOStorage:
253
return newPresignedMinIOAccess(c.MinIOConfig)
254
case config.S3Storage:
255
cfg, err := loadAwsConfig(c.S3Config)
256
if err != nil {
257
return nil, err
258
}
259
260
return NewPresignedS3Access(s3.NewFromConfig(*cfg), S3Config{
261
Bucket: c.S3Config.Bucket,
262
}), nil
263
default:
264
log.Warnf("falling back to noop presigned storage access. Is this intentional? (storage kind: %s)", c.Kind)
265
return &PresignedNoopStorage{}, nil
266
}
267
}
268
269
// loadAwsConfig builds the AWS SDK configuration for the given S3 settings.
//
// It starts from the SDK's default configuration chain and, if set, adds the
// configured credentials file and region. A nil s3config is reported as an
// error instead of being dereferenced, so that a storage configuration of
// kind S3 without an S3 section does not panic.
func loadAwsConfig(s3config *config.S3Config) (*aws.Config, error) {
	if s3config == nil {
		return nil, xerrors.Errorf("missing S3 storage configuration")
	}

	var opts []func(*awsconfig.LoadOptions) error
	if s3config.CredentialsFile != "" {
		// NOTE(review): the credentials file is registered as a shared *config*
		// file rather than via WithSharedCredentialsFiles — confirm this is
		// intended.
		opts = append(opts, awsconfig.WithSharedConfigFiles([]string{s3config.CredentialsFile}))
	}
	if s3config.Region != "" {
		opts = append(opts, awsconfig.WithRegion(s3config.Region))
	}

	// No caller context is available through this signature, hence context.TODO.
	cfg, err := awsconfig.LoadDefaultConfig(context.TODO(), opts...)
	if err != nil {
		return nil, xerrors.Errorf("cannot load AWS config: %w", err)
	}

	return &cfg, nil
}
286
287
// extractTarbal extracts the tar stream src into the directory dest, applying
// mappings as both the UID and the GID mapping.
//
// A failure is returned as "tar <dest>: <cause>". The cause is wrapped with %w
// so that callers can still inspect it with errors.Is / errors.As.
func extractTarbal(ctx context.Context, dest string, src io.Reader, mappings []archive.IDMapping) error {
	err := archive.ExtractTarbal(ctx, src, dest, archive.WithUIDMapping(mappings), archive.WithGIDMapping(mappings))
	if err != nil {
		return xerrors.Errorf("tar %s: %w", dest, err)
	}

	return nil
}
295
296
func blobObjectName(name string) (string, error) {
297
blobRegex := `^[a-zA-Z0-9._\-\/]+$`
298
b, err := regexp.MatchString(blobRegex, name)
299
if err != nil {
300
return "", err
301
}
302
if !b {
303
return "", xerrors.Errorf("blob name '%s' needs to match regex '%s'", name, blobRegex)
304
}
305
return fmt.Sprintf("blobs/%s", name), nil
306
}
307
308
// InstanceObjectName returns the object name of the instance object called
// name, in the form "instances/<instanceID>/<name>".
func InstanceObjectName(instanceID, name string) string {
	const layout = "instances/%s/%s"
	return fmt.Sprintf(layout, instanceID, name)
}
311
312