Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
emscripten-core
GitHub Repository: emscripten-core/emscripten
Path: blob/main/system/lib/wasmfs/file.h
6174 views
1
// Copyright 2021 The Emscripten Authors. All rights reserved.
2
// Emscripten is available under two separate licenses, the MIT license and the
3
// University of Illinois/NCSA Open Source License. Both these licenses can be
4
// found in the LICENSE file.
5
6
// This file defines the file object.
7
8
#pragma once
9
10
#include "support.h"
11
#include <assert.h>
12
#include <emscripten.h>
13
#include <emscripten/html5.h>
14
#include <map>
15
#include <memory>
16
#include <mutex>
17
#include <optional>
18
#include <sys/stat.h>
19
#include <variant>
20
#include <vector>
21
#include <wasi/api.h>
22
23
namespace wasmfs {
24
25
// Note: The general locking strategy for all Files is to only hold 1 lock at a
26
// time to prevent deadlock. This methodology can be seen in getDirs().
27
28
// Forward declarations. `Directory` and `Symlink` are defined later in this
// file; `Backend` is only ever used through a pointer here.
class Backend;
class Directory;
class Symlink;

// This represents an opaque pointer to a Backend. A user may use this to
// specify a backend in file operations.
using backend_t = Backend*;

// The "no backend" value. It is `inline constexpr` so that every translation
// unit including this header refers to one single definition and the value can
// be used in constant expressions.
inline constexpr backend_t NullBackend = nullptr;

// Access mode, file creation and file status flags for open.
using oflags_t = uint32_t;
39
40
// An abstract representation of an underlying file. All `File` objects
41
// correspond to underlying (real or conceptual) files in a file system managed
42
// by some backend, but not all underlying files have a corresponding `File`
43
// object. For example, a persistent backend may contain some files that have
44
// not yet been discovered by WasmFS and that therefore do not yet have
45
// corresponding `File` objects. Backends override the `File` family of classes
46
// to implement the mapping from `File` objects to their underlying files.
47
class File : public std::enable_shared_from_this<File> {
48
public:
49
enum FileKind {
50
UnknownKind = 0,
51
DataFileKind = 1,
52
DirectoryKind = 2,
53
SymlinkKind = 3
54
};
55
56
const FileKind kind;
57
58
template<class T> bool is() const {
59
static_assert(std::is_base_of<File, T>::value,
60
"File is not a base of destination type T");
61
return int(kind) == int(T::expectedKind);
62
}
63
64
template<class T> std::shared_ptr<T> dynCast() {
65
static_assert(std::is_base_of<File, T>::value,
66
"File is not a base of destination type T");
67
if (int(kind) == int(T::expectedKind)) {
68
return std::static_pointer_cast<T>(shared_from_this());
69
} else {
70
return nullptr;
71
}
72
}
73
74
template<class T> std::shared_ptr<T> cast() {
75
static_assert(std::is_base_of<File, T>::value,
76
"File is not a base of destination type T");
77
assert(int(kind) == int(T::expectedKind));
78
return std::static_pointer_cast<T>(shared_from_this());
79
}
80
81
ino_t getIno() {
82
// Set inode number to the file pointer. This gives a unique inode number.
83
// TODO: For security it would be better to use an indirect mapping.
84
// Ensure that the pointer will not overflow an ino_t.
85
static_assert(sizeof(this) <= sizeof(ino_t));
86
return (ino_t)this;
87
}
88
89
backend_t getBackend() const { return backend; }
90
91
bool isSeekable() const { return seekable; }
92
93
class Handle;
94
Handle locked();
95
96
protected:
97
File(FileKind kind, mode_t mode, backend_t backend)
98
: kind(kind), mode(mode), backend(backend) {
99
atime = mtime = ctime = emscripten_date_now();
100
}
101
102
// A mutex is needed for multiple accesses to the same file.
103
std::recursive_mutex mutex;
104
105
// The size in bytes of a file or return a negative error code. May be
106
// called on files that have not been opened.
107
virtual off_t getSize() = 0;
108
109
mode_t mode = 0; // User and group mode bits for access permission.
110
111
double atime; // Time when the content was last accessed, in ms.
112
double mtime; // Time when the file content was last modified, in ms.
113
double ctime; // Time when the file node was last modified, in ms.
114
115
// Reference to parent of current file node. This can be used to
116
// traverse up the directory tree. A weak_ptr ensures that the ref
117
// count is not incremented. This also ensures that there are no cyclic
118
// dependencies where the parent and child have shared_ptrs that reference
119
// each other. This prevents the case in which an uncollectable cycle occurs.
120
std::weak_ptr<Directory> parent;
121
122
// This specifies which backend a file is associated with. It may be null
123
// (NullBackend) if there is no particular backend associated with the file.
124
backend_t backend;
125
126
// By default files are seekable. The rare exceptions are things like pipes
127
// and sockets.
128
bool seekable = true;
129
};
130
131
class DataFile : public File {
132
protected:
133
// Notify the backend when this file is opened or closed. The backend is
134
// responsible for keeping files accessible as long as they are open, even if
135
// they are unlinked. Returns 0 on success or a negative error code.
136
virtual int open(oflags_t flags) = 0;
137
virtual int close() = 0;
138
139
// Return the accessed length or a negative error code. It is not an error to
140
// access fewer bytes than requested. Will only be called on opened files.
141
// TODO: Allow backends to override the version of read with
142
// multiple iovecs to make it possible to implement pipes. See #16269.
143
virtual ssize_t read(uint8_t* buf, size_t len, off_t offset) = 0;
144
virtual ssize_t write(const uint8_t* buf, size_t len, off_t offset) = 0;
145
146
// Sets the size of the file to a specific size. If new space is allocated, it
147
// should be zero-initialized. May be called on files that have not been
148
// opened. Returns 0 on success or a negative error code.
149
virtual int setSize(off_t size) = 0;
150
151
// Sync the file data to the underlying persistent storage, if any. Returns 0
152
// on success or a negative error code.
153
virtual int flush() = 0;
154
155
public:
156
static constexpr FileKind expectedKind = File::DataFileKind;
157
DataFile(mode_t mode, backend_t backend)
158
: File(File::DataFileKind, mode | S_IFREG, backend) {}
159
DataFile(mode_t mode, backend_t backend, mode_t fileType)
160
: File(File::DataFileKind, mode | fileType, backend) {}
161
virtual ~DataFile() = default;
162
163
class Handle;
164
Handle locked();
165
};
166
167
class Directory : public File {
168
public:
169
struct Entry {
170
std::string name;
171
FileKind kind;
172
ino_t ino;
173
};
174
175
struct MaybeEntries : std::variant<std::vector<Entry>, int> {
176
int getError() {
177
if (int* err = std::get_if<int>(this)) {
178
assert(*err < 0);
179
return *err;
180
}
181
return 0;
182
}
183
184
std::vector<Entry>& operator*() {
185
return *std::get_if<std::vector<Entry>>(this);
186
}
187
188
std::vector<Entry>* operator->() {
189
return std::get_if<std::vector<Entry>>(this);
190
}
191
};
192
193
private:
194
// The directory cache, or `dcache`, stores `File` objects for the children of
195
// each directory so that subsequent lookups do not need to query the backend.
196
// It also supports cross-backend mount point children that are stored
197
// exclusively in the cache and not reflected in any backend.
198
enum class DCacheKind { Normal, Mount };
199
struct DCacheEntry {
200
DCacheKind kind;
201
std::shared_ptr<File> file;
202
};
203
// TODO: Use a cache data structure with smaller code size.
204
std::map<std::string, DCacheEntry> dcache;
205
206
protected:
207
// Return the `File` object corresponding to the file with the given name or
208
// null if there is none.
209
virtual std::shared_ptr<File> getChild(const std::string& name) = 0;
210
211
// Inserts a file with the given name, kind, and mode. Returns a `File` object
212
// corresponding to the newly created file or nullptr if the new file could
213
// not be created. Assumes a child with this name does not already exist.
214
// If the operation failed, returns nullptr.
215
virtual std::shared_ptr<DataFile> insertDataFile(const std::string& name,
216
mode_t mode) = 0;
217
virtual std::shared_ptr<Directory> insertDirectory(const std::string& name,
218
mode_t mode) = 0;
219
virtual std::shared_ptr<Symlink> insertSymlink(const std::string& name,
220
const std::string& target) = 0;
221
222
// Move the file represented by `file` from its current directory to this
223
// directory with the new `name`, possibly overwriting another file that
224
// already exists with that name. The old directory may be the same as this
225
// directory. On success return 0 and otherwise return a negative error code
226
// without changing any underlying state.
227
virtual int insertMove(const std::string& name,
228
std::shared_ptr<File> file) = 0;
229
230
// Remove the file with the given name. Returns zero on success or if the
231
// child has already been removed and otherwise returns a negative error code
232
// if the child cannot be removed.
233
virtual int removeChild(const std::string& name) = 0;
234
235
// The number of entries in this directory. Returns the number of entries or a
236
// negative error code.
237
virtual ssize_t getNumEntries() = 0;
238
239
// The list of entries in this directory or a negative error code.
240
virtual MaybeEntries getEntries() = 0;
241
242
// Only backends that maintain file identity themselves (see below) need to
243
// implement this.
244
virtual std::string getName(std::shared_ptr<File> file) {
245
WASMFS_UNREACHABLE("getName unimplemented");
246
}
247
248
// Whether this directory implementation always returns the same `File` object
249
// for a given file. Most backends can be much simpler if they don't handle
250
// this themselves. Instead, they rely on the directory cache (dcache) to
251
// maintain file identity for them by ensuring each file is looked up in the
252
// backend only once. Some backends, however, already track file identity, so
253
// the dcache is not necessary (or would even introduce problems).
254
//
255
// When this is `true`, backends are responsible for:
256
//
257
// 1. Ensuring that all insert* and getChild calls returning a particular
258
// file return the same File object.
259
//
260
// 2. Clearing unlinked Files' parents in `removeChild` and `insertMove`.
261
//
262
// 3. Implementing `getName`, since it cannot be implemented in terms of the
263
// dcache.
264
virtual bool maintainsFileIdentity() { return false; }
265
266
public:
267
static constexpr FileKind expectedKind = File::DirectoryKind;
268
Directory(mode_t mode, backend_t backend)
269
: File(File::DirectoryKind, mode | S_IFDIR, backend) {}
270
virtual ~Directory() = default;
271
272
class Handle;
273
Handle locked();
274
275
protected:
276
// 4096 bytes is the size of a block in ext4.
277
// This value was also copied from the JS file system.
278
off_t getSize() override { return 4096; }
279
};
280
281
class Symlink : public File {
282
public:
283
static constexpr FileKind expectedKind = File::SymlinkKind;
284
// Note that symlinks provide a mode of 0 to File. The mode of a symlink does
285
// not matter, so that value will never be read (what matters is the mode of
286
// the target).
287
Symlink(backend_t backend) : File(File::SymlinkKind, S_IFLNK, backend) {}
288
virtual ~Symlink() = default;
289
290
// Constant, and therefore thread-safe, and can be done without locking.
291
virtual std::string getTarget() const = 0;
292
293
protected:
294
off_t getSize() override { return getTarget().size(); }
295
};
296
297
// Owns a lock on a file's mutex together with a reference to the file, and
// exposes the file's protected metadata through accessors.
class File::Handle {
protected:
  // The mutex is recursive because a thread sometimes needs to access a file
  // that it has already locked. For example, rename will need to traverse
  // 2 paths and access the same locked directory twice.
  // TODO: During benchmarking, test recursive vs normal mutex performance.
  std::unique_lock<std::recursive_mutex> lock;
  std::shared_ptr<File> file;

public:
  // Locks `file`'s mutex immediately.
  Handle(std::shared_ptr<File> file) : lock(file->mutex), file(file) {}

  // Associates the handle with `file`'s mutex without locking it.
  Handle(std::shared_ptr<File> file, std::defer_lock_t)
    : lock(file->mutex, std::defer_lock), file(file) {}

  off_t getSize() { return file->getSize(); }

  mode_t getMode() { return file->mode; }

  void setMode(mode_t mode) {
    // The type bits can never be changed (whether something is a file or a
    // directory, for example), so keep the existing ones and take everything
    // else from the new mode.
    const mode_t typeBits = file->mode & S_IFMT;
    const mode_t otherBits = mode & ~S_IFMT;
    file->mode = typeBits | otherBits;
  }

  // For each of the three timestamps: a getter, a setter taking an explicit
  // time, and an update*() that sets it to the current time.
  double getCTime() { return file->ctime; }
  void setCTime(double time) {
    file->ctime = time;
  }
  void updateCTime() { file->ctime = emscripten_date_now(); }

  double getMTime() { return file->mtime; }
  void setMTime(double time) {
    file->mtime = time;
  }
  void updateMTime() { file->mtime = emscripten_date_now(); }

  double getATime() { return file->atime; }
  void setATime(double time) {
    file->atime = time;
  }
  void updateATime() { file->atime = emscripten_date_now(); }

  // Note: parent.lock() creates a new shared_ptr to the same Directory
  // specified by the parent weak_ptr.
  std::shared_ptr<Directory> getParent() {
    return file->parent.lock();
  }
  void setParent(std::shared_ptr<Directory> parent) {
    file->parent = parent;
  }

  // The underlying file, without the lock.
  std::shared_ptr<File> unlocked() {
    return file;
  }
};
349
350
// A locked handle to a DataFile that forwards each operation to the file's
// protected backend implementation.
class DataFile::Handle : public File::Handle {
  // The held file viewed as a DataFile; `cast` asserts that the kind matches.
  std::shared_ptr<DataFile> getFile() {
    return file->cast<DataFile>();
  }

public:
  Handle(std::shared_ptr<File> dataFile) : File::Handle(dataFile) {}
  Handle(Handle&&) = default;

  [[nodiscard]] int open(oflags_t flags) {
    return getFile()->open(flags);
  }
  [[nodiscard]] int close() {
    return getFile()->close();
  }

  ssize_t read(uint8_t* buf, size_t len, off_t offset) {
    return getFile()->read(buf, len, offset);
  }
  ssize_t write(const uint8_t* buf, size_t len, off_t offset) {
    return getFile()->write(buf, len, offset);
  }

  [[nodiscard]] int setSize(off_t size) {
    return getFile()->setSize(size);
  }

  // TODO: Design a proper API for flushing files.
  [[nodiscard]] int flush() {
    return getFile()->flush();
  }

  // This function loads preloaded files from JS Memory into this DataFile.
  // TODO: Make this virtual so specific backends can specialize it for better
  // performance.
  void preloadFromJS(int index);
};
377
378
// A locked handle to a Directory. Its member functions are declared here and
// defined outside of this header.
class Directory::Handle : public File::Handle {
  // The held file viewed as a Directory; `cast` asserts that the kind matches.
  std::shared_ptr<Directory> getDir() {
    return file->cast<Directory>();
  }

  void cacheChild(const std::string& name,
                  std::shared_ptr<File> child,
                  DCacheKind kind);

public:
  // Locks the directory immediately.
  Handle(std::shared_ptr<File> directory) : File::Handle(directory) {}

  // Associates the handle with the directory without locking it.
  Handle(std::shared_ptr<File> directory, std::defer_lock_t)
    : File::Handle(directory, std::defer_lock) {}

  // Retrieve the child if it is in the dcache and otherwise forward the request
  // to the backend, caching any `File` object it returns.
  std::shared_ptr<File> getChild(const std::string& name);

  // Add a child to this directory's entry cache without actually inserting it
  // in the underlying backend. Assumes a child with this name does not already
  // exist. Return `true` on success and `false` otherwise.
  bool mountChild(const std::string& name, std::shared_ptr<File> file);

  // Insert a child of the given name, kind, and mode in the underlying backend,
  // which will allocate and return a corresponding `File` on success or return
  // nullptr otherwise. Assumes a child with this name does not already exist.
  // If the operation failed, returns nullptr.
  std::shared_ptr<DataFile> insertDataFile(const std::string& name,
                                           mode_t mode);
  std::shared_ptr<Directory> insertDirectory(const std::string& name,
                                             mode_t mode);
  std::shared_ptr<Symlink> insertSymlink(const std::string& name,
                                         const std::string& target);

  // Move the file represented by `file` from its current directory to this
  // directory with the new `name`, possibly overwriting another file that
  // already exists with that name. The old directory may be the same as this
  // directory. On success return 0 and otherwise return a negative error code
  // without changing any underlying state. This should only be called from
  // renameat with the locks on the old and new parents already held.
  [[nodiscard]] int insertMove(const std::string& name,
                               std::shared_ptr<File> file);

  // Remove the file with the given name. Returns zero on success or if the
  // child has already been removed and otherwise returns a negative error code
  // if the child cannot be removed.
  [[nodiscard]] int removeChild(const std::string& name);

  std::string getName(std::shared_ptr<File> file);

  // The number of entries, or the entries themselves, of this directory. Each
  // reports failure as a negative error code.
  [[nodiscard]] ssize_t getNumEntries();
  [[nodiscard]] MaybeEntries getEntries();
};
428
429
// Returns a handle that holds this file's lock.
inline File::Handle File::locked() {
  return Handle{shared_from_this()};
}
430
431
// Returns a DataFile handle that holds this file's lock.
inline DataFile::Handle DataFile::locked() {
  return Handle{shared_from_this()};
}
434
435
// Returns a Directory handle that holds this directory's lock.
inline Directory::Handle Directory::locked() {
  return Handle{shared_from_this()};
}
438
439
} // namespace wasmfs
440
441