// Copyright 2021 The Emscripten Authors. All rights reserved.1// Emscripten is available under two separate licenses, the MIT license and the2// University of Illinois/NCSA Open Source License. Both these licenses can be3// found in the LICENSE file.45// This file defines the file object.67#pragma once89#include "support.h"10#include <assert.h>11#include <emscripten.h>12#include <emscripten/html5.h>13#include <map>14#include <memory>15#include <mutex>16#include <optional>17#include <sys/stat.h>18#include <variant>19#include <vector>20#include <wasi/api.h>2122namespace wasmfs {2324// Note: The general locking strategy for all Files is to only hold 1 lock at a25// time to prevent deadlock. This methodology can be seen in getDirs().2627class Backend;28class Directory;29class Symlink;3031// This represents an opaque pointer to a Backend. A user may use this to32// specify a backend in file operations.33using backend_t = Backend*;34const backend_t NullBackend = nullptr;3536// Access mode, file creation and file status flags for open.37using oflags_t = uint32_t;3839// An abstract representation of an underlying file. All `File` objects40// correspond to underlying (real or conceptual) files in a file system managed41// by some backend, but not all underlying files have a corresponding `File`42// object. For example, a persistent backend may contain some files that have43// not yet been discovered by WasmFS and that therefore do not yet have44// corresponding `File` objects. Backends override the `File` family of classes45// to implement the mapping from `File` objects to their underlying files.46class File : public std::enable_shared_from_this<File> {47public:48enum FileKind {49UnknownKind = 0,50DataFileKind = 1,51DirectoryKind = 2,52SymlinkKind = 353};5455const FileKind kind;5657template<class T> bool is() const {58static_assert(std::is_base_of<File, T>::value,59"File is not a base of destination type T");60return int(kind) == int(T::expectedKind);61}6263template<class T> std::shared_ptr<T> dynCast() {64static_assert(std::is_base_of<File, T>::value,65"File is not a base of destination type T");66if (int(kind) == int(T::expectedKind)) {67return std::static_pointer_cast<T>(shared_from_this());68} else {69return nullptr;70}71}7273template<class T> std::shared_ptr<T> cast() {74static_assert(std::is_base_of<File, T>::value,75"File is not a base of destination type T");76assert(int(kind) == int(T::expectedKind));77return std::static_pointer_cast<T>(shared_from_this());78}7980ino_t getIno() {81// Set inode number to the file pointer. This gives a unique inode number.82// TODO: For security it would be better to use an indirect mapping.83// Ensure that the pointer will not overflow an ino_t.84static_assert(sizeof(this) <= sizeof(ino_t));85return (ino_t)this;86}8788backend_t getBackend() const { return backend; }8990bool isSeekable() const { return seekable; }9192class Handle;93Handle locked();9495protected:96File(FileKind kind, mode_t mode, backend_t backend)97: kind(kind), mode(mode), backend(backend) {98atime = mtime = ctime = emscripten_date_now();99}100101// A mutex is needed for multiple accesses to the same file.102std::recursive_mutex mutex;103104// The size in bytes of a file or return a negative error code. May be105// called on files that have not been opened.106virtual off_t getSize() = 0;107108mode_t mode = 0; // User and group mode bits for access permission.109110double atime; // Time when the content was last accessed, in ms.111double mtime; // Time when the file content was last modified, in ms.112double ctime; // Time when the file node was last modified, in ms.113114// Reference to parent of current file node. This can be used to115// traverse up the directory tree. A weak_ptr ensures that the ref116// count is not incremented. This also ensures that there are no cyclic117// dependencies where the parent and child have shared_ptrs that reference118// each other. This prevents the case in which an uncollectable cycle occurs.119std::weak_ptr<Directory> parent;120121// This specifies which backend a file is associated with. It may be null122// (NullBackend) if there is no particular backend associated with the file.123backend_t backend;124125// By default files are seekable. The rare exceptions are things like pipes126// and sockets.127bool seekable = true;128};129130class DataFile : public File {131protected:132// Notify the backend when this file is opened or closed. The backend is133// responsible for keeping files accessible as long as they are open, even if134// they are unlinked. Returns 0 on success or a negative error code.135virtual int open(oflags_t flags) = 0;136virtual int close() = 0;137138// Return the accessed length or a negative error code. It is not an error to139// access fewer bytes than requested. Will only be called on opened files.140// TODO: Allow backends to override the version of read with141// multiple iovecs to make it possible to implement pipes. See #16269.142virtual ssize_t read(uint8_t* buf, size_t len, off_t offset) = 0;143virtual ssize_t write(const uint8_t* buf, size_t len, off_t offset) = 0;144145// Sets the size of the file to a specific size. If new space is allocated, it146// should be zero-initialized. May be called on files that have not been147// opened. Returns 0 on success or a negative error code.148virtual int setSize(off_t size) = 0;149150// Sync the file data to the underlying persistent storage, if any. Returns 0151// on success or a negative error code.152virtual int flush() = 0;153154public:155static constexpr FileKind expectedKind = File::DataFileKind;156DataFile(mode_t mode, backend_t backend)157: File(File::DataFileKind, mode | S_IFREG, backend) {}158DataFile(mode_t mode, backend_t backend, mode_t fileType)159: File(File::DataFileKind, mode | fileType, backend) {}160virtual ~DataFile() = default;161162class Handle;163Handle locked();164};165166class Directory : public File {167public:168struct Entry {169std::string name;170FileKind kind;171ino_t ino;172};173174struct MaybeEntries : std::variant<std::vector<Entry>, int> {175int getError() {176if (int* err = std::get_if<int>(this)) {177assert(*err < 0);178return *err;179}180return 0;181}182183std::vector<Entry>& operator*() {184return *std::get_if<std::vector<Entry>>(this);185}186187std::vector<Entry>* operator->() {188return std::get_if<std::vector<Entry>>(this);189}190};191192private:193// The directory cache, or `dcache`, stores `File` objects for the children of194// each directory so that subsequent lookups do not need to query the backend.195// It also supports cross-backend mount point children that are stored196// exclusively in the cache and not reflected in any backend.197enum class DCacheKind { Normal, Mount };198struct DCacheEntry {199DCacheKind kind;200std::shared_ptr<File> file;201};202// TODO: Use a cache data structure with smaller code size.203std::map<std::string, DCacheEntry> dcache;204205protected:206// Return the `File` object corresponding to the file with the given name or207// null if there is none.208virtual std::shared_ptr<File> getChild(const std::string& name) = 0;209210// Inserts a file with the given name, kind, and mode. Returns a `File` object211// corresponding to the newly created file or nullptr if the new file could212// not be created. Assumes a child with this name does not already exist.213// If the operation failed, returns nullptr.214virtual std::shared_ptr<DataFile> insertDataFile(const std::string& name,215mode_t mode) = 0;216virtual std::shared_ptr<Directory> insertDirectory(const std::string& name,217mode_t mode) = 0;218virtual std::shared_ptr<Symlink> insertSymlink(const std::string& name,219const std::string& target) = 0;220221// Move the file represented by `file` from its current directory to this222// directory with the new `name`, possibly overwriting another file that223// already exists with that name. The old directory may be the same as this224// directory. On success return 0 and otherwise return a negative error code225// without changing any underlying state.226virtual int insertMove(const std::string& name,227std::shared_ptr<File> file) = 0;228229// Remove the file with the given name. Returns zero on success or if the230// child has already been removed and otherwise returns a negative error code231// if the child cannot be removed.232virtual int removeChild(const std::string& name) = 0;233234// The number of entries in this directory. Returns the number of entries or a235// negative error code.236virtual ssize_t getNumEntries() = 0;237238// The list of entries in this directory or a negative error code.239virtual MaybeEntries getEntries() = 0;240241// Only backends that maintain file identity themselves (see below) need to242// implement this.243virtual std::string getName(std::shared_ptr<File> file) {244WASMFS_UNREACHABLE("getName unimplemented");245}246247// Whether this directory implementation always returns the same `File` object248// for a given file. Most backends can be much simpler if they don't handle249// this themselves. Instead, they rely on the directory cache (dcache) to250// maintain file identity for them by ensuring each file is looked up in the251// backend only once. Some backends, however, already track file identity, so252// the dcache is not necessary (or would even introduce problems).253//254// When this is `true`, backends are responsible for:255//256// 1. Ensuring that all insert* and getChild calls returning a particular257// file return the same File object.258//259// 2. Clearing unlinked Files' parents in `removeChild` and `insertMove`.260//261// 3. Implementing `getName`, since it cannot be implemented in terms of the262// dcache.263virtual bool maintainsFileIdentity() { return false; }264265public:266static constexpr FileKind expectedKind = File::DirectoryKind;267Directory(mode_t mode, backend_t backend)268: File(File::DirectoryKind, mode | S_IFDIR, backend) {}269virtual ~Directory() = default;270271class Handle;272Handle locked();273274protected:275// 4096 bytes is the size of a block in ext4.276// This value was also copied from the JS file system.277off_t getSize() override { return 4096; }278};279280class Symlink : public File {281public:282static constexpr FileKind expectedKind = File::SymlinkKind;283// Note that symlinks provide a mode of 0 to File. The mode of a symlink does284// not matter, so that value will never be read (what matters is the mode of285// the target).286Symlink(backend_t backend) : File(File::SymlinkKind, S_IFLNK, backend) {}287virtual ~Symlink() = default;288289// Constant, and therefore thread-safe, and can be done without locking.290virtual std::string getTarget() const = 0;291292protected:293off_t getSize() override { return getTarget().size(); }294};295296class File::Handle {297protected:298// This mutex is needed when one needs to access access a previously locked299// file in the same thread. For example, rename will need to traverse300// 2 paths and access the same locked directory twice.301// TODO: During benchmarking, test recursive vs normal mutex performance.302std::unique_lock<std::recursive_mutex> lock;303std::shared_ptr<File> file;304305public:306Handle(std::shared_ptr<File> file) : lock(file->mutex), file(file) {}307Handle(std::shared_ptr<File> file, std::defer_lock_t)308: lock(file->mutex, std::defer_lock), file(file) {}309off_t getSize() { return file->getSize(); }310mode_t getMode() { return file->mode; }311void setMode(mode_t mode) {312// The type bits can never be changed (whether something is a file or a313// directory, for example).314file->mode = (file->mode & S_IFMT) | (mode & ~S_IFMT);315}316double getCTime() {317return file->ctime;318}319void setCTime(double time) { file->ctime = time; }320// updateCTime() updates the ctime to the current time.321void updateCTime() {322file->ctime = emscripten_date_now();323}324double getMTime() {325return file->mtime;326}327void setMTime(double time) { file->mtime = time; }328// updateMTime() updates the mtime to the current time.329void updateMTime() {330file->mtime = emscripten_date_now();331}332double getATime() {333return file->atime;334}335void setATime(double time) { file->atime = time; }336// updateATime() updates the atime to the current time.337void updateATime() {338file->atime = emscripten_date_now();339}340341// Note: parent.lock() creates a new shared_ptr to the same Directory342// specified by the parent weak_ptr.343std::shared_ptr<Directory> getParent() { return file->parent.lock(); }344void setParent(std::shared_ptr<Directory> parent) { file->parent = parent; }345346std::shared_ptr<File> unlocked() { return file; }347};348349class DataFile::Handle : public File::Handle {350std::shared_ptr<DataFile> getFile() { return file->cast<DataFile>(); }351352public:353Handle(std::shared_ptr<File> dataFile) : File::Handle(dataFile) {}354Handle(Handle&&) = default;355356[[nodiscard]] int open(oflags_t flags) { return getFile()->open(flags); }357[[nodiscard]] int close() { return getFile()->close(); }358359ssize_t read(uint8_t* buf, size_t len, off_t offset) {360return getFile()->read(buf, len, offset);361}362ssize_t write(const uint8_t* buf, size_t len, off_t offset) {363return getFile()->write(buf, len, offset);364}365366[[nodiscard]] int setSize(off_t size) { return getFile()->setSize(size); }367368// TODO: Design a proper API for flushing files.369[[nodiscard]] int flush() { return getFile()->flush(); }370371// This function loads preloaded files from JS Memory into this DataFile.372// TODO: Make this virtual so specific backends can specialize it for better373// performance.374void preloadFromJS(int index);375};376377class Directory::Handle : public File::Handle {378std::shared_ptr<Directory> getDir() { return file->cast<Directory>(); }379void cacheChild(const std::string& name,380std::shared_ptr<File> child,381DCacheKind kind);382383public:384Handle(std::shared_ptr<File> directory) : File::Handle(directory) {}385Handle(std::shared_ptr<File> directory, std::defer_lock_t)386: File::Handle(directory, std::defer_lock) {}387388// Retrieve the child if it is in the dcache and otherwise forward the request389// to the backend, caching any `File` object it returns.390std::shared_ptr<File> getChild(const std::string& name);391392// Add a child to this directory's entry cache without actually inserting it393// in the underlying backend. Assumes a child with this name does not already394// exist. Return `true` on success and `false` otherwise.395bool mountChild(const std::string& name, std::shared_ptr<File> file);396397// Insert a child of the given name, kind, and mode in the underlying backend,398// which will allocate and return a corresponding `File` on success or return399// nullptr otherwise. Assumes a child with this name does not already exist.400// If the operation failed, returns nullptr.401std::shared_ptr<DataFile> insertDataFile(const std::string& name,402mode_t mode);403std::shared_ptr<Directory> insertDirectory(const std::string& name,404mode_t mode);405std::shared_ptr<Symlink> insertSymlink(const std::string& name,406const std::string& target);407408// Move the file represented by `file` from its current directory to this409// directory with the new `name`, possibly overwriting another file that410// already exists with that name. The old directory may be the same as this411// directory. On success return 0 and otherwise return a negative error code412// without changing any underlying state. This should only be called from413// renameat with the locks on the old and new parents already held.414[[nodiscard]] int insertMove(const std::string& name,415std::shared_ptr<File> file);416417// Remove the file with the given name. Returns zero on success or if the418// child has already been removed and otherwise returns a negative error code419// if the child cannot be removed.420[[nodiscard]] int removeChild(const std::string& name);421422std::string getName(std::shared_ptr<File> file);423424[[nodiscard]] ssize_t getNumEntries();425[[nodiscard]] MaybeEntries getEntries();426};427428inline File::Handle File::locked() { return Handle(shared_from_this()); }429430inline DataFile::Handle DataFile::locked() {431return Handle(shared_from_this());432}433434inline Directory::Handle Directory::locked() {435return Handle(shared_from_this());436}437438} // namespace wasmfs439440441