/* $NetBSD: tmpfs.h,v 1.26 2007/02/22 06:37:00 thorpej Exp $ */12/*-3* SPDX-License-Identifier: BSD-2-Clause4*5* Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.6* All rights reserved.7*8* This code is derived from software contributed to The NetBSD Foundation9* by Julio M. Merino Vidal, developed as part of Google's Summer of Code10* 2005 program.11*12* Redistribution and use in source and binary forms, with or without13* modification, are permitted provided that the following conditions14* are met:15* 1. Redistributions of source code must retain the above copyright16* notice, this list of conditions and the following disclaimer.17* 2. Redistributions in binary form must reproduce the above copyright18* notice, this list of conditions and the following disclaimer in the19* documentation and/or other materials provided with the distribution.20*21* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS22* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED23* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR24* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS25* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR26* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF27* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS28* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN29* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)30* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE31* POSSIBILITY OF SUCH DAMAGE.32*/3334#ifndef _FS_TMPFS_TMPFS_H_35#define _FS_TMPFS_TMPFS_H_3637#include <sys/cdefs.h>38#include <sys/queue.h>39#include <sys/tree.h>4041#ifdef _SYS_MALLOC_H_42MALLOC_DECLARE(M_TMPFSNAME);43#endif4445#define OBJ_TMPFS OBJ_PAGERPRIV1 /* has tmpfs vnode allocated */46#define OBJ_TMPFS_VREF OBJ_PAGERPRIV2 /* vnode is referenced */4748/*49* Internal representation of a tmpfs directory entry.50*/5152LIST_HEAD(tmpfs_dir_duphead, tmpfs_dirent);5354struct tmpfs_dirent {55/*56* Depending on td_cookie flag entry can be of 3 types:57* - regular -- no hash collisions, stored in RB-Tree58* - duphead -- synthetic linked list head for dup entries59* - dup -- stored in linked list instead of RB-Tree60*/61union {62/* regular and duphead entry types */63RB_ENTRY(tmpfs_dirent) td_entries;6465/* dup entry type */66struct {67LIST_ENTRY(tmpfs_dirent) entries;68LIST_ENTRY(tmpfs_dirent) index_entries;69} td_dup;70} uh;7172uint32_t td_cookie;73uint32_t td_hash;74u_int td_namelen;7576/*77* Pointer to the node this entry refers to. In case this field78* is NULL, the node is a whiteout.79*/80struct tmpfs_node * td_node;8182union {83/*84* The name of the entry, allocated from a string pool. This85* string is not required to be zero-terminated.86*/87char * td_name; /* regular, dup */88struct tmpfs_dir_duphead td_duphead; /* duphead */89} ud;90};9192/*93* A directory in tmpfs holds a collection of directory entries, which94* in turn point to other files (which can be directories themselves).95*96* In tmpfs, this collection is managed by a RB-Tree, whose head is97* defined by the struct tmpfs_dir type.98*99* It is important to notice that directories do not have entries for . and100* .. as other file systems do. These can be generated when requested101* based on information available by other means, such as the pointer to102* the node itself in the former case or the pointer to the parent directory103* in the latter case. This is done to simplify tmpfs's code and, more104* importantly, to remove redundancy.105*/106RB_HEAD(tmpfs_dir, tmpfs_dirent);107108/*109* Each entry in a directory has a cookie that identifies it. Cookies110* supersede offsets within directories because, given how tmpfs stores111* directories in memory, there is no such thing as an offset.112*113* The '.', '..' and the end of directory markers have fixed cookies which114* cannot collide with the cookies generated by other entries. The cookies115* for the other entries are generated based on the file name hash value or116* unique number in case of name hash collision.117*118* To preserve compatibility cookies are limited to 31 bits.119*/120121#define TMPFS_DIRCOOKIE_DOT 0122#define TMPFS_DIRCOOKIE_DOTDOT 1123#define TMPFS_DIRCOOKIE_EOF 2124#define TMPFS_DIRCOOKIE_MASK ((off_t)0x3fffffffU)125#define TMPFS_DIRCOOKIE_MIN ((off_t)0x00000004U)126#define TMPFS_DIRCOOKIE_DUP ((off_t)0x40000000U)127#define TMPFS_DIRCOOKIE_DUPHEAD ((off_t)0x80000000U)128#define TMPFS_DIRCOOKIE_DUP_MIN TMPFS_DIRCOOKIE_DUP129#define TMPFS_DIRCOOKIE_DUP_MAX \130(TMPFS_DIRCOOKIE_DUP | TMPFS_DIRCOOKIE_MASK)131132/*133* Internal representation of a tmpfs extended attribute entry.134*/135LIST_HEAD(tmpfs_extattr_list, tmpfs_extattr);136137struct tmpfs_extattr {138LIST_ENTRY(tmpfs_extattr) ea_extattrs;139int ea_namespace; /* attr namespace */140char *ea_name; /* attr name */141unsigned char ea_namelen; /* attr name length */142char *ea_value; /* attr value buffer */143ssize_t ea_size; /* attr value size */144};145146/*147* Internal representation of a tmpfs file system node.148*149* This structure is splitted in two parts: one holds attributes common150* to all file types and the other holds data that is only applicable to151* a particular type. The code must be careful to only access those152* attributes that are actually allowed by the node's type.153*154* Below is the key of locks used to protected the fields in the following155* structures.156* (v) vnode lock in exclusive mode157* (vi) vnode lock in exclusive mode, or vnode lock in shared vnode and158* tn_interlock159* (i) tn_interlock160* (m) tmpfs_mount tm_allnode_lock161* (c) stable after creation162* (v) tn_reg.tn_aobj vm_object lock163*/164struct tmpfs_node {165/*166* Doubly-linked list entry which links all existing nodes for167* a single file system. This is provided to ease the removal168* of all nodes during the unmount operation, and to support169* the implementation of VOP_VNTOCNP(). tn_attached is false170* when the node is removed from list and unlocked.171*/172LIST_ENTRY(tmpfs_node) tn_entries; /* (m) */173174/* Node identifier. */175ino_t tn_id; /* (c) */176177/*178* The node's type. Any of 'VBLK', 'VCHR', 'VDIR', 'VFIFO',179* 'VLNK', 'VREG' and 'VSOCK' is allowed. The usage of vnode180* types instead of a custom enumeration is to make things simpler181* and faster, as we do not need to convert between two types.182*/183__enum_uint8(vtype) tn_type; /* (c) */184185/*186* See the top comment. Reordered here to fill LP64 hole.187*/188bool tn_attached; /* (m) */189190/*191* Node's internal status. This is used by several file system192* operations to do modifications to the node in a delayed193* fashion.194*195* tn_accessed has a dedicated byte to allow update by store without196* using atomics. This provides a micro-optimization to e.g.197* tmpfs_read_pgcache().198*/199uint8_t tn_status; /* (vi) */200uint8_t tn_accessed; /* unlocked */201202/*203* The node size. It does not necessarily match the real amount204* of memory consumed by it.205*/206off_t tn_size; /* (v) */207208/* Generic node attributes. */209uid_t tn_uid; /* (v) */210gid_t tn_gid; /* (v) */211mode_t tn_mode; /* (v) */212int tn_links; /* (v) */213u_long tn_flags; /* (v) */214struct timespec tn_atime; /* (vi) */215struct timespec tn_mtime; /* (vi) */216struct timespec tn_ctime; /* (vi) */217struct timespec tn_birthtime; /* (v) */218unsigned long tn_gen; /* (c) */219220/*221* As there is a single vnode for each active file within the222* system, care has to be taken to avoid allocating more than one223* vnode per file. In order to do this, a bidirectional association224* is kept between vnodes and nodes.225*226* Whenever a vnode is allocated, its v_data field is updated to227* point to the node it references. At the same time, the node's228* tn_vnode field is modified to point to the new vnode representing229* it. Further attempts to allocate a vnode for this same node will230* result in returning a new reference to the value stored in231* tn_vnode.232*233* May be NULL when the node is unused (that is, no vnode has been234* allocated for it or it has been reclaimed).235*/236struct vnode * tn_vnode; /* (i) */237238/*239* Interlock to protect tn_vpstate, and tn_status under shared240* vnode lock.241*/242struct mtx tn_interlock;243244/*245* Identify if current node has vnode assiocate with246* or allocating vnode.247*/248int tn_vpstate; /* (i) */249250/* Transient refcounter on this node. */251u_int tn_refcount; /* 0<->1 (m) + (i) */252253/* Extended attributes of this node. */254struct tmpfs_extattr_list tn_extattrs; /* (v) */255256/* misc data field for different tn_type node */257union {258/* Valid when tn_type == VBLK || tn_type == VCHR. */259dev_t tn_rdev; /* (c) */260261/* Valid when tn_type == VDIR. */262struct tn_dir {263/*264* Pointer to the parent directory. The root265* directory has a pointer to itself in this field;266* this property identifies the root node.267*/268struct tmpfs_node * tn_parent;269270/*271* Head of a tree that links the contents of272* the directory together.273*/274struct tmpfs_dir tn_dirhead;275276/*277* Head of a list the contains fake directory entries278* heads, i.e. entries with TMPFS_DIRCOOKIE_DUPHEAD279* flag.280*/281struct tmpfs_dir_duphead tn_dupindex;282283/*284* Number and pointer of the first directory entry285* returned by the readdir operation if it were286* called again to continue reading data from the287* same directory as before. This is used to speed288* up reads of long directories, assuming that no289* more than one read is in progress at a given time.290* Otherwise, these values are discarded.291*/292off_t tn_readdir_lastn;293struct tmpfs_dirent * tn_readdir_lastp;294295/*296* Total size of whiteout directory entries. This297* must be a multiple of sizeof(struct tmpfs_dirent)298* and is used to determine whether a directory is299* empty (excluding whiteout entries) during rename/300* rmdir operations.301*/302off_t tn_wht_size; /* (v) */303} tn_dir;304305/* Valid when tn_type == VLNK. */306/* The link's target, allocated from a string pool. */307struct tn_link {308char * tn_link_target; /* (c) */309char tn_link_smr; /* (c) */310} tn_link;311312/* Valid when tn_type == VREG. */313struct tn_reg {314/*315* The contents of regular files stored in a316* tmpfs file system are represented by a317* single anonymous memory object (aobj, for318* short). The aobj provides direct access to319* any position within the file. It is a task320* of the memory management subsystem to issue321* the required page ins or page outs whenever322* a position within the file is accessed.323*/324vm_object_t tn_aobj; /* (c) */325struct tmpfs_mount *tn_tmp; /* (c) */326vm_pindex_t tn_pages; /* (v) */327} tn_reg;328} tn_spec; /* (v) */329};330LIST_HEAD(tmpfs_node_list, tmpfs_node);331332#define tn_rdev tn_spec.tn_rdev333#define tn_dir tn_spec.tn_dir334#define tn_link_target tn_spec.tn_link.tn_link_target335#define tn_link_smr tn_spec.tn_link.tn_link_smr336#define tn_reg tn_spec.tn_reg337#define tn_fifo tn_spec.tn_fifo338339#define TMPFS_LINK_MAX INT_MAX340341#define TMPFS_NODE_LOCK(node) mtx_lock(&(node)->tn_interlock)342#define TMPFS_NODE_UNLOCK(node) mtx_unlock(&(node)->tn_interlock)343#define TMPFS_NODE_MTX(node) (&(node)->tn_interlock)344#define TMPFS_NODE_ASSERT_LOCKED(node) mtx_assert(TMPFS_NODE_MTX(node), \345MA_OWNED)346347#ifdef INVARIANTS348#define TMPFS_ASSERT_LOCKED(node) do { \349MPASS((node) != NULL); \350MPASS((node)->tn_vnode != NULL); \351ASSERT_VOP_LOCKED((node)->tn_vnode, "tmpfs assert"); \352} while (0)353#else354#define TMPFS_ASSERT_LOCKED(node) (void)0355#endif356357/* tn_vpstate */358#define TMPFS_VNODE_ALLOCATING 1359#define TMPFS_VNODE_WANT 2360#define TMPFS_VNODE_DOOMED 4361#define TMPFS_VNODE_WRECLAIM 8362363/* tn_status */364#define TMPFS_NODE_MODIFIED 0x01365#define TMPFS_NODE_CHANGED 0x02366367/*368* Internal representation of a tmpfs mount point.369*/370struct tmpfs_mount {371/*372* Original value of the "size" parameter, for reference purposes,373* mostly.374*/375off_t tm_size_max;376/*377* Maximum number of memory pages available for use by the file378* system, set during mount time. This variable must never be379* used directly as it may be bigger than the current amount of380* free memory; in the extreme case, it will hold the ULONG_MAX381* value.382*/383u_long tm_pages_max;384385/* Number of pages in use by the file system. */386u_long tm_pages_used;387388/*389* Pointer to the node representing the root directory of this390* file system.391*/392struct tmpfs_node * tm_root;393394/*395* Maximum number of possible nodes for this file system; set396* during mount time. We need a hard limit on the maximum number397* of nodes to avoid allocating too much of them; their objects398* cannot be released until the file system is unmounted.399* Otherwise, we could easily run out of memory by creating lots400* of empty files and then simply removing them.401*/402ino_t tm_nodes_max;403404/* unrhdr used to allocate inode numbers */405struct unrhdr64 tm_ino_unr;406407/* Number of nodes currently that are in use. */408ino_t tm_nodes_inuse;409410/* Memory used by extended attributes */411uint64_t tm_ea_memory_inuse;412413/* Maximum memory available for extended attributes */414uint64_t tm_ea_memory_max;415416/* Refcounter on this struct tmpfs_mount. */417uint64_t tm_refcount;418419/* maximum representable file size */420u_int64_t tm_maxfilesize;421422/*423* The used list contains all nodes that are currently used by424* the file system; i.e., they refer to existing files.425*/426struct tmpfs_node_list tm_nodes_used;427428/* All node lock to protect the node list and tmp_pages_used. */429struct mtx tm_allnode_lock;430431/* Read-only status. */432bool tm_ronly;433/* Do not use namecache. */434bool tm_nonc;435/* Do not update mtime on writes through mmaped areas. */436bool tm_nomtime;437438/* Read from page cache directly. */439bool tm_pgread;440};441#define TMPFS_LOCK(tm) mtx_lock(&(tm)->tm_allnode_lock)442#define TMPFS_UNLOCK(tm) mtx_unlock(&(tm)->tm_allnode_lock)443#define TMPFS_MP_ASSERT_LOCKED(tm) mtx_assert(&(tm)->tm_allnode_lock, MA_OWNED)444445/*446* This structure maps a file identifier to a tmpfs node. Used by the447* NFS code.448*/449struct tmpfs_fid_data {450unsigned short tfd_len;451ino_t tfd_id;452unsigned long tfd_gen;453} __packed;454455struct tmpfs_dir_cursor {456struct tmpfs_dirent *tdc_current;457struct tmpfs_dirent *tdc_tree;458};459460#ifdef _KERNEL461/*462* Prototypes for tmpfs_subr.c.463*/464465void tmpfs_ref_node(struct tmpfs_node *node);466int tmpfs_alloc_node(struct mount *mp, struct tmpfs_mount *, __enum_uint8(vtype),467uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *,468const char *, dev_t, struct tmpfs_node **);469int tmpfs_fo_close(struct file *fp, struct thread *td);470void tmpfs_free_node(struct tmpfs_mount *, struct tmpfs_node *);471bool tmpfs_free_node_locked(struct tmpfs_mount *, struct tmpfs_node *, bool);472void tmpfs_free_tmp(struct tmpfs_mount *);473int tmpfs_alloc_dirent(struct tmpfs_mount *, struct tmpfs_node *,474const char *, u_int, struct tmpfs_dirent **);475void tmpfs_free_dirent(struct tmpfs_mount *, struct tmpfs_dirent *);476void tmpfs_dirent_init(struct tmpfs_dirent *, const char *, u_int);477void tmpfs_destroy_vobject(struct vnode *vp, vm_object_t obj);478int tmpfs_alloc_vp(struct mount *, struct tmpfs_node *, int,479struct vnode **);480void tmpfs_free_vp(struct vnode *);481int tmpfs_alloc_file(struct vnode *, struct vnode **, struct vattr *,482struct componentname *, const char *);483void tmpfs_check_mtime(struct vnode *);484void tmpfs_dir_attach(struct vnode *, struct tmpfs_dirent *);485void tmpfs_dir_detach(struct vnode *, struct tmpfs_dirent *);486void tmpfs_dir_destroy(struct tmpfs_mount *, struct tmpfs_node *);487struct tmpfs_dirent * tmpfs_dir_lookup(struct tmpfs_node *node,488struct tmpfs_node *f,489struct componentname *cnp);490int tmpfs_dir_getdents(struct tmpfs_mount *, struct tmpfs_node *,491struct uio *, int, uint64_t *, int *);492int tmpfs_dir_whiteout_add(struct vnode *, struct componentname *);493void tmpfs_dir_whiteout_remove(struct vnode *, struct componentname *);494void tmpfs_dir_clear_whiteouts(struct vnode *);495int tmpfs_reg_resize(struct vnode *, off_t, boolean_t);496int tmpfs_reg_punch_hole(struct vnode *vp, off_t *, off_t *);497int tmpfs_chflags(struct vnode *, u_long, struct ucred *, struct thread *);498int tmpfs_chmod(struct vnode *, mode_t, struct ucred *, struct thread *);499int tmpfs_chown(struct vnode *, uid_t, gid_t, struct ucred *,500struct thread *);501int tmpfs_chsize(struct vnode *, u_quad_t, struct ucred *, struct thread *);502int tmpfs_chtimes(struct vnode *, struct vattr *, struct ucred *cred,503struct thread *);504void tmpfs_itimes(struct vnode *, const struct timespec *,505const struct timespec *);506507void tmpfs_set_accessed(struct tmpfs_mount *tm, struct tmpfs_node *node);508void tmpfs_set_status(struct tmpfs_mount *tm, struct tmpfs_node *node,509int status);510int tmpfs_truncate(struct vnode *, off_t);511struct tmpfs_dirent *tmpfs_dir_first(struct tmpfs_node *dnode,512struct tmpfs_dir_cursor *dc);513struct tmpfs_dirent *tmpfs_dir_next(struct tmpfs_node *dnode,514struct tmpfs_dir_cursor *dc);515bool tmpfs_pages_check_avail(struct tmpfs_mount *tmp, size_t req_pages);516void tmpfs_extattr_free(struct tmpfs_extattr* ea);517static __inline void518tmpfs_update(struct vnode *vp)519{520521tmpfs_itimes(vp, NULL, NULL);522}523524/*525* Convenience macros to simplify some logical expressions.526*/527#define IMPLIES(a, b) (!(a) || (b))528529/*530* Checks that the directory entry pointed by 'de' matches the name 'name'531* with a length of 'len'.532*/533#define TMPFS_DIRENT_MATCHES(de, name, len) \534(de->td_namelen == len && \535bcmp((de)->ud.td_name, (name), (de)->td_namelen) == 0)536537/*538* Ensures that the node pointed by 'node' is a directory and that its539* contents are consistent with respect to directories.540*/541#define TMPFS_VALIDATE_DIR(node) do { \542MPASS((node)->tn_type == VDIR); \543MPASS((node)->tn_size % sizeof(struct tmpfs_dirent) == 0); \544MPASS((node)->tn_dir.tn_wht_size % sizeof(struct tmpfs_dirent) == 0); \545MPASS((node)->tn_dir.tn_wht_size <= (node)->tn_size); \546} while (0)547548/*549* Amount of memory pages to reserve for the system (e.g., to not use by550* tmpfs).551*/552#if !defined(TMPFS_PAGES_MINRESERVED)553#define TMPFS_PAGES_MINRESERVED (4 * 1024 * 1024 / PAGE_SIZE)554#endif555556/*557* Percent of available memory + swap available to use by tmpfs file systems558* without a size limit.559*/560#if !defined(TMPFS_MEM_PERCENT)561#define TMPFS_MEM_PERCENT 100562#endif563564/*565* Amount of memory to reserve for extended attributes.566*/567#if !defined(TMPFS_EA_MEMORY_RESERVED)568#define TMPFS_EA_MEMORY_RESERVED (16 * 1024 * 1024)569#endif570571size_t tmpfs_mem_avail(void);572size_t tmpfs_pages_used(struct tmpfs_mount *tmp);573int tmpfs_subr_init(void);574void tmpfs_subr_uninit(void);575576extern int tmpfs_pager_type;577578/*579* Macros/functions to convert from generic data structures to tmpfs580* specific ones.581*/582583static inline struct vnode *584VM_TO_TMPFS_VP(vm_object_t obj)585{586struct tmpfs_node *node;587588if ((obj->flags & OBJ_TMPFS) == 0)589return (NULL);590591/*592* swp_priv is the back-pointer to the tmpfs node, if any,593* which uses the vm object as backing store. The object594* handle is not used to avoid locking sw_alloc_sx on tmpfs595* node instantiation/destroy.596*/597node = obj->un_pager.swp.swp_priv;598return (node->tn_vnode);599}600601static inline struct tmpfs_mount *602VM_TO_TMPFS_MP(vm_object_t obj)603{604struct tmpfs_node *node;605606if ((obj->flags & OBJ_TMPFS) == 0)607return (NULL);608609node = obj->un_pager.swp.swp_priv;610MPASS(node->tn_type == VREG);611return (node->tn_reg.tn_tmp);612}613614static inline struct tmpfs_mount *615VFS_TO_TMPFS(struct mount *mp)616{617struct tmpfs_mount *tmp;618619MPASS(mp != NULL && mp->mnt_data != NULL);620tmp = (struct tmpfs_mount *)mp->mnt_data;621return (tmp);622}623624static inline struct tmpfs_node *625VP_TO_TMPFS_NODE(struct vnode *vp)626{627struct tmpfs_node *node;628629MPASS(vp != NULL && vp->v_data != NULL);630node = (struct tmpfs_node *)vp->v_data;631return (node);632}633634#define VP_TO_TMPFS_NODE_SMR(vp) \635((struct tmpfs_node *)vn_load_v_data_smr(vp))636637static inline struct tmpfs_node *638VP_TO_TMPFS_DIR(struct vnode *vp)639{640struct tmpfs_node *node;641642node = VP_TO_TMPFS_NODE(vp);643TMPFS_VALIDATE_DIR(node);644return (node);645}646647static inline bool648tmpfs_use_nc(struct vnode *vp)649{650651return (!(VFS_TO_TMPFS(vp->v_mount)->tm_nonc));652}653654static inline void655tmpfs_update_getattr(struct vnode *vp)656{657struct tmpfs_node *node;658659node = VP_TO_TMPFS_NODE(vp);660if (__predict_false((node->tn_status & (TMPFS_NODE_MODIFIED |661TMPFS_NODE_CHANGED)) != 0 || node->tn_accessed))662tmpfs_update(vp);663}664665extern struct fileops tmpfs_fnops;666667#endif /* _KERNEL */668669#endif /* _FS_TMPFS_TMPFS_H_ */670671672