Path: blob/main/sys/contrib/openzfs/lib/libzpool/zfs_file_os.c
178696 views
// SPDX-License-Identifier: CDDL-1.01/*2* CDDL HEADER START3*4* The contents of this file are subject to the terms of the5* Common Development and Distribution License (the "License").6* You may not use this file except in compliance with the License.7*8* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE9* or https://opensource.org/licenses/CDDL-1.0.10* See the License for the specific language governing permissions11* and limitations under the License.12*13* When distributing Covered Code, include this CDDL HEADER in each14* file and include the License file at usr/src/OPENSOLARIS.LICENSE.15* If applicable, add the following below this CDDL HEADER, with the16* fields enclosed by brackets "[]" replaced with your own identifying17* information: Portions Copyright [yyyy] [name of copyright owner]18*19* CDDL HEADER END20*/21/*22* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.23* Copyright (c) 2012, 2018 by Delphix. All rights reserved.24* Copyright (c) 2016 Actifio, Inc. All rights reserved.25* Copyright (c) 2025, Klara, Inc.26*/2728#include <sys/zfs_context.h>29#include <sys/zfs_file.h>30#include <libzpool.h>31#include <libzutil.h>3233/* If set, all blocks read will be copied to the specified directory. */34char *vn_dumpdir = NULL;3536/*37* Open file38*39* path - fully qualified path to file40* flags - file attributes O_READ / O_WRITE / O_EXCL41* fpp - pointer to return file pointer42*43* Returns 0 on success underlying error on failure.44*/45int46zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fpp)47{48int fd;49int dump_fd;50int err;51int old_umask = 0;52zfs_file_t *fp;53struct stat64 st;5455if (!(flags & O_CREAT) && stat64(path, &st) == -1)56return (errno);5758if (!(flags & O_CREAT) && S_ISBLK(st.st_mode))59flags |= O_DIRECT;6061if (flags & O_CREAT)62old_umask = umask(0);6364fd = open64(path, flags, mode);65if (fd == -1)66return (errno);6768if (flags & O_CREAT)69(void) umask(old_umask);7071if (vn_dumpdir != NULL) {72char *dumppath = umem_zalloc(MAXPATHLEN, UMEM_NOFAIL);73const char *inpath = zfs_basename(path);7475(void) snprintf(dumppath, MAXPATHLEN,76"%s/%s", vn_dumpdir, inpath);77dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666);78umem_free(dumppath, MAXPATHLEN);79if (dump_fd == -1) {80err = errno;81close(fd);82return (err);83}84} else {85dump_fd = -1;86}8788(void) fcntl(fd, F_SETFD, FD_CLOEXEC);8990fp = umem_zalloc(sizeof (zfs_file_t), UMEM_NOFAIL);91fp->f_fd = fd;92fp->f_dump_fd = dump_fd;93*fpp = fp;9495return (0);96}9798void99zfs_file_close(zfs_file_t *fp)100{101close(fp->f_fd);102if (fp->f_dump_fd != -1)103close(fp->f_dump_fd);104105umem_free(fp, sizeof (zfs_file_t));106}107108/*109* Stateful write - use os internal file pointer to determine where to110* write and update on successful completion.111*112* fp - pointer to file (pipe, socket, etc) to write to113* buf - buffer to write114* count - # of bytes to write115* resid - pointer to count of unwritten bytes (if short write)116*117* Returns 0 on success errno on failure.118*/119int120zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid)121{122ssize_t rc;123124rc = write(fp->f_fd, buf, count);125if (rc < 0)126return (errno);127128if (resid) {129*resid = count - rc;130} else if (rc != count) {131return (EIO);132}133134return (0);135}136137/*138* Stateless write - os internal file pointer is not updated.139*140* fp - pointer to file (pipe, socket, etc) to write to141* buf - buffer to write142* count - # of bytes to write143* off - file offset to write to (only valid for seekable types)144* resid - pointer to count of unwritten bytes145*146* Returns 0 on success errno on failure.147*/148int149zfs_file_pwrite(zfs_file_t *fp, const void *buf,150size_t count, loff_t pos, uint8_t ashift, ssize_t *resid)151{152ssize_t rc, split, done;153int sectors;154155/*156* To simulate partial disk writes, we split writes into two157* system calls so that the process can be killed in between.158* This is used by ztest to simulate realistic failure modes.159*/160sectors = count >> ashift;161split = (sectors > 0 ? rand() % sectors : 0) << ashift;162rc = pwrite64(fp->f_fd, buf, split, pos);163if (rc != -1) {164done = rc;165rc = pwrite64(fp->f_fd, (char *)buf + split,166count - split, pos + split);167}168#ifdef __linux__169if (rc == -1 && errno == EINVAL) {170/*171* Under Linux, this most likely means an alignment issue172* (memory or disk) due to O_DIRECT, so we abort() in order173* to catch the offender.174*/175abort();176}177#endif178179if (rc < 0)180return (errno);181182done += rc;183184if (resid) {185*resid = count - done;186} else if (done != count) {187return (EIO);188}189190return (0);191}192193/*194* Stateful read - use os internal file pointer to determine where to195* read and update on successful completion.196*197* fp - pointer to file (pipe, socket, etc) to read from198* buf - buffer to write199* count - # of bytes to read200* resid - pointer to count of unread bytes (if short read)201*202* Returns 0 on success errno on failure.203*/204int205zfs_file_read(zfs_file_t *fp, void *buf, size_t count, ssize_t *resid)206{207int rc;208209rc = read(fp->f_fd, buf, count);210if (rc < 0)211return (errno);212213if (resid) {214*resid = count - rc;215} else if (rc != count) {216return (EIO);217}218219return (0);220}221222/*223* Stateless read - os internal file pointer is not updated.224*225* fp - pointer to file (pipe, socket, etc) to read from226* buf - buffer to write227* count - # of bytes to write228* off - file offset to read from (only valid for seekable types)229* resid - pointer to count of unwritten bytes (if short write)230*231* Returns 0 on success errno on failure.232*/233int234zfs_file_pread(zfs_file_t *fp, void *buf, size_t count, loff_t off,235ssize_t *resid)236{237ssize_t rc;238239rc = pread64(fp->f_fd, buf, count, off);240if (rc < 0) {241#ifdef __linux__242/*243* Under Linux, this most likely means an alignment issue244* (memory or disk) due to O_DIRECT, so we abort() in order to245* catch the offender.246*/247if (errno == EINVAL)248abort();249#endif250return (errno);251}252253if (fp->f_dump_fd != -1) {254int status;255256status = pwrite64(fp->f_dump_fd, buf, rc, off);257ASSERT(status != -1);258}259260if (resid) {261*resid = count - rc;262} else if (rc != count) {263return (EIO);264}265266return (0);267}268269/*270* lseek - set / get file pointer271*272* fp - pointer to file (pipe, socket, etc) to read from273* offp - value to seek to, returns current value plus passed offset274* whence - see man pages for standard lseek whence values275*276* Returns 0 on success errno on failure (ESPIPE for non seekable types)277*/278int279zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence)280{281loff_t rc;282283rc = lseek(fp->f_fd, *offp, whence);284if (rc < 0)285return (errno);286287*offp = rc;288289return (0);290}291292/*293* Get file attributes294*295* filp - file pointer296* zfattr - pointer to file attr structure297*298* Currently only used for fetching size and file mode299*300* Returns 0 on success or error code of underlying getattr call on failure.301*/302int303zfs_file_getattr(zfs_file_t *fp, zfs_file_attr_t *zfattr)304{305struct stat64 st;306307if (fstat64_blk(fp->f_fd, &st) == -1)308return (errno);309310zfattr->zfa_size = st.st_size;311zfattr->zfa_mode = st.st_mode;312313return (0);314}315316/*317* Sync file to disk318*319* filp - file pointer320* flags - O_SYNC and or O_DSYNC321*322* Returns 0 on success or error code of underlying sync call on failure.323*/324int325zfs_file_fsync(zfs_file_t *fp, int flags)326{327(void) flags;328329if (fsync(fp->f_fd) < 0)330return (errno);331332return (0);333}334335/*336* deallocate - zero and/or deallocate file storage337*338* fp - file pointer339* offset - offset to start zeroing or deallocating340* len - length to zero or deallocate341*/342int343zfs_file_deallocate(zfs_file_t *fp, loff_t offset, loff_t len)344{345int rc;346#if defined(__linux__)347rc = fallocate(fp->f_fd,348FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, len);349#elif defined(__FreeBSD__) && (__FreeBSD_version >= 1400029)350struct spacectl_range rqsr = {351.r_offset = offset,352.r_len = len,353};354rc = fspacectl(fp->f_fd, SPACECTL_DEALLOC, &rqsr, 0, &rqsr);355#else356(void) fp, (void) offset, (void) len;357rc = EOPNOTSUPP;358#endif359if (rc)360return (SET_ERROR(rc));361return (0);362}363364/*365* Request current file pointer offset366*367* fp - pointer to file368*369* Returns current file offset.370*/371loff_t372zfs_file_off(zfs_file_t *fp)373{374return (lseek(fp->f_fd, SEEK_CUR, 0));375}376377/*378* unlink file379*380* path - fully qualified file path381*382* Returns 0 on success.383*384* OPTIONAL385*/386int387zfs_file_unlink(const char *path)388{389return (remove(path));390}391392/*393* Get reference to file pointer394*395* fd - input file descriptor396*397* Returns pointer to file struct or NULL.398* Unsupported in user space.399*/400zfs_file_t *401zfs_file_get(int fd)402{403(void) fd;404abort();405return (NULL);406}407/*408* Drop reference to file pointer409*410* fp - pointer to file struct411*412* Unsupported in user space.413*/414void415zfs_file_put(zfs_file_t *fp)416{417abort();418(void) fp;419}420421422