Path: blob/main/sys/contrib/openzfs/lib/libzpool/kernel.c
48375 views
// SPDX-License-Identifier: CDDL-1.01/*2* CDDL HEADER START3*4* The contents of this file are subject to the terms of the5* Common Development and Distribution License (the "License").6* You may not use this file except in compliance with the License.7*8* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE9* or https://opensource.org/licenses/CDDL-1.0.10* See the License for the specific language governing permissions11* and limitations under the License.12*13* When distributing Covered Code, include this CDDL HEADER in each14* file and include the License file at usr/src/OPENSOLARIS.LICENSE.15* If applicable, add the following below this CDDL HEADER, with the16* fields enclosed by brackets "[]" replaced with your own identifying17* information: Portions Copyright [yyyy] [name of copyright owner]18*19* CDDL HEADER END20*/21/*22* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.23* Copyright (c) 2012, 2018 by Delphix. All rights reserved.24* Copyright (c) 2016 Actifio, Inc. All rights reserved.25* Copyright (c) 2025, Klara, Inc.26*/2728#include <assert.h>29#include <fcntl.h>30#include <libgen.h>31#include <poll.h>32#include <stdio.h>33#include <stdlib.h>34#include <string.h>35#include <limits.h>36#include <libzutil.h>37#include <sys/crypto/icp.h>38#include <sys/processor.h>39#include <sys/rrwlock.h>40#include <sys/spa.h>41#include <sys/spa_impl.h>42#include <sys/stat.h>43#include <sys/systeminfo.h>44#include <sys/time.h>45#include <sys/utsname.h>46#include <sys/zfs_context.h>47#include <sys/zfs_onexit.h>48#include <sys/zfs_vfsops.h>49#include <sys/zstd/zstd.h>50#include <sys/zvol.h>51#include <zfs_fletcher.h>52#include <zlib.h>5354/*55* Emulation of kernel services in userland.56*/5758uint64_t physmem;59uint32_t hostid;60struct utsname hw_utsname;6162/* If set, all blocks read will be copied to the specified directory. */63char *vn_dumpdir = NULL;6465/* this only exists to have its address taken */66struct proc p0;6768/*69* =========================================================================70* threads71* =========================================================================72*73* TS_STACK_MIN is dictated by the minimum allowed pthread stack size. While74* TS_STACK_MAX is somewhat arbitrary, it was selected to be large enough for75* the expected stack depth while small enough to avoid exhausting address76* space with high thread counts.77*/78#define TS_STACK_MIN MAX(PTHREAD_STACK_MIN, 32768)79#define TS_STACK_MAX (256 * 1024)8081struct zk_thread_wrapper {82void (*func)(void *);83void *arg;84};8586static void *87zk_thread_wrapper(void *arg)88{89struct zk_thread_wrapper ztw;90memcpy(&ztw, arg, sizeof (ztw));91free(arg);92ztw.func(ztw.arg);93return (NULL);94}9596kthread_t *97zk_thread_create(const char *name, void (*func)(void *), void *arg,98size_t stksize, int state)99{100pthread_attr_t attr;101pthread_t tid;102char *stkstr;103struct zk_thread_wrapper *ztw;104int detachstate = PTHREAD_CREATE_DETACHED;105106VERIFY0(pthread_attr_init(&attr));107108if (state & TS_JOINABLE)109detachstate = PTHREAD_CREATE_JOINABLE;110111VERIFY0(pthread_attr_setdetachstate(&attr, detachstate));112113/*114* We allow the default stack size in user space to be specified by115* setting the ZFS_STACK_SIZE environment variable. This allows us116* the convenience of observing and debugging stack overruns in117* user space. Explicitly specified stack sizes will be honored.118* The usage of ZFS_STACK_SIZE is discussed further in the119* ENVIRONMENT VARIABLES sections of the ztest(1) man page.120*/121if (stksize == 0) {122stkstr = getenv("ZFS_STACK_SIZE");123124if (stkstr == NULL)125stksize = TS_STACK_MAX;126else127stksize = MAX(atoi(stkstr), TS_STACK_MIN);128}129130VERIFY3S(stksize, >, 0);131stksize = P2ROUNDUP(MAX(stksize, TS_STACK_MIN), PAGESIZE);132133/*134* If this ever fails, it may be because the stack size is not a135* multiple of system page size.136*/137VERIFY0(pthread_attr_setstacksize(&attr, stksize));138VERIFY0(pthread_attr_setguardsize(&attr, PAGESIZE));139140VERIFY(ztw = malloc(sizeof (*ztw)));141ztw->func = func;142ztw->arg = arg;143VERIFY0(pthread_create(&tid, &attr, zk_thread_wrapper, ztw));144VERIFY0(pthread_attr_destroy(&attr));145146pthread_setname_np(tid, name);147148return ((void *)(uintptr_t)tid);149}150151/*152* =========================================================================153* kstats154* =========================================================================155*/156kstat_t *157kstat_create(const char *module, int instance, const char *name,158const char *class, uchar_t type, ulong_t ndata, uchar_t ks_flag)159{160(void) module, (void) instance, (void) name, (void) class, (void) type,161(void) ndata, (void) ks_flag;162return (NULL);163}164165void166kstat_install(kstat_t *ksp)167{168(void) ksp;169}170171void172kstat_delete(kstat_t *ksp)173{174(void) ksp;175}176177void178kstat_set_raw_ops(kstat_t *ksp,179int (*headers)(char *buf, size_t size),180int (*data)(char *buf, size_t size, void *data),181void *(*addr)(kstat_t *ksp, loff_t index))182{183(void) ksp, (void) headers, (void) data, (void) addr;184}185186/*187* =========================================================================188* mutexes189* =========================================================================190*/191192void193mutex_init(kmutex_t *mp, char *name, int type, void *cookie)194{195(void) name, (void) type, (void) cookie;196VERIFY0(pthread_mutex_init(&mp->m_lock, NULL));197memset(&mp->m_owner, 0, sizeof (pthread_t));198}199200void201mutex_destroy(kmutex_t *mp)202{203VERIFY0(pthread_mutex_destroy(&mp->m_lock));204}205206void207mutex_enter(kmutex_t *mp)208{209VERIFY0(pthread_mutex_lock(&mp->m_lock));210mp->m_owner = pthread_self();211}212213int214mutex_enter_check_return(kmutex_t *mp)215{216int error = pthread_mutex_lock(&mp->m_lock);217if (error == 0)218mp->m_owner = pthread_self();219return (error);220}221222int223mutex_tryenter(kmutex_t *mp)224{225int error = pthread_mutex_trylock(&mp->m_lock);226if (error == 0) {227mp->m_owner = pthread_self();228return (1);229} else {230VERIFY3S(error, ==, EBUSY);231return (0);232}233}234235void236mutex_exit(kmutex_t *mp)237{238memset(&mp->m_owner, 0, sizeof (pthread_t));239VERIFY0(pthread_mutex_unlock(&mp->m_lock));240}241242/*243* =========================================================================244* rwlocks245* =========================================================================246*/247248void249rw_init(krwlock_t *rwlp, char *name, int type, void *arg)250{251(void) name, (void) type, (void) arg;252VERIFY0(pthread_rwlock_init(&rwlp->rw_lock, NULL));253rwlp->rw_readers = 0;254rwlp->rw_owner = 0;255}256257void258rw_destroy(krwlock_t *rwlp)259{260VERIFY0(pthread_rwlock_destroy(&rwlp->rw_lock));261}262263void264rw_enter(krwlock_t *rwlp, krw_t rw)265{266if (rw == RW_READER) {267VERIFY0(pthread_rwlock_rdlock(&rwlp->rw_lock));268atomic_inc_uint(&rwlp->rw_readers);269} else {270VERIFY0(pthread_rwlock_wrlock(&rwlp->rw_lock));271rwlp->rw_owner = pthread_self();272}273}274275void276rw_exit(krwlock_t *rwlp)277{278if (RW_READ_HELD(rwlp))279atomic_dec_uint(&rwlp->rw_readers);280else281rwlp->rw_owner = 0;282283VERIFY0(pthread_rwlock_unlock(&rwlp->rw_lock));284}285286int287rw_tryenter(krwlock_t *rwlp, krw_t rw)288{289int error;290291if (rw == RW_READER)292error = pthread_rwlock_tryrdlock(&rwlp->rw_lock);293else294error = pthread_rwlock_trywrlock(&rwlp->rw_lock);295296if (error == 0) {297if (rw == RW_READER)298atomic_inc_uint(&rwlp->rw_readers);299else300rwlp->rw_owner = pthread_self();301302return (1);303}304305VERIFY3S(error, ==, EBUSY);306307return (0);308}309310uint32_t311zone_get_hostid(void *zonep)312{313/*314* We're emulating the system's hostid in userland.315*/316(void) zonep;317return (hostid);318}319320int321rw_tryupgrade(krwlock_t *rwlp)322{323(void) rwlp;324return (0);325}326327/*328* =========================================================================329* condition variables330* =========================================================================331*/332333void334cv_init(kcondvar_t *cv, char *name, int type, void *arg)335{336(void) name, (void) type, (void) arg;337VERIFY0(pthread_cond_init(cv, NULL));338}339340void341cv_destroy(kcondvar_t *cv)342{343VERIFY0(pthread_cond_destroy(cv));344}345346void347cv_wait(kcondvar_t *cv, kmutex_t *mp)348{349memset(&mp->m_owner, 0, sizeof (pthread_t));350VERIFY0(pthread_cond_wait(cv, &mp->m_lock));351mp->m_owner = pthread_self();352}353354int355cv_wait_sig(kcondvar_t *cv, kmutex_t *mp)356{357cv_wait(cv, mp);358return (1);359}360361int362cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)363{364int error;365struct timeval tv;366struct timespec ts;367clock_t delta;368369delta = abstime - ddi_get_lbolt();370if (delta <= 0)371return (-1);372373VERIFY0(gettimeofday(&tv, NULL));374375ts.tv_sec = tv.tv_sec + delta / hz;376ts.tv_nsec = tv.tv_usec * NSEC_PER_USEC + (delta % hz) * (NANOSEC / hz);377if (ts.tv_nsec >= NANOSEC) {378ts.tv_sec++;379ts.tv_nsec -= NANOSEC;380}381382memset(&mp->m_owner, 0, sizeof (pthread_t));383error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);384mp->m_owner = pthread_self();385386if (error == ETIMEDOUT)387return (-1);388389VERIFY0(error);390391return (1);392}393394int395cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res,396int flag)397{398(void) res;399int error;400struct timeval tv;401struct timespec ts;402hrtime_t delta;403404ASSERT(flag == 0 || flag == CALLOUT_FLAG_ABSOLUTE);405406delta = tim;407if (flag & CALLOUT_FLAG_ABSOLUTE)408delta -= gethrtime();409410if (delta <= 0)411return (-1);412413VERIFY0(gettimeofday(&tv, NULL));414415ts.tv_sec = tv.tv_sec + delta / NANOSEC;416ts.tv_nsec = tv.tv_usec * NSEC_PER_USEC + (delta % NANOSEC);417if (ts.tv_nsec >= NANOSEC) {418ts.tv_sec++;419ts.tv_nsec -= NANOSEC;420}421422memset(&mp->m_owner, 0, sizeof (pthread_t));423error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);424mp->m_owner = pthread_self();425426if (error == ETIMEDOUT)427return (-1);428429VERIFY0(error);430431return (1);432}433434void435cv_signal(kcondvar_t *cv)436{437VERIFY0(pthread_cond_signal(cv));438}439440void441cv_broadcast(kcondvar_t *cv)442{443VERIFY0(pthread_cond_broadcast(cv));444}445446/*447* =========================================================================448* procfs list449* =========================================================================450*/451452void453seq_printf(struct seq_file *m, const char *fmt, ...)454{455(void) m, (void) fmt;456}457458void459procfs_list_install(const char *module,460const char *submodule,461const char *name,462mode_t mode,463procfs_list_t *procfs_list,464int (*show)(struct seq_file *f, void *p),465int (*show_header)(struct seq_file *f),466int (*clear)(procfs_list_t *procfs_list),467size_t procfs_list_node_off)468{469(void) module, (void) submodule, (void) name, (void) mode, (void) show,470(void) show_header, (void) clear;471mutex_init(&procfs_list->pl_lock, NULL, MUTEX_DEFAULT, NULL);472list_create(&procfs_list->pl_list,473procfs_list_node_off + sizeof (procfs_list_node_t),474procfs_list_node_off + offsetof(procfs_list_node_t, pln_link));475procfs_list->pl_next_id = 1;476procfs_list->pl_node_offset = procfs_list_node_off;477}478479void480procfs_list_uninstall(procfs_list_t *procfs_list)481{482(void) procfs_list;483}484485void486procfs_list_destroy(procfs_list_t *procfs_list)487{488ASSERT(list_is_empty(&procfs_list->pl_list));489list_destroy(&procfs_list->pl_list);490mutex_destroy(&procfs_list->pl_lock);491}492493#define NODE_ID(procfs_list, obj) \494(((procfs_list_node_t *)(((char *)obj) + \495(procfs_list)->pl_node_offset))->pln_id)496497void498procfs_list_add(procfs_list_t *procfs_list, void *p)499{500ASSERT(MUTEX_HELD(&procfs_list->pl_lock));501NODE_ID(procfs_list, p) = procfs_list->pl_next_id++;502list_insert_tail(&procfs_list->pl_list, p);503}504505/*506* =========================================================================507* vnode operations508* =========================================================================509*/510511/*512* =========================================================================513* Figure out which debugging statements to print514* =========================================================================515*/516517static char *dprintf_string;518static int dprintf_print_all;519520int521dprintf_find_string(const char *string)522{523char *tmp_str = dprintf_string;524int len = strlen(string);525526/*527* Find out if this is a string we want to print.528* String format: file1.c,function_name1,file2.c,file3.c529*/530531while (tmp_str != NULL) {532if (strncmp(tmp_str, string, len) == 0 &&533(tmp_str[len] == ',' || tmp_str[len] == '\0'))534return (1);535tmp_str = strchr(tmp_str, ',');536if (tmp_str != NULL)537tmp_str++; /* Get rid of , */538}539return (0);540}541542void543dprintf_setup(int *argc, char **argv)544{545int i, j;546547/*548* Debugging can be specified two ways: by setting the549* environment variable ZFS_DEBUG, or by including a550* "debug=..." argument on the command line. The command551* line setting overrides the environment variable.552*/553554for (i = 1; i < *argc; i++) {555int len = strlen("debug=");556/* First look for a command line argument */557if (strncmp("debug=", argv[i], len) == 0) {558dprintf_string = argv[i] + len;559/* Remove from args */560for (j = i; j < *argc; j++)561argv[j] = argv[j+1];562argv[j] = NULL;563(*argc)--;564}565}566567if (dprintf_string == NULL) {568/* Look for ZFS_DEBUG environment variable */569dprintf_string = getenv("ZFS_DEBUG");570}571572/*573* Are we just turning on all debugging?574*/575if (dprintf_find_string("on"))576dprintf_print_all = 1;577578if (dprintf_string != NULL)579zfs_flags |= ZFS_DEBUG_DPRINTF;580}581582/*583* =========================================================================584* debug printfs585* =========================================================================586*/587void588__dprintf(boolean_t dprint, const char *file, const char *func,589int line, const char *fmt, ...)590{591/* Get rid of annoying "../common/" prefix to filename. */592const char *newfile = zfs_basename(file);593594va_list adx;595if (dprint) {596/* dprintf messages are printed immediately */597598if (!dprintf_print_all &&599!dprintf_find_string(newfile) &&600!dprintf_find_string(func))601return;602603/* Print out just the function name if requested */604flockfile(stdout);605if (dprintf_find_string("pid"))606(void) printf("%d ", getpid());607if (dprintf_find_string("tid"))608(void) printf("%ju ",609(uintmax_t)(uintptr_t)pthread_self());610if (dprintf_find_string("cpu"))611(void) printf("%u ", getcpuid());612if (dprintf_find_string("time"))613(void) printf("%llu ", gethrtime());614if (dprintf_find_string("long"))615(void) printf("%s, line %d: ", newfile, line);616(void) printf("dprintf: %s: ", func);617va_start(adx, fmt);618(void) vprintf(fmt, adx);619va_end(adx);620funlockfile(stdout);621} else {622/* zfs_dbgmsg is logged for dumping later */623size_t size;624char *buf;625int i;626627size = 1024;628buf = umem_alloc(size, UMEM_NOFAIL);629i = snprintf(buf, size, "%s:%d:%s(): ", newfile, line, func);630631if (i < size) {632va_start(adx, fmt);633(void) vsnprintf(buf + i, size - i, fmt, adx);634va_end(adx);635}636637__zfs_dbgmsg(buf);638639umem_free(buf, size);640}641}642643/*644* =========================================================================645* cmn_err() and panic()646* =========================================================================647*/648649static __attribute__((noreturn)) void650panic_stop_or_abort(void)651{652const char *stopenv = getenv("LIBZPOOL_PANIC_STOP");653if (stopenv != NULL && atoi(stopenv)) {654fputs("libzpool: LIBZPOOL_PANIC_STOP is set, sending "655"SIGSTOP to process group\n", stderr);656fflush(stderr);657658kill(0, SIGSTOP);659660fputs("libzpool: continued after panic stop, "661"aborting\n", stderr);662}663664abort(); /* think of it as a "user-level crash dump" */665}666667static void668vcmn_msg(int ce, const char *fmt, va_list adx)669{670switch (ce) {671case CE_IGNORE:672return;673case CE_CONT:674break;675case CE_NOTE:676fputs("libzpool: NOTICE: ", stderr);677break;678case CE_WARN:679fputs("libzpool: WARNING: ", stderr);680break;681case CE_PANIC:682fputs("libzpool: PANIC: ", stderr);683break;684default:685fputs("libzpool: [unknown severity %d]: ", stderr);686break;687}688689vfprintf(stderr, fmt, adx);690if (ce != CE_CONT)691fputc('\n', stderr);692fflush(stderr);693}694695void696vcmn_err(int ce, const char *fmt, va_list adx)697{698vcmn_msg(ce, fmt, adx);699700if (ce == CE_PANIC)701panic_stop_or_abort();702}703704void705cmn_err(int ce, const char *fmt, ...)706{707va_list adx;708709va_start(adx, fmt);710vcmn_err(ce, fmt, adx);711va_end(adx);712}713714__attribute__((noreturn)) void715panic(const char *fmt, ...)716{717va_list adx;718719va_start(adx, fmt);720vcmn_msg(CE_PANIC, fmt, adx);721va_end(adx);722723panic_stop_or_abort();724}725726__attribute__((noreturn)) void727vpanic(const char *fmt, va_list adx)728{729vcmn_msg(CE_PANIC, fmt, adx);730panic_stop_or_abort();731}732733/*734* =========================================================================735* misc routines736* =========================================================================737*/738739void740delay(clock_t ticks)741{742(void) poll(0, 0, ticks * (1000 / hz));743}744745/*746* Find highest one bit set.747* Returns bit number + 1 of highest bit that is set, otherwise returns 0.748* The __builtin_clzll() function is supported by both GCC and Clang.749*/750int751highbit64(uint64_t i)752{753if (i == 0)754return (0);755756return (NBBY * sizeof (uint64_t) - __builtin_clzll(i));757}758759/*760* Find lowest one bit set.761* Returns bit number + 1 of lowest bit that is set, otherwise returns 0.762* The __builtin_ffsll() function is supported by both GCC and Clang.763*/764int765lowbit64(uint64_t i)766{767if (i == 0)768return (0);769770return (__builtin_ffsll(i));771}772773const char *random_path = "/dev/random";774const char *urandom_path = "/dev/urandom";775static int random_fd = -1, urandom_fd = -1;776777void778random_init(void)779{780VERIFY((random_fd = open(random_path, O_RDONLY | O_CLOEXEC)) != -1);781VERIFY((urandom_fd = open(urandom_path, O_RDONLY | O_CLOEXEC)) != -1);782}783784void785random_fini(void)786{787close(random_fd);788close(urandom_fd);789790random_fd = -1;791urandom_fd = -1;792}793794static int795random_get_bytes_common(uint8_t *ptr, size_t len, int fd)796{797size_t resid = len;798ssize_t bytes;799800ASSERT(fd != -1);801802while (resid != 0) {803bytes = read(fd, ptr, resid);804ASSERT3S(bytes, >=, 0);805ptr += bytes;806resid -= bytes;807}808809return (0);810}811812int813random_get_bytes(uint8_t *ptr, size_t len)814{815return (random_get_bytes_common(ptr, len, random_fd));816}817818int819random_get_pseudo_bytes(uint8_t *ptr, size_t len)820{821return (random_get_bytes_common(ptr, len, urandom_fd));822}823824int825ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)826{827errno = 0;828*result = strtoull(str, nptr, base);829if (*result == 0)830return (errno);831return (0);832}833834utsname_t *835utsname(void)836{837return (&hw_utsname);838}839840/*841* =========================================================================842* kernel emulation setup & teardown843* =========================================================================844*/845static int846umem_out_of_memory(void)847{848char errmsg[] = "out of memory -- generating core dump\n";849850(void) fprintf(stderr, "%s", errmsg);851abort();852return (0);853}854855static void856spa_config_load(void)857{858void *buf = NULL;859nvlist_t *nvlist, *child;860nvpair_t *nvpair;861char *pathname;862zfs_file_t *fp;863zfs_file_attr_t zfa;864uint64_t fsize;865int err;866867/*868* Open the configuration file.869*/870pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);871872(void) snprintf(pathname, MAXPATHLEN, "%s", spa_config_path);873874err = zfs_file_open(pathname, O_RDONLY, 0, &fp);875if (err)876err = zfs_file_open(ZPOOL_CACHE_BOOT, O_RDONLY, 0, &fp);877878kmem_free(pathname, MAXPATHLEN);879880if (err)881return;882883if (zfs_file_getattr(fp, &zfa))884goto out;885886fsize = zfa.zfa_size;887buf = kmem_alloc(fsize, KM_SLEEP);888889/*890* Read the nvlist from the file.891*/892if (zfs_file_read(fp, buf, fsize, NULL) < 0)893goto out;894895/*896* Unpack the nvlist.897*/898if (nvlist_unpack(buf, fsize, &nvlist, KM_SLEEP) != 0)899goto out;900901/*902* Iterate over all elements in the nvlist, creating a new spa_t for903* each one with the specified configuration.904*/905mutex_enter(&spa_namespace_lock);906nvpair = NULL;907while ((nvpair = nvlist_next_nvpair(nvlist, nvpair)) != NULL) {908if (nvpair_type(nvpair) != DATA_TYPE_NVLIST)909continue;910911child = fnvpair_value_nvlist(nvpair);912913if (spa_lookup(nvpair_name(nvpair)) != NULL)914continue;915(void) spa_add(nvpair_name(nvpair), child, NULL);916}917mutex_exit(&spa_namespace_lock);918919nvlist_free(nvlist);920921out:922if (buf != NULL)923kmem_free(buf, fsize);924925zfs_file_close(fp);926}927928void929kernel_init(int mode)930{931extern uint_t rrw_tsd_key;932933umem_nofail_callback(umem_out_of_memory);934935physmem = sysconf(_SC_PHYS_PAGES);936937dprintf("physmem = %llu pages (%.2f GB)\n", (u_longlong_t)physmem,938(double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));939940hostid = (mode & SPA_MODE_WRITE) ? get_system_hostid() : 0;941942random_init();943944VERIFY0(uname(&hw_utsname));945946system_taskq_init();947icp_init();948949zstd_init();950951spa_init((spa_mode_t)mode);952spa_config_load();953954fletcher_4_init();955956tsd_create(&rrw_tsd_key, rrw_tsd_destroy);957}958959void960kernel_fini(void)961{962fletcher_4_fini();963spa_fini();964965zstd_fini();966967icp_fini();968system_taskq_fini();969970random_fini();971}972973uid_t974crgetuid(cred_t *cr)975{976(void) cr;977return (0);978}979980uid_t981crgetruid(cred_t *cr)982{983(void) cr;984return (0);985}986987gid_t988crgetgid(cred_t *cr)989{990(void) cr;991return (0);992}993994int995crgetngroups(cred_t *cr)996{997(void) cr;998return (0);999}10001001gid_t *1002crgetgroups(cred_t *cr)1003{1004(void) cr;1005return (NULL);1006}10071008int1009zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)1010{1011(void) name, (void) cr;1012return (0);1013}10141015int1016zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)1017{1018(void) from, (void) to, (void) cr;1019return (0);1020}10211022int1023zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)1024{1025(void) name, (void) cr;1026return (0);1027}10281029int1030secpolicy_zfs(const cred_t *cr)1031{1032(void) cr;1033return (0);1034}10351036ksiddomain_t *1037ksid_lookupdomain(const char *dom)1038{1039ksiddomain_t *kd;10401041kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL);1042kd->kd_name = spa_strdup(dom);1043return (kd);1044}10451046void1047ksiddomain_rele(ksiddomain_t *ksid)1048{1049spa_strfree(ksid->kd_name);1050umem_free(ksid, sizeof (ksiddomain_t));1051}10521053char *1054kmem_vasprintf(const char *fmt, va_list adx)1055{1056char *buf = NULL;1057va_list adx_copy;10581059va_copy(adx_copy, adx);1060VERIFY(vasprintf(&buf, fmt, adx_copy) != -1);1061va_end(adx_copy);10621063return (buf);1064}10651066char *1067kmem_asprintf(const char *fmt, ...)1068{1069char *buf = NULL;1070va_list adx;10711072va_start(adx, fmt);1073VERIFY(vasprintf(&buf, fmt, adx) != -1);1074va_end(adx);10751076return (buf);1077}10781079/*1080* kmem_scnprintf() will return the number of characters that it would have1081* printed whenever it is limited by value of the size variable, rather than1082* the number of characters that it did print. This can cause misbehavior on1083* subsequent uses of the return value, so we define a safe version that will1084* return the number of characters actually printed, minus the NULL format1085* character. Subsequent use of this by the safe string functions is safe1086* whether it is snprintf(), strlcat() or strlcpy().1087*/1088int1089kmem_scnprintf(char *restrict str, size_t size, const char *restrict fmt, ...)1090{1091int n;1092va_list ap;10931094/* Make the 0 case a no-op so that we do not return -1 */1095if (size == 0)1096return (0);10971098va_start(ap, fmt);1099n = vsnprintf(str, size, fmt, ap);1100va_end(ap);11011102if (n >= size)1103n = size - 1;11041105return (n);1106}11071108zfs_file_t *1109zfs_onexit_fd_hold(int fd, minor_t *minorp)1110{1111(void) fd;1112*minorp = 0;1113return (NULL);1114}11151116void1117zfs_onexit_fd_rele(zfs_file_t *fp)1118{1119(void) fp;1120}11211122int1123zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,1124uintptr_t *action_handle)1125{1126(void) minor, (void) func, (void) data, (void) action_handle;1127return (0);1128}11291130fstrans_cookie_t1131spl_fstrans_mark(void)1132{1133return ((fstrans_cookie_t)0);1134}11351136void1137spl_fstrans_unmark(fstrans_cookie_t cookie)1138{1139(void) cookie;1140}11411142int1143kmem_cache_reap_active(void)1144{1145return (0);1146}11471148void1149zvol_create_minors(const char *name)1150{1151(void) name;1152}11531154void1155zvol_remove_minors(spa_t *spa, const char *name, boolean_t async)1156{1157(void) spa, (void) name, (void) async;1158}11591160void1161zvol_rename_minors(spa_t *spa, const char *oldname, const char *newname,1162boolean_t async)1163{1164(void) spa, (void) oldname, (void) newname, (void) async;1165}11661167/*1168* Open file1169*1170* path - fully qualified path to file1171* flags - file attributes O_READ / O_WRITE / O_EXCL1172* fpp - pointer to return file pointer1173*1174* Returns 0 on success underlying error on failure.1175*/1176int1177zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fpp)1178{1179int fd;1180int dump_fd;1181int err;1182int old_umask = 0;1183zfs_file_t *fp;1184struct stat64 st;11851186if (!(flags & O_CREAT) && stat64(path, &st) == -1)1187return (errno);11881189if (!(flags & O_CREAT) && S_ISBLK(st.st_mode))1190flags |= O_DIRECT;11911192if (flags & O_CREAT)1193old_umask = umask(0);11941195fd = open64(path, flags, mode);1196if (fd == -1)1197return (errno);11981199if (flags & O_CREAT)1200(void) umask(old_umask);12011202if (vn_dumpdir != NULL) {1203char *dumppath = umem_zalloc(MAXPATHLEN, UMEM_NOFAIL);1204const char *inpath = zfs_basename(path);12051206(void) snprintf(dumppath, MAXPATHLEN,1207"%s/%s", vn_dumpdir, inpath);1208dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666);1209umem_free(dumppath, MAXPATHLEN);1210if (dump_fd == -1) {1211err = errno;1212close(fd);1213return (err);1214}1215} else {1216dump_fd = -1;1217}12181219(void) fcntl(fd, F_SETFD, FD_CLOEXEC);12201221fp = umem_zalloc(sizeof (zfs_file_t), UMEM_NOFAIL);1222fp->f_fd = fd;1223fp->f_dump_fd = dump_fd;1224*fpp = fp;12251226return (0);1227}12281229void1230zfs_file_close(zfs_file_t *fp)1231{1232close(fp->f_fd);1233if (fp->f_dump_fd != -1)1234close(fp->f_dump_fd);12351236umem_free(fp, sizeof (zfs_file_t));1237}12381239/*1240* Stateful write - use os internal file pointer to determine where to1241* write and update on successful completion.1242*1243* fp - pointer to file (pipe, socket, etc) to write to1244* buf - buffer to write1245* count - # of bytes to write1246* resid - pointer to count of unwritten bytes (if short write)1247*1248* Returns 0 on success errno on failure.1249*/1250int1251zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid)1252{1253ssize_t rc;12541255rc = write(fp->f_fd, buf, count);1256if (rc < 0)1257return (errno);12581259if (resid) {1260*resid = count - rc;1261} else if (rc != count) {1262return (EIO);1263}12641265return (0);1266}12671268/*1269* Stateless write - os internal file pointer is not updated.1270*1271* fp - pointer to file (pipe, socket, etc) to write to1272* buf - buffer to write1273* count - # of bytes to write1274* off - file offset to write to (only valid for seekable types)1275* resid - pointer to count of unwritten bytes1276*1277* Returns 0 on success errno on failure.1278*/1279int1280zfs_file_pwrite(zfs_file_t *fp, const void *buf,1281size_t count, loff_t pos, uint8_t ashift, ssize_t *resid)1282{1283ssize_t rc, split, done;1284int sectors;12851286/*1287* To simulate partial disk writes, we split writes into two1288* system calls so that the process can be killed in between.1289* This is used by ztest to simulate realistic failure modes.1290*/1291sectors = count >> ashift;1292split = (sectors > 0 ? rand() % sectors : 0) << ashift;1293rc = pwrite64(fp->f_fd, buf, split, pos);1294if (rc != -1) {1295done = rc;1296rc = pwrite64(fp->f_fd, (char *)buf + split,1297count - split, pos + split);1298}1299#ifdef __linux__1300if (rc == -1 && errno == EINVAL) {1301/*1302* Under Linux, this most likely means an alignment issue1303* (memory or disk) due to O_DIRECT, so we abort() in order1304* to catch the offender.1305*/1306abort();1307}1308#endif13091310if (rc < 0)1311return (errno);13121313done += rc;13141315if (resid) {1316*resid = count - done;1317} else if (done != count) {1318return (EIO);1319}13201321return (0);1322}13231324/*1325* Stateful read - use os internal file pointer to determine where to1326* read and update on successful completion.1327*1328* fp - pointer to file (pipe, socket, etc) to read from1329* buf - buffer to write1330* count - # of bytes to read1331* resid - pointer to count of unread bytes (if short read)1332*1333* Returns 0 on success errno on failure.1334*/1335int1336zfs_file_read(zfs_file_t *fp, void *buf, size_t count, ssize_t *resid)1337{1338int rc;13391340rc = read(fp->f_fd, buf, count);1341if (rc < 0)1342return (errno);13431344if (resid) {1345*resid = count - rc;1346} else if (rc != count) {1347return (EIO);1348}13491350return (0);1351}13521353/*1354* Stateless read - os internal file pointer is not updated.1355*1356* fp - pointer to file (pipe, socket, etc) to read from1357* buf - buffer to write1358* count - # of bytes to write1359* off - file offset to read from (only valid for seekable types)1360* resid - pointer to count of unwritten bytes (if short write)1361*1362* Returns 0 on success errno on failure.1363*/1364int1365zfs_file_pread(zfs_file_t *fp, void *buf, size_t count, loff_t off,1366ssize_t *resid)1367{1368ssize_t rc;13691370rc = pread64(fp->f_fd, buf, count, off);1371if (rc < 0) {1372#ifdef __linux__1373/*1374* Under Linux, this most likely means an alignment issue1375* (memory or disk) due to O_DIRECT, so we abort() in order to1376* catch the offender.1377*/1378if (errno == EINVAL)1379abort();1380#endif1381return (errno);1382}13831384if (fp->f_dump_fd != -1) {1385int status;13861387status = pwrite64(fp->f_dump_fd, buf, rc, off);1388ASSERT(status != -1);1389}13901391if (resid) {1392*resid = count - rc;1393} else if (rc != count) {1394return (EIO);1395}13961397return (0);1398}13991400/*1401* lseek - set / get file pointer1402*1403* fp - pointer to file (pipe, socket, etc) to read from1404* offp - value to seek to, returns current value plus passed offset1405* whence - see man pages for standard lseek whence values1406*1407* Returns 0 on success errno on failure (ESPIPE for non seekable types)1408*/1409int1410zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence)1411{1412loff_t rc;14131414rc = lseek(fp->f_fd, *offp, whence);1415if (rc < 0)1416return (errno);14171418*offp = rc;14191420return (0);1421}14221423/*1424* Get file attributes1425*1426* filp - file pointer1427* zfattr - pointer to file attr structure1428*1429* Currently only used for fetching size and file mode1430*1431* Returns 0 on success or error code of underlying getattr call on failure.1432*/1433int1434zfs_file_getattr(zfs_file_t *fp, zfs_file_attr_t *zfattr)1435{1436struct stat64 st;14371438if (fstat64_blk(fp->f_fd, &st) == -1)1439return (errno);14401441zfattr->zfa_size = st.st_size;1442zfattr->zfa_mode = st.st_mode;14431444return (0);1445}14461447/*1448* Sync file to disk1449*1450* filp - file pointer1451* flags - O_SYNC and or O_DSYNC1452*1453* Returns 0 on success or error code of underlying sync call on failure.1454*/1455int1456zfs_file_fsync(zfs_file_t *fp, int flags)1457{1458(void) flags;14591460if (fsync(fp->f_fd) < 0)1461return (errno);14621463return (0);1464}14651466/*1467* deallocate - zero and/or deallocate file storage1468*1469* fp - file pointer1470* offset - offset to start zeroing or deallocating1471* len - length to zero or deallocate1472*/1473int1474zfs_file_deallocate(zfs_file_t *fp, loff_t offset, loff_t len)1475{1476int rc;1477#if defined(__linux__)1478rc = fallocate(fp->f_fd,1479FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, len);1480#elif defined(__FreeBSD__) && (__FreeBSD_version >= 1400029)1481struct spacectl_range rqsr = {1482.r_offset = offset,1483.r_len = len,1484};1485rc = fspacectl(fp->f_fd, SPACECTL_DEALLOC, &rqsr, 0, &rqsr);1486#else1487(void) fp, (void) offset, (void) len;1488rc = EOPNOTSUPP;1489#endif1490if (rc)1491return (SET_ERROR(rc));1492return (0);1493}14941495/*1496* Request current file pointer offset1497*1498* fp - pointer to file1499*1500* Returns current file offset.1501*/1502loff_t1503zfs_file_off(zfs_file_t *fp)1504{1505return (lseek(fp->f_fd, SEEK_CUR, 0));1506}15071508/*1509* unlink file1510*1511* path - fully qualified file path1512*1513* Returns 0 on success.1514*1515* OPTIONAL1516*/1517int1518zfs_file_unlink(const char *path)1519{1520return (remove(path));1521}15221523/*1524* Get reference to file pointer1525*1526* fd - input file descriptor1527*1528* Returns pointer to file struct or NULL.1529* Unsupported in user space.1530*/1531zfs_file_t *1532zfs_file_get(int fd)1533{1534(void) fd;1535abort();1536return (NULL);1537}1538/*1539* Drop reference to file pointer1540*1541* fp - pointer to file struct1542*1543* Unsupported in user space.1544*/1545void1546zfs_file_put(zfs_file_t *fp)1547{1548abort();1549(void) fp;1550}15511552void1553zfsvfs_update_fromname(const char *oldname, const char *newname)1554{1555(void) oldname, (void) newname;1556}15571558void1559spa_import_os(spa_t *spa)1560{1561(void) spa;1562}15631564void1565spa_export_os(spa_t *spa)1566{1567(void) spa;1568}15691570void1571spa_activate_os(spa_t *spa)1572{1573(void) spa;1574}15751576void1577spa_deactivate_os(spa_t *spa)1578{1579(void) spa;1580}158115821583