Path: sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
// SPDX-License-Identifier: CDDL-1.0
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2014 by Chunwei Chen. All rights reserved.
 * Copyright (c) 2019 by Delphix. All rights reserved.
 * Copyright (c) 2023, 2024, Klara Inc.
 * Copyright (c) 2025, Rob Norris <[email protected]>
 */

/*
 * See abd.c for a general overview of the arc buffered data (ABD).
 *
 * Linear buffers act exactly like normal buffers and are always mapped into the
 * kernel's virtual memory space, while scattered ABD data chunks are allocated
 * as physical pages and then mapped in only while they are actually being
 * accessed through one of the abd_* library functions. Using scattered ABDs
 * provides several benefits:
 *
 * (1) They avoid use of kmem_*, preventing performance problems where running
 *     kmem_reap on very large memory systems never finishes and causes
 *     constant TLB shootdowns.
 *
 * (2) Fragmentation is less of an issue since when we are at the limit of
 *     allocatable space, we won't have to search around for a long free
 *     hole in the VA space for large ARC allocations. Each chunk is mapped in
 *     individually, so even if we are using HIGHMEM (see next point) we
 *     wouldn't need to worry about finding a contiguous address range.
 *
 * (3) If we are not using HIGHMEM, then all physical memory is always
 *     mapped into the kernel's address space, so we also avoid the map /
 *     unmap costs on each ABD access.
 *
 * If we are not using HIGHMEM, scattered buffers which have only one chunk
 * can be treated as linear buffers, because they are contiguous in the
 * kernel's virtual address space.
 * See abd_alloc_chunks() for details.
 */

#include <sys/abd_impl.h>
#include <sys/param.h>
#include <sys/zio.h>
#include <sys/arc.h>
#include <sys/zfs_context.h>
#include <sys/zfs_znode.h>
#include <linux/kmap_compat.h>
#include <linux/mm_compat.h>
#include <linux/scatterlist.h>
#include <linux/version.h>

#if defined(MAX_ORDER)
#define ABD_MAX_ORDER (MAX_ORDER)
#elif defined(MAX_PAGE_ORDER)
#define ABD_MAX_ORDER (MAX_PAGE_ORDER)
#endif

typedef struct abd_stats {
	kstat_named_t abdstat_struct_size;
	kstat_named_t abdstat_linear_cnt;
	kstat_named_t abdstat_linear_data_size;
	kstat_named_t abdstat_scatter_cnt;
	kstat_named_t abdstat_scatter_data_size;
	kstat_named_t abdstat_scatter_chunk_waste;
	kstat_named_t abdstat_scatter_orders[ABD_MAX_ORDER];
	kstat_named_t abdstat_scatter_page_multi_chunk;
	kstat_named_t abdstat_scatter_page_multi_zone;
	kstat_named_t abdstat_scatter_page_alloc_retry;
	kstat_named_t abdstat_scatter_sg_table_retry;
} abd_stats_t;

static abd_stats_t abd_stats = {
	/* Amount of memory occupied by all of the abd_t struct allocations */
	{ "struct_size", KSTAT_DATA_UINT64 },
	/*
	 * The number of linear ABDs which are currently allocated, excluding
	 * ABDs which don't own their data (for instance the ones which were
	 * allocated through abd_get_offset() and abd_get_from_buf()). If an
	 * ABD takes ownership of its buf then it will become tracked.
	 */
	{ "linear_cnt", KSTAT_DATA_UINT64 },
	/* Amount of data stored in all linear ABDs tracked by linear_cnt */
	{ "linear_data_size", KSTAT_DATA_UINT64 },
	/*
	 * The number of scatter ABDs which are currently allocated, excluding
	 * ABDs which don't own their data (for instance the ones which were
	 * allocated through abd_get_offset()).
	 */
	{ "scatter_cnt", KSTAT_DATA_UINT64 },
	/* Amount of data stored in all scatter ABDs tracked by scatter_cnt */
	{ "scatter_data_size", KSTAT_DATA_UINT64 },
	/*
	 * The amount of space wasted at the end of the last chunk across all
	 * scatter ABDs tracked by scatter_cnt.
	 */
	{ "scatter_chunk_waste", KSTAT_DATA_UINT64 },
	/*
	 * The number of compound allocations of a given order.
	 * These allocations are spread over all currently allocated ABDs, and
	 * act as a measure of memory fragmentation.
	 */
	{ { "scatter_order_N", KSTAT_DATA_UINT64 } },
	/*
	 * The number of scatter ABDs which contain multiple chunks.
	 * ABDs are preferentially allocated from the minimum number of
	 * contiguous multi-page chunks; a single chunk is optimal.
	 */
	{ "scatter_page_multi_chunk", KSTAT_DATA_UINT64 },
	/*
	 * The number of scatter ABDs which are split across memory zones.
	 * ABDs are preferentially allocated using pages from a single zone.
	 */
	{ "scatter_page_multi_zone", KSTAT_DATA_UINT64 },
	/*
	 * The total number of retries encountered when attempting to
	 * allocate the pages to populate the scatter ABD.
	 */
	{ "scatter_page_alloc_retry", KSTAT_DATA_UINT64 },
	/*
	 * The total number of retries encountered when attempting to
	 * allocate the sg table for an ABD.
	 */
	{ "scatter_sg_table_retry", KSTAT_DATA_UINT64 },
};

static struct {
	wmsum_t abdstat_struct_size;
	wmsum_t abdstat_linear_cnt;
	wmsum_t abdstat_linear_data_size;
	wmsum_t abdstat_scatter_cnt;
	wmsum_t abdstat_scatter_data_size;
	wmsum_t abdstat_scatter_chunk_waste;
	wmsum_t abdstat_scatter_orders[ABD_MAX_ORDER];
	wmsum_t abdstat_scatter_page_multi_chunk;
	wmsum_t abdstat_scatter_page_multi_zone;
	wmsum_t abdstat_scatter_page_alloc_retry;
	wmsum_t abdstat_scatter_sg_table_retry;
} abd_sums;

#define abd_for_each_sg(abd, sg, n, i) \
	for_each_sg(ABD_SCATTER(abd).abd_sgl, sg, n, i)

/*
 * zfs_abd_scatter_min_size is the minimum allocation size to use scatter
 * ABD's. Smaller allocations will use linear ABD's, which use
 * zio_[data_]buf_alloc().
 *
 * Scatter ABD's use at least one page each, so sub-page allocations waste
 * some space when allocated as scatter (e.g. 2KB scatter allocation wastes
 * half of each page). Using linear ABD's for small allocations means that
 * they will be put on slabs which contain many allocations. This can
 * improve memory efficiency, but it also makes it much harder for ARC
 * evictions to actually free pages, because all the buffers on one slab need
 * to be freed in order for the slab (and underlying pages) to be freed.
 * Typically, 512B and 1KB kmem caches have 16 buffers per slab, so it's
 * possible for them to actually waste more memory than scatter (one page per
 * buf = wasting 3/4 or 7/8th; one buf per slab = wasting 15/16th).
 *
 * Spill blocks are typically 512B and are heavily used on systems running
 * selinux with the default dnode size and the `xattr=sa` property set.
 *
 * By default we use linear allocations for 512B and 1KB, and scatter
 * allocations for larger (1.5KB and up).
 */
static int zfs_abd_scatter_min_size = 512 * 3;

/*
 * We use a scattered SPA_MAXBLOCKSIZE sized ABD whose pages are
 * just a single zero'd page. This allows us to conserve memory by
 * only using a single zero page for the scatterlist.
 */
abd_t *abd_zero_scatter = NULL;

struct page;

/*
 * abd_zero_page is assigned to each of the pages of abd_zero_scatter.
 * It will point to ZERO_PAGE if it is available or it will be an allocated
 * zero'd PAGESIZE buffer.
 */
static struct page *abd_zero_page = NULL;

static kmem_cache_t *abd_cache = NULL;
static kstat_t *abd_ksp;

static uint_t
abd_chunkcnt_for_bytes(size_t size)
{
	return (P2ROUNDUP(size, PAGESIZE) / PAGESIZE);
}

abd_t *
abd_alloc_struct_impl(size_t size)
{
	/*
	 * In Linux we do not use the size passed in during ABD
	 * allocation, so we just ignore it.
	 */
	(void) size;
	abd_t *abd = kmem_cache_alloc(abd_cache, KM_PUSHPAGE);
	ASSERT3P(abd, !=, NULL);
	ABDSTAT_INCR(abdstat_struct_size, sizeof (abd_t));

	return (abd);
}

void
abd_free_struct_impl(abd_t *abd)
{
	kmem_cache_free(abd_cache, abd);
	ABDSTAT_INCR(abdstat_struct_size, -(int)sizeof (abd_t));
}

static unsigned zfs_abd_scatter_max_order = ABD_MAX_ORDER - 1;

/*
 * Mark zfs data pages so they can be excluded from kernel crash dumps
 */
#ifdef _LP64
#define ABD_FILE_CACHE_PAGE 0x2F5ABDF11ECAC4E

static inline void
abd_mark_zfs_page(struct page *page)
{
	get_page(page);
	SetPagePrivate(page);
	set_page_private(page, ABD_FILE_CACHE_PAGE);
}

static inline void
abd_unmark_zfs_page(struct page *page)
{
	set_page_private(page, 0UL);
	ClearPagePrivate(page);
	put_page(page);
}
#else
#define abd_mark_zfs_page(page)
#define abd_unmark_zfs_page(page)
#endif /* _LP64 */

#ifndef CONFIG_HIGHMEM

/*
 * The goal is to minimize fragmentation by preferentially populating ABDs
 * with higher order compound pages from a single zone. Allocation size is
 * progressively decreased until it can be satisfied without performing
 * reclaim or compaction. When necessary this function will degenerate to
 * allocating individual pages and allowing reclaim to satisfy allocations.
 */
void
abd_alloc_chunks(abd_t *abd, size_t size)
{
	struct list_head pages;
	struct sg_table table;
	struct scatterlist *sg;
	struct page *page, *tmp_page = NULL;
	gfp_t gfp = __GFP_RECLAIMABLE | __GFP_NOWARN | GFP_NOIO;
	gfp_t gfp_comp = (gfp | __GFP_NORETRY | __GFP_COMP) & ~__GFP_RECLAIM;
	unsigned int max_order = MIN(zfs_abd_scatter_max_order,
	    ABD_MAX_ORDER - 1);
	unsigned int nr_pages = abd_chunkcnt_for_bytes(size);
	unsigned int chunks = 0, zones = 0;
	size_t remaining_size;
	int nid = NUMA_NO_NODE;
	unsigned int alloc_pages = 0;

	INIT_LIST_HEAD(&pages);

	ASSERT3U(alloc_pages, <, nr_pages);

	while (alloc_pages < nr_pages) {
		unsigned int chunk_pages;
		unsigned int order;

		order = MIN(highbit64(nr_pages - alloc_pages) - 1, max_order);
		chunk_pages = (1U << order);

		page = alloc_pages_node(nid, order ?
		    gfp_comp : gfp, order);
		if (page == NULL) {
			if (order == 0) {
				ABDSTAT_BUMP(abdstat_scatter_page_alloc_retry);
				schedule_timeout_interruptible(1);
			} else {
				max_order = MAX(0, order - 1);
			}
			continue;
		}

		list_add_tail(&page->lru, &pages);

		if ((nid != NUMA_NO_NODE) && (page_to_nid(page) != nid))
			zones++;

		nid = page_to_nid(page);
		ABDSTAT_BUMP(abdstat_scatter_orders[order]);
		chunks++;
		alloc_pages += chunk_pages;
	}

	ASSERT3S(alloc_pages, ==, nr_pages);

	while (sg_alloc_table(&table, chunks, gfp)) {
		ABDSTAT_BUMP(abdstat_scatter_sg_table_retry);
		schedule_timeout_interruptible(1);
	}

	sg = table.sgl;
	remaining_size = size;
	list_for_each_entry_safe(page, tmp_page, &pages, lru) {
		size_t sg_size = MIN(PAGESIZE << compound_order(page),
		    remaining_size);
		sg_set_page(sg, page, sg_size, 0);
		abd_mark_zfs_page(page);
		remaining_size -= sg_size;

		sg = sg_next(sg);
		list_del(&page->lru);
	}

	/*
	 * These conditions ensure that a possible transformation to a linear
	 * ABD would be valid.
	 */
	ASSERT(!PageHighMem(sg_page(table.sgl)));
	ASSERT0(ABD_SCATTER(abd).abd_offset);

	if (table.nents == 1) {
		/*
		 * Since there is only one entry, this ABD can be represented
		 * as a linear buffer. All single-page (4K) ABD's can be
		 * represented this way. Some multi-page ABD's can also be
		 * represented this way, if we were able to allocate a single
		 * "chunk" (higher-order "page" which represents a power-of-2
		 * series of physically-contiguous pages). This is often the
		 * case for 2-page (8K) ABD's.
		 *
		 * Representing a single-entry scatter ABD as a linear ABD
		 * has the performance advantage of avoiding the copy (and
		 * allocation) in abd_borrow_buf_copy / abd_return_buf_copy.
		 * A performance increase of around 5% has been observed for
		 * ARC-cached reads (of small blocks which can take advantage
		 * of this).
		 *
		 * Note that this optimization is only possible because the
		 * pages are always mapped into the kernel's address space.
		 * This is not the case for highmem pages, so the
		 * optimization can not be made there.
		 */
		abd->abd_flags |= ABD_FLAG_LINEAR;
		abd->abd_flags |= ABD_FLAG_LINEAR_PAGE;
		abd->abd_u.abd_linear.abd_sgl = table.sgl;
		ABD_LINEAR_BUF(abd) = page_address(sg_page(table.sgl));
	} else if (table.nents > 1) {
		ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk);
		abd->abd_flags |= ABD_FLAG_MULTI_CHUNK;

		if (zones) {
			ABDSTAT_BUMP(abdstat_scatter_page_multi_zone);
			abd->abd_flags |= ABD_FLAG_MULTI_ZONE;
		}

		ABD_SCATTER(abd).abd_sgl = table.sgl;
		ABD_SCATTER(abd).abd_nents = table.nents;
	}
}
#else

/*
 * Allocate N individual pages to construct a scatter ABD. This function
 * makes no attempt to request contiguous pages and requires the minimal
 * number of kernel interfaces.
 * It's designed for maximum compatibility.
 */
void
abd_alloc_chunks(abd_t *abd, size_t size)
{
	struct scatterlist *sg = NULL;
	struct sg_table table;
	struct page *page;
	gfp_t gfp = __GFP_RECLAIMABLE | __GFP_NOWARN | GFP_NOIO;
	int nr_pages = abd_chunkcnt_for_bytes(size);
	int i = 0;

	while (sg_alloc_table(&table, nr_pages, gfp)) {
		ABDSTAT_BUMP(abdstat_scatter_sg_table_retry);
		schedule_timeout_interruptible(1);
	}

	ASSERT3U(table.nents, ==, nr_pages);
	ABD_SCATTER(abd).abd_sgl = table.sgl;
	ABD_SCATTER(abd).abd_nents = nr_pages;

	abd_for_each_sg(abd, sg, nr_pages, i) {
		while ((page = __page_cache_alloc(gfp)) == NULL) {
			ABDSTAT_BUMP(abdstat_scatter_page_alloc_retry);
			schedule_timeout_interruptible(1);
		}

		ABDSTAT_BUMP(abdstat_scatter_orders[0]);
		sg_set_page(sg, page, PAGESIZE, 0);
		abd_mark_zfs_page(page);
	}

	if (nr_pages > 1) {
		ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk);
		abd->abd_flags |= ABD_FLAG_MULTI_CHUNK;
	}
}
#endif /* !CONFIG_HIGHMEM */

/*
 * This must be called if any of the sg_table allocation functions
 * are called.
 */
static void
abd_free_sg_table(abd_t *abd)
{
	struct sg_table table;

	table.sgl = ABD_SCATTER(abd).abd_sgl;
	table.nents = table.orig_nents = ABD_SCATTER(abd).abd_nents;
	sg_free_table(&table);
}

void
abd_free_chunks(abd_t *abd)
{
	struct scatterlist *sg = NULL;
	struct page *page;
	int nr_pages = ABD_SCATTER(abd).abd_nents;
	int order, i = 0;

	if (abd->abd_flags & ABD_FLAG_MULTI_ZONE)
		ABDSTAT_BUMPDOWN(abdstat_scatter_page_multi_zone);

	if (abd->abd_flags & ABD_FLAG_MULTI_CHUNK)
		ABDSTAT_BUMPDOWN(abdstat_scatter_page_multi_chunk);

	/*
	 * Scatter ABDs may be constructed by abd_alloc_from_pages() from
	 * an array of pages.
	 * In which case they should not be freed.
	 */
	if (!abd_is_from_pages(abd)) {
		abd_for_each_sg(abd, sg, nr_pages, i) {
			page = sg_page(sg);
			abd_unmark_zfs_page(page);
			order = compound_order(page);
			__free_pages(page, order);
			ASSERT3U(sg->length, <=, PAGE_SIZE << order);
			ABDSTAT_BUMPDOWN(abdstat_scatter_orders[order]);
		}
	}

	abd_free_sg_table(abd);
}

/*
 * Allocate scatter ABD of size SPA_MAXBLOCKSIZE, where each page in
 * the scatterlist will be set to the zero'd out buffer abd_zero_page.
 */
static void
abd_alloc_zero_scatter(void)
{
	struct scatterlist *sg = NULL;
	struct sg_table table;
	gfp_t gfp = __GFP_NOWARN | GFP_NOIO;
	int nr_pages = abd_chunkcnt_for_bytes(SPA_MAXBLOCKSIZE);
	int i = 0;

#if defined(HAVE_ZERO_PAGE_GPL_ONLY)
	gfp_t gfp_zero_page = gfp | __GFP_ZERO;
	while ((abd_zero_page = __page_cache_alloc(gfp_zero_page)) == NULL) {
		ABDSTAT_BUMP(abdstat_scatter_page_alloc_retry);
		schedule_timeout_interruptible(1);
	}
	abd_mark_zfs_page(abd_zero_page);
#else
	abd_zero_page = ZERO_PAGE(0);
#endif /* HAVE_ZERO_PAGE_GPL_ONLY */

	while (sg_alloc_table(&table, nr_pages, gfp)) {
		ABDSTAT_BUMP(abdstat_scatter_sg_table_retry);
		schedule_timeout_interruptible(1);
	}
	ASSERT3U(table.nents, ==, nr_pages);

	abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE);
	abd_zero_scatter->abd_flags |= ABD_FLAG_OWNER;
	ABD_SCATTER(abd_zero_scatter).abd_offset = 0;
	ABD_SCATTER(abd_zero_scatter).abd_sgl = table.sgl;
	ABD_SCATTER(abd_zero_scatter).abd_nents = nr_pages;
	abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE;
	abd_zero_scatter->abd_flags |= ABD_FLAG_MULTI_CHUNK;

	abd_for_each_sg(abd_zero_scatter, sg, nr_pages, i) {
		sg_set_page(sg, abd_zero_page, PAGESIZE, 0);
	}

	ABDSTAT_BUMP(abdstat_scatter_cnt);
	ABDSTAT_INCR(abdstat_scatter_data_size, PAGESIZE);
	ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk);
}

boolean_t
abd_size_alloc_linear(size_t size)
{
	return (!zfs_abd_scatter_enabled || size < zfs_abd_scatter_min_size);
}

void
abd_update_scatter_stats(abd_t *abd, abd_stats_op_t op)
{
	ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
	int waste = P2ROUNDUP(abd->abd_size, PAGESIZE) - abd->abd_size;
	if (op == ABDSTAT_INCR) {
		ABDSTAT_BUMP(abdstat_scatter_cnt);
		ABDSTAT_INCR(abdstat_scatter_data_size, abd->abd_size);
		ABDSTAT_INCR(abdstat_scatter_chunk_waste, waste);
		arc_space_consume(waste, ARC_SPACE_ABD_CHUNK_WASTE);
	} else {
		ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
		ABDSTAT_INCR(abdstat_scatter_data_size, -(int)abd->abd_size);
		ABDSTAT_INCR(abdstat_scatter_chunk_waste, -waste);
		arc_space_return(waste, ARC_SPACE_ABD_CHUNK_WASTE);
	}
}

void
abd_update_linear_stats(abd_t *abd, abd_stats_op_t op)
{
	ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
	if (op == ABDSTAT_INCR) {
		ABDSTAT_BUMP(abdstat_linear_cnt);
		ABDSTAT_INCR(abdstat_linear_data_size, abd->abd_size);
	} else {
		ABDSTAT_BUMPDOWN(abdstat_linear_cnt);
		ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size);
	}
}

void
abd_verify_scatter(abd_t *abd)
{
	ASSERT3U(ABD_SCATTER(abd).abd_nents, >, 0);
	ASSERT3U(ABD_SCATTER(abd).abd_offset, <,
	    ABD_SCATTER(abd).abd_sgl->length);

#ifdef ZFS_DEBUG
	struct scatterlist *sg = NULL;
	size_t n = ABD_SCATTER(abd).abd_nents;
	int i = 0;

	abd_for_each_sg(abd, sg, n, i) {
		ASSERT3P(sg_page(sg), !=, NULL);
	}
#endif
}

static
void
abd_free_zero_scatter(void)
{
	ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
	ABDSTAT_INCR(abdstat_scatter_data_size, -(int)PAGESIZE);
	ABDSTAT_BUMPDOWN(abdstat_scatter_page_multi_chunk);

	abd_free_sg_table(abd_zero_scatter);
	abd_free_struct(abd_zero_scatter);
	abd_zero_scatter = NULL;
	ASSERT3P(abd_zero_page, !=, NULL);
#if defined(HAVE_ZERO_PAGE_GPL_ONLY)
	abd_unmark_zfs_page(abd_zero_page);
	__free_page(abd_zero_page);
#endif /* HAVE_ZERO_PAGE_GPL_ONLY */
}

static int
abd_kstats_update(kstat_t *ksp, int rw)
{
	abd_stats_t *as = ksp->ks_data;

	if (rw == KSTAT_WRITE)
		return (EACCES);
	as->abdstat_struct_size.value.ui64 =
	    wmsum_value(&abd_sums.abdstat_struct_size);
	as->abdstat_linear_cnt.value.ui64 =
	    wmsum_value(&abd_sums.abdstat_linear_cnt);
	as->abdstat_linear_data_size.value.ui64 =
	    wmsum_value(&abd_sums.abdstat_linear_data_size);
	as->abdstat_scatter_cnt.value.ui64 =
	    wmsum_value(&abd_sums.abdstat_scatter_cnt);
	as->abdstat_scatter_data_size.value.ui64 =
	    wmsum_value(&abd_sums.abdstat_scatter_data_size);
	as->abdstat_scatter_chunk_waste.value.ui64 =
	    wmsum_value(&abd_sums.abdstat_scatter_chunk_waste);
	for (int i = 0; i < ABD_MAX_ORDER; i++) {
		as->abdstat_scatter_orders[i].value.ui64 =
		    wmsum_value(&abd_sums.abdstat_scatter_orders[i]);
	}
	as->abdstat_scatter_page_multi_chunk.value.ui64 =
	    wmsum_value(&abd_sums.abdstat_scatter_page_multi_chunk);
	as->abdstat_scatter_page_multi_zone.value.ui64 =
	    wmsum_value(&abd_sums.abdstat_scatter_page_multi_zone);
	as->abdstat_scatter_page_alloc_retry.value.ui64 =
	    wmsum_value(&abd_sums.abdstat_scatter_page_alloc_retry);
	as->abdstat_scatter_sg_table_retry.value.ui64 =
	    wmsum_value(&abd_sums.abdstat_scatter_sg_table_retry);
	return (0);
}

void
abd_init(void)
{
	int i;

	abd_cache = kmem_cache_create("abd_t", sizeof (abd_t),
	    0, NULL, NULL, NULL, NULL, NULL, KMC_RECLAIMABLE);

	wmsum_init(&abd_sums.abdstat_struct_size, 0);
	wmsum_init(&abd_sums.abdstat_linear_cnt, 0);
	wmsum_init(&abd_sums.abdstat_linear_data_size, 0);
	wmsum_init(&abd_sums.abdstat_scatter_cnt, 0);
	wmsum_init(&abd_sums.abdstat_scatter_data_size, 0);
	wmsum_init(&abd_sums.abdstat_scatter_chunk_waste, 0);
	for (i = 0; i < ABD_MAX_ORDER; i++)
		wmsum_init(&abd_sums.abdstat_scatter_orders[i], 0);
	wmsum_init(&abd_sums.abdstat_scatter_page_multi_chunk, 0);
	wmsum_init(&abd_sums.abdstat_scatter_page_multi_zone, 0);
	wmsum_init(&abd_sums.abdstat_scatter_page_alloc_retry, 0);
	wmsum_init(&abd_sums.abdstat_scatter_sg_table_retry, 0);

	abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED,
	    sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
	if (abd_ksp != NULL) {
		for (i = 0; i < ABD_MAX_ORDER; i++) {
			snprintf(abd_stats.abdstat_scatter_orders[i].name,
			    KSTAT_STRLEN, "scatter_order_%d", i);
			abd_stats.abdstat_scatter_orders[i].data_type =
			    KSTAT_DATA_UINT64;
		}
		abd_ksp->ks_data = &abd_stats;
		abd_ksp->ks_update = abd_kstats_update;
		kstat_install(abd_ksp);
	}

	abd_alloc_zero_scatter();
}

void
abd_fini(void)
{
	abd_free_zero_scatter();

	if (abd_ksp != NULL) {
		kstat_delete(abd_ksp);
		abd_ksp = NULL;
	}

	wmsum_fini(&abd_sums.abdstat_struct_size);
	wmsum_fini(&abd_sums.abdstat_linear_cnt);
	wmsum_fini(&abd_sums.abdstat_linear_data_size);
	wmsum_fini(&abd_sums.abdstat_scatter_cnt);
	wmsum_fini(&abd_sums.abdstat_scatter_data_size);
	wmsum_fini(&abd_sums.abdstat_scatter_chunk_waste);
	for (int i = 0; i < ABD_MAX_ORDER;
	    i++)
		wmsum_fini(&abd_sums.abdstat_scatter_orders[i]);
	wmsum_fini(&abd_sums.abdstat_scatter_page_multi_chunk);
	wmsum_fini(&abd_sums.abdstat_scatter_page_multi_zone);
	wmsum_fini(&abd_sums.abdstat_scatter_page_alloc_retry);
	wmsum_fini(&abd_sums.abdstat_scatter_sg_table_retry);

	if (abd_cache) {
		kmem_cache_destroy(abd_cache);
		abd_cache = NULL;
	}
}

void
abd_free_linear_page(abd_t *abd)
{
	/* Transform it back into a scatter ABD for freeing */
	struct scatterlist *sg = abd->abd_u.abd_linear.abd_sgl;

	/* When backed by user page unmap it */
	if (abd_is_from_pages(abd))
		zfs_kunmap(sg_page(sg));
	else
		abd_update_scatter_stats(abd, ABDSTAT_DECR);

	abd->abd_flags &= ~ABD_FLAG_LINEAR;
	abd->abd_flags &= ~ABD_FLAG_LINEAR_PAGE;
	ABD_SCATTER(abd).abd_nents = 1;
	ABD_SCATTER(abd).abd_offset = 0;
	ABD_SCATTER(abd).abd_sgl = sg;
	abd_free_chunks(abd);
}

/*
 * Allocate a scatter ABD structure from user pages. The pages must be
 * pinned with get_user_pages, or similar, but need not be mapped via
 * the kmap interfaces.
 */
abd_t *
abd_alloc_from_pages(struct page **pages, unsigned long offset, uint64_t size)
{
	uint_t npages = DIV_ROUND_UP(size, PAGE_SIZE);
	struct sg_table table;

	VERIFY3U(size, <=, DMU_MAX_ACCESS);
	ASSERT3U(offset, <, PAGE_SIZE);
	ASSERT3P(pages, !=, NULL);

	/*
	 * Even if this buf is filesystem metadata, we only track that we
	 * own the underlying data buffer, which is not true in this case.
	 * Therefore, we don't ever use ABD_FLAG_META here.
	 */
	abd_t *abd = abd_alloc_struct(0);
	abd->abd_flags |= ABD_FLAG_FROM_PAGES | ABD_FLAG_OWNER;
	abd->abd_size = size;

	while (sg_alloc_table_from_pages(&table, pages, npages, offset,
	    size, __GFP_NOWARN | GFP_NOIO) != 0) {
		ABDSTAT_BUMP(abdstat_scatter_sg_table_retry);
		schedule_timeout_interruptible(1);
	}

	if ((offset + size) <= PAGE_SIZE) {
		/*
		 * Since there is only one entry, this ABD can be represented
		 * as a linear buffer. All single-page (4K) ABD's constructed
		 * from a user page can be represented this way as long as the
		 * page is mapped to a virtual address. This allows us to
		 * apply an offset into the mapped page.
		 *
		 * Note that kmap() must be used, not kmap_atomic(), because
		 * the mapping needs to be set up on all CPUs. Using kmap()
		 * also enables the use of highmem pages when required.
		 */
		abd->abd_flags |= ABD_FLAG_LINEAR | ABD_FLAG_LINEAR_PAGE;
		abd->abd_u.abd_linear.abd_sgl = table.sgl;
		zfs_kmap(sg_page(table.sgl));
		ABD_LINEAR_BUF(abd) = sg_virt(table.sgl);
	} else {
		ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk);
		abd->abd_flags |= ABD_FLAG_MULTI_CHUNK;

		ABD_SCATTER(abd).abd_offset = offset;
		ABD_SCATTER(abd).abd_sgl = table.sgl;
		ABD_SCATTER(abd).abd_nents = table.nents;

		ASSERT0(ABD_SCATTER(abd).abd_offset);
	}

	return (abd);
}

/*
 * If we're going to use this ABD for doing I/O using the block layer, the
 * consumer of the ABD data doesn't care if it's scattered or not, and we don't
 * plan to store this ABD in memory for a long period of time, we should
 * allocate the ABD type that requires the least data copying to do the I/O.
 *
 * On Linux the optimal thing to do would be to use abd_get_offset() and
 * construct a new ABD which shares the original pages thereby eliminating
 * the copy.
 * But for the moment a new linear ABD is allocated until this
 * performance optimization can be implemented.
 */
abd_t *
abd_alloc_for_io(size_t size, boolean_t is_metadata)
{
	return (abd_alloc(size, is_metadata));
}

abd_t *
abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off,
    size_t size)
{
	(void) size;
	int i = 0;
	struct scatterlist *sg = NULL;

	abd_verify(sabd);
	ASSERT3U(off, <=, sabd->abd_size);

	size_t new_offset = ABD_SCATTER(sabd).abd_offset + off;

	if (abd == NULL)
		abd = abd_alloc_struct(0);

	/*
	 * Even if this buf is filesystem metadata, we only track that
	 * if we own the underlying data buffer, which is not true in
	 * this case. Therefore, we don't ever use ABD_FLAG_META here.
	 */

	abd_for_each_sg(sabd, sg, ABD_SCATTER(sabd).abd_nents, i) {
		if (new_offset < sg->length)
			break;
		new_offset -= sg->length;
	}

	ABD_SCATTER(abd).abd_sgl = sg;
	ABD_SCATTER(abd).abd_offset = new_offset;
	ABD_SCATTER(abd).abd_nents = ABD_SCATTER(sabd).abd_nents - i;

	if (abd_is_from_pages(sabd))
		abd->abd_flags |= ABD_FLAG_FROM_PAGES;

	return (abd);
}

/*
 * Initialize the abd_iter.
 */
void
abd_iter_init(struct abd_iter *aiter, abd_t *abd)
{
	ASSERT(!abd_is_gang(abd));
	abd_verify(abd);
	memset(aiter, 0, sizeof (struct abd_iter));
	aiter->iter_abd = abd;
	if (!abd_is_linear(abd)) {
		aiter->iter_offset = ABD_SCATTER(abd).abd_offset;
		aiter->iter_sg = ABD_SCATTER(abd).abd_sgl;
	}
}

/*
 * This is just a helper function to see if we have exhausted the
 * abd_iter and reached the end.
 */
boolean_t
abd_iter_at_end(struct abd_iter *aiter)
{
	ASSERT3U(aiter->iter_pos, <=, aiter->iter_abd->abd_size);
	return (aiter->iter_pos == aiter->iter_abd->abd_size);
}

/*
 * Advance the iterator by a certain amount. Cannot be called when a chunk is
 * in use. This can be safely called when the aiter has already exhausted, in
 * which case this does nothing.
 */
void
abd_iter_advance(struct abd_iter *aiter, size_t amount)
{
	/*
	 * Ensure that last chunk is not in use. abd_iterate_*() must clear
	 * this state (directly or abd_iter_unmap()) before advancing.
	 */
	ASSERT0P(aiter->iter_mapaddr);
	ASSERT0(aiter->iter_mapsize);
	ASSERT0P(aiter->iter_page);
	ASSERT0(aiter->iter_page_doff);
	ASSERT0(aiter->iter_page_dsize);

	/* There's nothing left to advance to, so do nothing */
	if (abd_iter_at_end(aiter))
		return;

	aiter->iter_pos += amount;
	aiter->iter_offset += amount;
	if (!abd_is_linear(aiter->iter_abd)) {
		while (aiter->iter_offset >= aiter->iter_sg->length) {
			aiter->iter_offset -= aiter->iter_sg->length;
			aiter->iter_sg = sg_next(aiter->iter_sg);
			if (aiter->iter_sg == NULL) {
				ASSERT0(aiter->iter_offset);
				break;
			}
		}
	}
}

/*
 * Map the current chunk into aiter.
 * This can be safely called when the aiter
 * has already exhausted, in which case this does nothing.
 */
void
abd_iter_map(struct abd_iter *aiter)
{
	void *paddr;
	size_t offset = 0;

	ASSERT0P(aiter->iter_mapaddr);
	ASSERT0(aiter->iter_mapsize);

	/* There's nothing left to iterate over, so do nothing */
	if (abd_iter_at_end(aiter))
		return;

	if (abd_is_linear(aiter->iter_abd)) {
		ASSERT3U(aiter->iter_pos, ==, aiter->iter_offset);
		offset = aiter->iter_offset;
		aiter->iter_mapsize = aiter->iter_abd->abd_size - offset;
		paddr = ABD_LINEAR_BUF(aiter->iter_abd);
	} else {
		offset = aiter->iter_offset;
		aiter->iter_mapsize = MIN(aiter->iter_sg->length - offset,
		    aiter->iter_abd->abd_size - aiter->iter_pos);

		paddr = zfs_kmap_local(sg_page(aiter->iter_sg));
	}

	aiter->iter_mapaddr = (char *)paddr + offset;
}

/*
 * Unmap the current chunk from aiter. This can be safely called when the aiter
 * has already exhausted, in which case this does nothing.
 */
void
abd_iter_unmap(struct abd_iter *aiter)
{
	/* There's nothing left to unmap, so do nothing */
	if (abd_iter_at_end(aiter))
		return;

	if (!abd_is_linear(aiter->iter_abd)) {
		/* LINTED E_FUNC_SET_NOT_USED */
		zfs_kunmap_local(aiter->iter_mapaddr - aiter->iter_offset);
	}

	ASSERT3P(aiter->iter_mapaddr, !=, NULL);
	ASSERT3U(aiter->iter_mapsize, >, 0);

	aiter->iter_mapaddr = NULL;
	aiter->iter_mapsize = 0;
}

void
abd_cache_reap_now(void)
{
}

/*
 * Borrow a raw buffer from an ABD without copying the contents of the ABD
 * into the buffer. If the ABD is scattered, this will allocate a raw buffer
 * whose contents are undefined. To copy over the existing data in the ABD, use
 * abd_borrow_buf_copy() instead.
 */
void *
abd_borrow_buf(abd_t *abd, size_t n)
{
	void *buf;
	abd_verify(abd);
	ASSERT3U(abd->abd_size, >=, 0);
	/*
	 * In the event the ABD is composed of a single user page from Direct
	 * I/O we can not directly return the raw buffer. This is a consequence
	 * of not being able to write protect the page and the contents of the
	 * page can be changed at any time by the user.
	 */
	if (abd_is_from_pages(abd)) {
		buf = zio_buf_alloc(n);
	} else if (abd_is_linear(abd)) {
		buf = abd_to_buf(abd);
	} else {
		buf = zio_buf_alloc(n);
	}

#ifdef ZFS_DEBUG
	(void) zfs_refcount_add_many(&abd->abd_children, n, buf);
#endif
	return (buf);
}

void *
abd_borrow_buf_copy(abd_t *abd, size_t n)
{
	void *buf = abd_borrow_buf(abd, n);

	/*
	 * In the event the ABD is composed of a single user page from Direct
	 * I/O we must make sure to copy the data over into the newly allocated
	 * buffer. This is a consequence of the fact that we can not write
	 * protect the user page and there is a risk the contents of the page
	 * could be changed by the user at any moment.
	 */
	if (!abd_is_linear(abd) || abd_is_from_pages(abd)) {
		abd_copy_to_buf(buf, abd, n);
	}
	return (buf);
}

/*
 * Return a borrowed raw buffer to an ABD. If the ABD is scattered, this will
 * not change the contents of the ABD. If you want any changes you made to
 * buf to be copied back to abd, use abd_return_buf_copy() instead. If the
 * ABD is not constructed from user pages for Direct I/O then an ASSERT
 * checks to make sure the contents of the buffer have not changed since it was
 * borrowed.
 * We can not ASSERT that the contents of the buffer have not changed
 * if it is composed of user pages because the pages can not be placed under
 * write protection and the user could have possibly changed the contents in
 * the pages at any time. This is also an issue for Direct I/O reads. Checksum
 * verifications in the ZIO pipeline check for this issue and handle it by
 * returning an error on checksum verification failure.
 */
void
abd_return_buf(abd_t *abd, void *buf, size_t n)
{
	abd_verify(abd);
	ASSERT3U(abd->abd_size, >=, n);
#ifdef ZFS_DEBUG
	(void) zfs_refcount_remove_many(&abd->abd_children, n, buf);
#endif
	if (abd_is_from_pages(abd)) {
		zio_buf_free(buf, n);
	} else if (abd_is_linear(abd)) {
		ASSERT3P(buf, ==, abd_to_buf(abd));
	} else if (abd_is_gang(abd)) {
#ifdef ZFS_DEBUG
		/*
		 * We have to be careful with gang ABD's that we do not ASSERT0
		 * for any ABD's that contain user pages from Direct I/O. In
		 * order to handle this, we just iterate through the gang ABD
		 * and only verify ABDs that are not from user pages.
		 */
		void *cmp_buf = buf;

		for (abd_t *cabd = list_head(&ABD_GANG(abd).abd_gang_chain);
		    cabd != NULL;
		    cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd)) {
			if (!abd_is_from_pages(cabd)) {
				ASSERT0(abd_cmp_buf(cabd, cmp_buf,
				    cabd->abd_size));
			}
			cmp_buf = (char *)cmp_buf + cabd->abd_size;
		}
#endif
		zio_buf_free(buf, n);
	} else {
		ASSERT0(abd_cmp_buf(abd, buf, n));
		zio_buf_free(buf, n);
	}
}

void
abd_return_buf_copy(abd_t *abd, void *buf, size_t n)
{
	if (!abd_is_linear(abd) || abd_is_from_pages(abd)) {
		abd_copy_from_buf(abd, buf, n);
	}
	abd_return_buf(abd, buf, n);
}

/*
 * This is abd_iter_page(), the function underneath abd_iterate_page_func().
 * It yields the next page struct and data offset and size within it, without
 * mapping it into the address space.
 */

/*
 * "Compound pages" are a group of pages that can be referenced from a single
 * struct page *. It's organised as a "head" page, followed by a series of
 * "tail" pages.
 *
 * In OpenZFS, compound pages are allocated using the __GFP_COMP flag, which we
 * get from scatter ABDs and SPL vmalloc slabs (ie >16K allocations). So a
 * great many of the IO buffers we get are going to be of this type.
 *
 * The tail pages are just regular PAGESIZE pages, and can be safely used
 * as-is. However, the head page has length covering itself and all the tail
 * pages. If the ABD chunk spans multiple pages, then we can use the head page
 * and a >PAGESIZE length, which is far more efficient.
 *
 * Before kernel 4.5 however, compound page heads were refcounted separately
 * from tail pages, such that moving back to the head page would require us to
 * take a reference to it and release it once we're completely finished with
 * it. In practice, that meant when our caller is done with the ABD, which we
 * have no insight into from here. Rather than contort this API to track head
 * page references on such ancient kernels, we disabled this special compound
 * page handling on kernels before 4.5, instead just treating each page
 * within it as a regular PAGESIZE page (which it is). This is slightly less
 * efficient, but makes everything far simpler.
 *
 * We no longer support kernels before 4.5, so in theory none of this is
 * necessary.
 * However, this code is still relatively new in the grand scheme of
 * things, so I'm leaving the ability to compile this out for the moment.
 *
 * Setting/clearing ABD_ITER_COMPOUND_PAGES below enables/disables the special
 * handling, by defining the ABD_ITER_PAGE_SIZE(page) macro to understand
 * compound pages, or not, and compiling in/out the support to detect compound
 * tail pages and move back to the start.
 */

/* On by default */
#define ABD_ITER_COMPOUND_PAGES

#ifdef ABD_ITER_COMPOUND_PAGES
#define ABD_ITER_PAGE_SIZE(page) \
	(PageCompound(page) ? page_size(page) : PAGESIZE)
#else
#define ABD_ITER_PAGE_SIZE(page) (PAGESIZE)
#endif

#ifndef nth_page
/*
 * Since 6.18 nth_page() no longer exists, and is no longer required to iterate
 * within a single SG entry, so we replace it with a simple addition.
 */
#define nth_page(p, n) ((p)+(n))
#endif

void
abd_iter_page(struct abd_iter *aiter)
{
	if (abd_iter_at_end(aiter)) {
		aiter->iter_page = NULL;
		aiter->iter_page_doff = 0;
		aiter->iter_page_dsize = 0;
		return;
	}

	struct page *page;
	size_t doff, dsize;

	/*
	 * Find the page, and the start of the data within it. This is computed
	 * differently for linear and scatter ABDs; linear is referenced by
	 * virtual memory location, while scatter is referenced by page
	 * pointer.
	 */
	if (abd_is_linear(aiter->iter_abd)) {
		ASSERT3U(aiter->iter_pos, ==, aiter->iter_offset);

		/* memory address at iter_pos */
		void *paddr = ABD_LINEAR_BUF(aiter->iter_abd) + aiter->iter_pos;

		/* struct page for address */
		page = is_vmalloc_addr(paddr) ?
		    vmalloc_to_page(paddr) : virt_to_page(paddr);

		/* offset of address within the page */
		doff = offset_in_page(paddr);
	} else {
		ASSERT(!abd_is_gang(aiter->iter_abd));

		/* current scatter page */
		page = nth_page(sg_page(aiter->iter_sg),
		    aiter->iter_offset >> PAGE_SHIFT);

		/* position within page */
		doff = aiter->iter_offset & (PAGESIZE - 1);
	}

#ifdef ABD_ITER_COMPOUND_PAGES
	if (PageTail(page)) {
		/*
		 * If this is a compound tail page, move back to the head, and
		 * adjust the offset to match. This may let us yield a much
		 * larger amount of data from a single logical page, and so
		 * leave our caller with fewer pages to process.
		 */
		struct page *head = compound_head(page);
		doff += ((page - head) * PAGESIZE);
		page = head;
	}
#endif

	ASSERT(page);

	/*
	 * Compute the maximum amount of data we can take from this page. This
	 * is the smaller of:
	 * - the remaining space in the page
	 * - the remaining space in this scatterlist entry (which may not cover
	 *   the entire page)
	 * - the remaining space in the abd (which may not cover the entire
	 *   scatterlist entry)
	 */
	dsize = MIN(ABD_ITER_PAGE_SIZE(page) - doff,
	    aiter->iter_abd->abd_size - aiter->iter_pos);
	if (!abd_is_linear(aiter->iter_abd))
		dsize = MIN(dsize, aiter->iter_sg->length - aiter->iter_offset);
	ASSERT3U(dsize, >, 0);

	/* final iterator outputs */
	aiter->iter_page = page;
	aiter->iter_page_doff = doff;
	aiter->iter_page_dsize = dsize;
}

/*
 * Note: ABD BIO functions only needed to support vdev_classic.
 * See comments in vdev_disk.c.
 */

/*
 * bio_nr_pages for ABD.
 * @off is the offset in @abd
 */
unsigned long
abd_nr_pages_off(abd_t *abd, unsigned int size, size_t off)
{
	unsigned long pos;

	if (abd_is_gang(abd)) {
		unsigned long count = 0;

		for (abd_t *cabd = abd_gang_get_offset(abd, &off);
		    cabd != NULL && size != 0;
		    cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd)) {
			ASSERT3U(off, <, cabd->abd_size);
			int mysize = MIN(size, cabd->abd_size - off);
			count += abd_nr_pages_off(cabd, mysize, off);
			size -= mysize;
			off = 0;
		}
		return (count);
	}

	if (abd_is_linear(abd))
		pos = (unsigned long)abd_to_buf(abd) + off;
	else
		pos = ABD_SCATTER(abd).abd_offset + off;

	return (((pos + size + PAGESIZE - 1) >> PAGE_SHIFT) -
	    (pos >> PAGE_SHIFT));
}

static unsigned int
bio_map(struct bio *bio, void *buf_ptr, unsigned int bio_size)
{
	unsigned int offset, size, i;
	struct page *page;

	offset = offset_in_page(buf_ptr);
	for (i = 0; i < bio->bi_max_vecs; i++) {
		size = PAGE_SIZE - offset;

		if (bio_size <= 0)
			break;

		if (size > bio_size)
			size = bio_size;

		if (is_vmalloc_addr(buf_ptr))
			page = vmalloc_to_page(buf_ptr);
		else
			page = virt_to_page(buf_ptr);

		/*
		 * Some network related block devices use tcp_sendpage, which
		 * doesn't behave well when using a 0-count page; this is a
		 * safety net to catch them.
		 */
		ASSERT3S(page_count(page), >, 0);

		if (bio_add_page(bio, page, size, offset) != size)
			break;

		buf_ptr += size;
		bio_size -= size;
		offset = 0;
	}

	return (bio_size);
}

/*
 * bio_map for gang ABD.
 */
static unsigned int
abd_gang_bio_map_off(struct bio *bio, abd_t *abd,
    unsigned int io_size, size_t off)
{
	ASSERT(abd_is_gang(abd));

	for (abd_t *cabd = abd_gang_get_offset(abd, &off);
	    cabd != NULL;
	    cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd)) {
		ASSERT3U(off, <, cabd->abd_size);
		int size = MIN(io_size, cabd->abd_size - off);
		int remainder = abd_bio_map_off(bio, cabd, size, off);
		io_size -= (size - remainder);
		if (io_size == 0 || remainder > 0)
			return (io_size);
		off = 0;
	}
	ASSERT0(io_size);
	return (io_size);
}

/*
 * bio_map for ABD.
 * @off is the offset in @abd
 * Remaining IO size is returned
 */
unsigned int
abd_bio_map_off(struct bio *bio, abd_t *abd,
    unsigned int io_size, size_t off)
{
	struct abd_iter aiter;

	ASSERT3U(io_size, <=, abd->abd_size - off);
	if (abd_is_linear(abd))
		return (bio_map(bio, ((char *)abd_to_buf(abd)) + off, io_size));

	ASSERT(!abd_is_linear(abd));
	if (abd_is_gang(abd))
		return (abd_gang_bio_map_off(bio, abd, io_size, off));

	abd_iter_init(&aiter, abd);
	abd_iter_advance(&aiter, off);

	for (int i = 0; i < bio->bi_max_vecs; i++) {
		struct page *pg;
		size_t len, sgoff, pgoff;
		struct scatterlist *sg;

		if (io_size <= 0)
			break;

		sg = aiter.iter_sg;
		sgoff = aiter.iter_offset;
		pgoff = sgoff & (PAGESIZE - 1);
		len = MIN(io_size, PAGESIZE - pgoff);
		ASSERT(len > 0);

		pg = nth_page(sg_page(sg), sgoff >> PAGE_SHIFT);
		if (bio_add_page(bio, pg, len, pgoff) != len)
			break;

		io_size -= len;
		abd_iter_advance(&aiter, len);
	}

	return (io_size);
}

EXPORT_SYMBOL(abd_alloc_from_pages);

/* Tunable Parameters */
module_param(zfs_abd_scatter_enabled, int,
    0644);
MODULE_PARM_DESC(zfs_abd_scatter_enabled,
	"Toggle whether ABD allocations must be linear.");
module_param(zfs_abd_scatter_min_size, int, 0644);
MODULE_PARM_DESC(zfs_abd_scatter_min_size,
	"Minimum size of scatter allocations.");
module_param(zfs_abd_scatter_max_order, uint, 0644);
MODULE_PARM_DESC(zfs_abd_scatter_max_order,
	"Maximum order allocation used for a scatter ABD.");