Path: sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
// SPDX-License-Identifier: CDDL-1.0
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2014 by Chunwei Chen. All rights reserved.
 * Copyright (c) 2019 by Delphix. All rights reserved.
 * Copyright (c) 2023, 2024, Klara Inc.
 * Copyright (c) 2025, Rob Norris <[email protected]>
 */

/*
 * See abd.c for a general overview of the arc buffered data (ABD).
 *
 * Linear buffers act exactly like normal buffers and are always mapped into the
 * kernel's virtual memory space, while scattered ABD data chunks are allocated
 * as physical pages and then mapped in only while they are actually being
 * accessed through one of the abd_* library functions. Using scattered ABDs
 * provides several benefits:
 *
 * (1) They avoid use of kmem_*, preventing performance problems where running
 *     kmem_reap on very large memory systems never finishes and causes
 *     constant TLB shootdowns.
 *
 * (2) Fragmentation is less of an issue since when we are at the limit of
 *     allocatable space, we won't have to search around for a long free
 *     hole in the VA space for large ARC allocations. Each chunk is mapped in
 *     individually, so even if we are using HIGHMEM (see next point) we
 *     wouldn't need to worry about finding a contiguous address range.
 *
 * (3) If we are not using HIGHMEM, then all physical memory is always
 *     mapped into the kernel's address space, so we also avoid the map /
 *     unmap costs on each ABD access.
 *
 * If we are not using HIGHMEM, scattered buffers which have only one chunk
 * can be treated as linear buffers, because they are contiguous in the
 * kernel's virtual address space. See abd_alloc_chunks() for details.
 */

#include <sys/abd_impl.h>
#include <sys/param.h>
#include <sys/zio.h>
#include <sys/arc.h>
#include <sys/zfs_context.h>
#include <sys/zfs_znode.h>
#include <linux/kmap_compat.h>
#include <linux/mm_compat.h>
#include <linux/scatterlist.h>
#include <linux/version.h>

#if defined(MAX_ORDER)
#define	ABD_MAX_ORDER	(MAX_ORDER)
#elif defined(MAX_PAGE_ORDER)
#define	ABD_MAX_ORDER	(MAX_PAGE_ORDER)
#endif

typedef struct abd_stats {
	kstat_named_t abdstat_struct_size;
	kstat_named_t abdstat_linear_cnt;
	kstat_named_t abdstat_linear_data_size;
	kstat_named_t abdstat_scatter_cnt;
	kstat_named_t abdstat_scatter_data_size;
	kstat_named_t abdstat_scatter_chunk_waste;
	kstat_named_t abdstat_scatter_orders[ABD_MAX_ORDER];
	kstat_named_t abdstat_scatter_page_multi_chunk;
	kstat_named_t abdstat_scatter_page_multi_zone;
	kstat_named_t abdstat_scatter_page_alloc_retry;
	kstat_named_t abdstat_scatter_sg_table_retry;
} abd_stats_t;

static abd_stats_t abd_stats = {
	/* Amount of memory occupied by all of the abd_t struct allocations */
	{ "struct_size", KSTAT_DATA_UINT64 },
	/*
	 * The number of linear ABDs which are currently allocated, excluding
	 * ABDs which don't own their data (for instance the ones which were
	 * allocated through abd_get_offset() and abd_get_from_buf()). If an
	 * ABD takes ownership of its buf then it will become tracked.
	 */
	{ "linear_cnt", KSTAT_DATA_UINT64 },
	/* Amount of data stored in all linear ABDs tracked by linear_cnt */
	{ "linear_data_size", KSTAT_DATA_UINT64 },
	/*
	 * The number of scatter ABDs which are currently allocated, excluding
	 * ABDs which don't own their data (for instance the ones which were
	 * allocated through abd_get_offset()).
	 */
	{ "scatter_cnt", KSTAT_DATA_UINT64 },
	/* Amount of data stored in all scatter ABDs tracked by scatter_cnt */
	{ "scatter_data_size", KSTAT_DATA_UINT64 },
	/*
	 * The amount of space wasted at the end of the last chunk across all
	 * scatter ABDs tracked by scatter_cnt.
	 */
	{ "scatter_chunk_waste", KSTAT_DATA_UINT64 },
	/*
	 * The number of compound allocations of a given order.  These
	 * allocations are spread over all currently allocated ABDs, and
	 * act as a measure of memory fragmentation.
	 */
	{ { "scatter_order_N", KSTAT_DATA_UINT64 } },
	/*
	 * The number of scatter ABDs which contain multiple chunks.
	 * ABDs are preferentially allocated from the minimum number of
	 * contiguous multi-page chunks; a single chunk is optimal.
	 */
	{ "scatter_page_multi_chunk", KSTAT_DATA_UINT64 },
	/*
	 * The number of scatter ABDs which are split across memory zones.
	 * ABDs are preferentially allocated using pages from a single zone.
	 */
	{ "scatter_page_multi_zone", KSTAT_DATA_UINT64 },
	/*
	 * The total number of retries encountered when attempting to
	 * allocate the pages to populate the scatter ABD.
	 */
	{ "scatter_page_alloc_retry", KSTAT_DATA_UINT64 },
	/*
	 * The total number of retries encountered when attempting to
	 * allocate the sg table for an ABD.
	 */
	{ "scatter_sg_table_retry", KSTAT_DATA_UINT64 },
};

static struct {
	wmsum_t abdstat_struct_size;
	wmsum_t abdstat_linear_cnt;
	wmsum_t abdstat_linear_data_size;
	wmsum_t abdstat_scatter_cnt;
	wmsum_t abdstat_scatter_data_size;
	wmsum_t abdstat_scatter_chunk_waste;
	wmsum_t abdstat_scatter_orders[ABD_MAX_ORDER];
	wmsum_t abdstat_scatter_page_multi_chunk;
	wmsum_t abdstat_scatter_page_multi_zone;
	wmsum_t abdstat_scatter_page_alloc_retry;
	wmsum_t abdstat_scatter_sg_table_retry;
} abd_sums;

#define	abd_for_each_sg(abd, sg, n, i)	\
	for_each_sg(ABD_SCATTER(abd).abd_sgl, sg, n, i)

/*
 * zfs_abd_scatter_min_size is the minimum allocation size to use scatter
 * ABD's.  Smaller allocations will use linear ABD's, which use
 * zio_[data_]buf_alloc().
 *
 * Scatter ABD's use at least one page each, so sub-page allocations waste
 * some space when allocated as scatter (e.g. 2KB scatter allocation wastes
 * half of each page).  Using linear ABD's for small allocations means that
 * they will be put on slabs which contain many allocations.  This can
 * improve memory efficiency, but it also makes it much harder for ARC
 * evictions to actually free pages, because all the buffers on one slab need
 * to be freed in order for the slab (and underlying pages) to be freed.
 * Typically, 512B and 1KB kmem caches have 16 buffers per slab, so it's
 * possible for them to actually waste more memory than scatter (one page per
 * buf = wasting 3/4 or 7/8th; one buf per slab = wasting 15/16th).
 *
 * Spill blocks are typically 512B and are heavily used on systems running
 * selinux with the default dnode size and the `xattr=sa` property set.
 *
 * By default we use linear allocations for 512B and 1KB, and scatter
 * allocations for larger (1.5KB and up).
 */
static int zfs_abd_scatter_min_size = 512 * 3;

/*
 * We use a scattered SPA_MAXBLOCKSIZE sized ABD whose pages are
 * just a single zero'd page. This allows us to conserve memory by
 * only using a single zero page for the scatterlist.
 */
abd_t *abd_zero_scatter = NULL;

struct page;

/*
 * abd_zero_page is assigned to each of the pages of abd_zero_scatter. It will
 * point to ZERO_PAGE if it is available or it will be an allocated zero'd
 * PAGESIZE buffer.
 */
static struct page *abd_zero_page = NULL;

static kmem_cache_t *abd_cache = NULL;
static kstat_t *abd_ksp;

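/*
 * Number of whole PAGESIZE chunks needed to hold a buffer of the given size.
 * For example, with 4K pages a 6000-byte request rounds up to two chunks
 * (8K); the unused tail of the final chunk is what scatter_chunk_waste
 * accounts for.
 */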
static uint_t
abd_chunkcnt_for_bytes(size_t size)
{
	return (P2ROUNDUP(size, PAGESIZE) / PAGESIZE);
}

abd_t *
abd_alloc_struct_impl(size_t size)
{
	/*
	 * In Linux we do not use the size passed in during ABD
	 * allocation, so we just ignore it.
	 */
	(void) size;
	abd_t *abd = kmem_cache_alloc(abd_cache, KM_PUSHPAGE);
	ASSERT3P(abd, !=, NULL);
	ABDSTAT_INCR(abdstat_struct_size, sizeof (abd_t));

	return (abd);
}

void
abd_free_struct_impl(abd_t *abd)
{
	kmem_cache_free(abd_cache, abd);
	ABDSTAT_INCR(abdstat_struct_size, -(int)sizeof (abd_t));
}

static unsigned zfs_abd_scatter_max_order = ABD_MAX_ORDER - 1;

/*
 * Mark zfs data pages so they can be excluded from kernel crash dumps
 */
#ifdef _LP64
#define	ABD_FILE_CACHE_PAGE	0x2F5ABDF11ECAC4E

static inline void
abd_mark_zfs_page(struct page *page)
{
	get_page(page);
	SetPagePrivate(page);
	set_page_private(page, ABD_FILE_CACHE_PAGE);
}

static inline void
abd_unmark_zfs_page(struct page *page)
{
	set_page_private(page, 0UL);
	ClearPagePrivate(page);
	put_page(page);
}
#else
#define	abd_mark_zfs_page(page)
#define	abd_unmark_zfs_page(page)
#endif /* _LP64 */

#ifndef CONFIG_HIGHMEM

/*
 * The goal is to minimize fragmentation by preferentially populating ABDs
 * with higher order compound pages from a single zone. Allocation size is
 * progressively decreased until it can be satisfied without performing
 * reclaim or compaction. When necessary this function will degenerate to
 * allocating individual pages and allowing reclaim to satisfy allocations.
 */
void
abd_alloc_chunks(abd_t *abd, size_t size)
{
	struct list_head pages;
	struct sg_table table;
	struct scatterlist *sg;
	struct page *page, *tmp_page = NULL;
	gfp_t gfp = __GFP_RECLAIMABLE | __GFP_NOWARN | GFP_NOIO;
	gfp_t gfp_comp = (gfp | __GFP_NORETRY | __GFP_COMP) & ~__GFP_RECLAIM;
	unsigned int max_order = MIN(zfs_abd_scatter_max_order,
	    ABD_MAX_ORDER - 1);
	unsigned int nr_pages = abd_chunkcnt_for_bytes(size);
	unsigned int chunks = 0, zones = 0;
	size_t remaining_size;
	int nid = NUMA_NO_NODE;
	unsigned int alloc_pages = 0;

	INIT_LIST_HEAD(&pages);

	ASSERT3U(alloc_pages, <, nr_pages);

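	/*
	 * Each pass requests the largest power-of-two compound page that
	 * still fits within the pages remaining, capped at max_order.  If a
	 * higher-order allocation fails, the cap is lowered and we retry;
	 * once we are down to single pages we sleep briefly between retries
	 * until the allocation succeeds.
	 */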
	while (alloc_pages < nr_pages) {
		unsigned int chunk_pages;
		unsigned int order;

		order = MIN(highbit64(nr_pages - alloc_pages) - 1, max_order);
		chunk_pages = (1U << order);

		page = alloc_pages_node(nid, order ? gfp_comp : gfp, order);
		if (page == NULL) {
			if (order == 0) {
				ABDSTAT_BUMP(abdstat_scatter_page_alloc_retry);
				schedule_timeout_interruptible(1);
			} else {
				max_order = MAX(0, order - 1);
			}
			continue;
		}

		list_add_tail(&page->lru, &pages);

		if ((nid != NUMA_NO_NODE) && (page_to_nid(page) != nid))
			zones++;

		nid = page_to_nid(page);
		ABDSTAT_BUMP(abdstat_scatter_orders[order]);
		chunks++;
		alloc_pages += chunk_pages;
	}

	ASSERT3S(alloc_pages, ==, nr_pages);

	while (sg_alloc_table(&table, chunks, gfp)) {
		ABDSTAT_BUMP(abdstat_scatter_sg_table_retry);
		schedule_timeout_interruptible(1);
	}

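	/*
	 * Walk the pages just allocated and build the sg table: each
	 * compound chunk becomes a single sg entry, with its length capped
	 * by the bytes still required to reach the requested size.
	 */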
	sg = table.sgl;
	remaining_size = size;
	list_for_each_entry_safe(page, tmp_page, &pages, lru) {
		size_t sg_size = MIN(PAGESIZE << compound_order(page),
		    remaining_size);
		sg_set_page(sg, page, sg_size, 0);
		abd_mark_zfs_page(page);
		remaining_size -= sg_size;

		sg = sg_next(sg);
		list_del(&page->lru);
	}

	/*
	 * These conditions ensure that a possible transformation to a linear
	 * ABD would be valid.
	 */
	ASSERT(!PageHighMem(sg_page(table.sgl)));
	ASSERT0(ABD_SCATTER(abd).abd_offset);

	if (table.nents == 1) {
		/*
		 * Since there is only one entry, this ABD can be represented
		 * as a linear buffer. All single-page (4K) ABD's can be
		 * represented this way. Some multi-page ABD's can also be
		 * represented this way, if we were able to allocate a single
		 * "chunk" (higher-order "page" which represents a power-of-2
		 * series of physically-contiguous pages). This is often the
		 * case for 2-page (8K) ABD's.
		 *
		 * Representing a single-entry scatter ABD as a linear ABD
		 * has the performance advantage of avoiding the copy (and
		 * allocation) in abd_borrow_buf_copy / abd_return_buf_copy.
		 * A performance increase of around 5% has been observed for
		 * ARC-cached reads (of small blocks which can take advantage
		 * of this).
		 *
		 * Note that this optimization is only possible because the
		 * pages are always mapped into the kernel's address space.
		 * This is not the case for highmem pages, so the
		 * optimization can not be made there.
		 */
		abd->abd_flags |= ABD_FLAG_LINEAR;
		abd->abd_flags |= ABD_FLAG_LINEAR_PAGE;
		abd->abd_u.abd_linear.abd_sgl = table.sgl;
		ABD_LINEAR_BUF(abd) = page_address(sg_page(table.sgl));
	} else if (table.nents > 1) {
		ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk);
		abd->abd_flags |= ABD_FLAG_MULTI_CHUNK;

		if (zones) {
			ABDSTAT_BUMP(abdstat_scatter_page_multi_zone);
			abd->abd_flags |= ABD_FLAG_MULTI_ZONE;
		}

		ABD_SCATTER(abd).abd_sgl = table.sgl;
		ABD_SCATTER(abd).abd_nents = table.nents;
	}
}
#else

/*
 * Allocate N individual pages to construct a scatter ABD. This function
 * makes no attempt to request contiguous pages and requires the minimal
 * number of kernel interfaces. It's designed for maximum compatibility.
 */
void
abd_alloc_chunks(abd_t *abd, size_t size)
{
	struct scatterlist *sg = NULL;
	struct sg_table table;
	struct page *page;
	gfp_t gfp = __GFP_RECLAIMABLE | __GFP_NOWARN | GFP_NOIO;
	int nr_pages = abd_chunkcnt_for_bytes(size);
	int i = 0;

	while (sg_alloc_table(&table, nr_pages, gfp)) {
		ABDSTAT_BUMP(abdstat_scatter_sg_table_retry);
		schedule_timeout_interruptible(1);
	}

	ASSERT3U(table.nents, ==, nr_pages);
	ABD_SCATTER(abd).abd_sgl = table.sgl;
	ABD_SCATTER(abd).abd_nents = nr_pages;

	abd_for_each_sg(abd, sg, nr_pages, i) {
		while ((page = __page_cache_alloc(gfp)) == NULL) {
			ABDSTAT_BUMP(abdstat_scatter_page_alloc_retry);
			schedule_timeout_interruptible(1);
		}

		ABDSTAT_BUMP(abdstat_scatter_orders[0]);
		sg_set_page(sg, page, PAGESIZE, 0);
		abd_mark_zfs_page(page);
	}

	if (nr_pages > 1) {
		ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk);
		abd->abd_flags |= ABD_FLAG_MULTI_CHUNK;
	}
}
#endif /* !CONFIG_HIGHMEM */

/*
 * This must be called if any of the sg_table allocation functions
 * are called.
 */
static void
abd_free_sg_table(abd_t *abd)
{
	struct sg_table table;

	table.sgl = ABD_SCATTER(abd).abd_sgl;
	table.nents = table.orig_nents = ABD_SCATTER(abd).abd_nents;
	sg_free_table(&table);
}

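/*
 * Release the pages backing a scatter ABD and free its sg table.  Pages
 * handed to us by abd_alloc_from_pages() are owned by the caller and are
 * left untouched; only the sg table is freed in that case.
 */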
void
abd_free_chunks(abd_t *abd)
{
	struct scatterlist *sg = NULL;
	struct page *page;
	int nr_pages = ABD_SCATTER(abd).abd_nents;
	int order, i = 0;

	if (abd->abd_flags & ABD_FLAG_MULTI_ZONE)
		ABDSTAT_BUMPDOWN(abdstat_scatter_page_multi_zone);

	if (abd->abd_flags & ABD_FLAG_MULTI_CHUNK)
		ABDSTAT_BUMPDOWN(abdstat_scatter_page_multi_chunk);

	/*
	 * Scatter ABDs may be constructed by abd_alloc_from_pages() from
	 * an array of pages, in which case those pages should not be freed.
	 */
	if (!abd_is_from_pages(abd)) {
		abd_for_each_sg(abd, sg, nr_pages, i) {
			page = sg_page(sg);
			abd_unmark_zfs_page(page);
			order = compound_order(page);
			__free_pages(page, order);
			ASSERT3U(sg->length, <=, PAGE_SIZE << order);
			ABDSTAT_BUMPDOWN(abdstat_scatter_orders[order]);
		}
	}

	abd_free_sg_table(abd);
}

/*
 * Allocate scatter ABD of size SPA_MAXBLOCKSIZE, where each page in
 * the scatterlist will be set to the zero'd out buffer abd_zero_page.
 */
static void
abd_alloc_zero_scatter(void)
{
	struct scatterlist *sg = NULL;
	struct sg_table table;
	gfp_t gfp = __GFP_NOWARN | GFP_NOIO;
	int nr_pages = abd_chunkcnt_for_bytes(SPA_MAXBLOCKSIZE);
	int i = 0;

#if defined(HAVE_ZERO_PAGE_GPL_ONLY)
	gfp_t gfp_zero_page = gfp | __GFP_ZERO;
	while ((abd_zero_page = __page_cache_alloc(gfp_zero_page)) == NULL) {
		ABDSTAT_BUMP(abdstat_scatter_page_alloc_retry);
		schedule_timeout_interruptible(1);
	}
	abd_mark_zfs_page(abd_zero_page);
#else
	abd_zero_page = ZERO_PAGE(0);
#endif /* HAVE_ZERO_PAGE_GPL_ONLY */

	while (sg_alloc_table(&table, nr_pages, gfp)) {
		ABDSTAT_BUMP(abdstat_scatter_sg_table_retry);
		schedule_timeout_interruptible(1);
	}
	ASSERT3U(table.nents, ==, nr_pages);

	abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE);
	abd_zero_scatter->abd_flags |= ABD_FLAG_OWNER;
	ABD_SCATTER(abd_zero_scatter).abd_offset = 0;
	ABD_SCATTER(abd_zero_scatter).abd_sgl = table.sgl;
	ABD_SCATTER(abd_zero_scatter).abd_nents = nr_pages;
	abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE;
	abd_zero_scatter->abd_flags |= ABD_FLAG_MULTI_CHUNK;

	abd_for_each_sg(abd_zero_scatter, sg, nr_pages, i) {
		sg_set_page(sg, abd_zero_page, PAGESIZE, 0);
	}

	ABDSTAT_BUMP(abdstat_scatter_cnt);
	ABDSTAT_INCR(abdstat_scatter_data_size, PAGESIZE);
	ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk);
}

boolean_t
abd_size_alloc_linear(size_t size)
{
	return (!zfs_abd_scatter_enabled || size < zfs_abd_scatter_min_size);
}

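/*
 * Scatter ABDs are accounted in whole pages, so the bytes between abd_size
 * and the end of the final page are charged to scatter_chunk_waste and
 * reported to the ARC as ARC_SPACE_ABD_CHUNK_WASTE.
 */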
void
abd_update_scatter_stats(abd_t *abd, abd_stats_op_t op)
{
	ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
	int waste = P2ROUNDUP(abd->abd_size, PAGESIZE) - abd->abd_size;
	if (op == ABDSTAT_INCR) {
		ABDSTAT_BUMP(abdstat_scatter_cnt);
		ABDSTAT_INCR(abdstat_scatter_data_size, abd->abd_size);
		ABDSTAT_INCR(abdstat_scatter_chunk_waste, waste);
		arc_space_consume(waste, ARC_SPACE_ABD_CHUNK_WASTE);
	} else {
		ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
		ABDSTAT_INCR(abdstat_scatter_data_size, -(int)abd->abd_size);
		ABDSTAT_INCR(abdstat_scatter_chunk_waste, -waste);
		arc_space_return(waste, ARC_SPACE_ABD_CHUNK_WASTE);
	}
}

void
abd_update_linear_stats(abd_t *abd, abd_stats_op_t op)
{
	ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
	if (op == ABDSTAT_INCR) {
		ABDSTAT_BUMP(abdstat_linear_cnt);
		ABDSTAT_INCR(abdstat_linear_data_size, abd->abd_size);
	} else {
		ABDSTAT_BUMPDOWN(abdstat_linear_cnt);
		ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size);
	}
}

void
abd_verify_scatter(abd_t *abd)
{
	ASSERT3U(ABD_SCATTER(abd).abd_nents, >, 0);
	ASSERT3U(ABD_SCATTER(abd).abd_offset, <,
	    ABD_SCATTER(abd).abd_sgl->length);

#ifdef ZFS_DEBUG
	struct scatterlist *sg = NULL;
	size_t n = ABD_SCATTER(abd).abd_nents;
	int i = 0;

	abd_for_each_sg(abd, sg, n, i) {
		ASSERT3P(sg_page(sg), !=, NULL);
	}
#endif
}

static void
abd_free_zero_scatter(void)
{
	ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
	ABDSTAT_INCR(abdstat_scatter_data_size, -(int)PAGESIZE);
	ABDSTAT_BUMPDOWN(abdstat_scatter_page_multi_chunk);

	abd_free_sg_table(abd_zero_scatter);
	abd_free_struct(abd_zero_scatter);
	abd_zero_scatter = NULL;
	ASSERT3P(abd_zero_page, !=, NULL);
#if defined(HAVE_ZERO_PAGE_GPL_ONLY)
	abd_unmark_zfs_page(abd_zero_page);
	__free_page(abd_zero_page);
#endif /* HAVE_ZERO_PAGE_GPL_ONLY */
}

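/*
 * kstat update callback: fold the wmsum counters into the named kstat
 * values.  The kstat is read-only; writes are rejected with EACCES.
 */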
static int
abd_kstats_update(kstat_t *ksp, int rw)
{
	abd_stats_t *as = ksp->ks_data;

	if (rw == KSTAT_WRITE)
		return (EACCES);
	as->abdstat_struct_size.value.ui64 =
	    wmsum_value(&abd_sums.abdstat_struct_size);
	as->abdstat_linear_cnt.value.ui64 =
	    wmsum_value(&abd_sums.abdstat_linear_cnt);
	as->abdstat_linear_data_size.value.ui64 =
	    wmsum_value(&abd_sums.abdstat_linear_data_size);
	as->abdstat_scatter_cnt.value.ui64 =
	    wmsum_value(&abd_sums.abdstat_scatter_cnt);
	as->abdstat_scatter_data_size.value.ui64 =
	    wmsum_value(&abd_sums.abdstat_scatter_data_size);
	as->abdstat_scatter_chunk_waste.value.ui64 =
	    wmsum_value(&abd_sums.abdstat_scatter_chunk_waste);
	for (int i = 0; i < ABD_MAX_ORDER; i++) {
		as->abdstat_scatter_orders[i].value.ui64 =
		    wmsum_value(&abd_sums.abdstat_scatter_orders[i]);
	}
	as->abdstat_scatter_page_multi_chunk.value.ui64 =
	    wmsum_value(&abd_sums.abdstat_scatter_page_multi_chunk);
	as->abdstat_scatter_page_multi_zone.value.ui64 =
	    wmsum_value(&abd_sums.abdstat_scatter_page_multi_zone);
	as->abdstat_scatter_page_alloc_retry.value.ui64 =
	    wmsum_value(&abd_sums.abdstat_scatter_page_alloc_retry);
	as->abdstat_scatter_sg_table_retry.value.ui64 =
	    wmsum_value(&abd_sums.abdstat_scatter_sg_table_retry);
	return (0);
}

void
abd_init(void)
{
	int i;

	abd_cache = kmem_cache_create("abd_t", sizeof (abd_t),
	    0, NULL, NULL, NULL, NULL, NULL, KMC_RECLAIMABLE);

	wmsum_init(&abd_sums.abdstat_struct_size, 0);
	wmsum_init(&abd_sums.abdstat_linear_cnt, 0);
	wmsum_init(&abd_sums.abdstat_linear_data_size, 0);
	wmsum_init(&abd_sums.abdstat_scatter_cnt, 0);
	wmsum_init(&abd_sums.abdstat_scatter_data_size, 0);
	wmsum_init(&abd_sums.abdstat_scatter_chunk_waste, 0);
	for (i = 0; i < ABD_MAX_ORDER; i++)
		wmsum_init(&abd_sums.abdstat_scatter_orders[i], 0);
	wmsum_init(&abd_sums.abdstat_scatter_page_multi_chunk, 0);
	wmsum_init(&abd_sums.abdstat_scatter_page_multi_zone, 0);
	wmsum_init(&abd_sums.abdstat_scatter_page_alloc_retry, 0);
	wmsum_init(&abd_sums.abdstat_scatter_sg_table_retry, 0);

	abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED,
	    sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
	if (abd_ksp != NULL) {
		for (i = 0; i < ABD_MAX_ORDER; i++) {
			snprintf(abd_stats.abdstat_scatter_orders[i].name,
			    KSTAT_STRLEN, "scatter_order_%d", i);
			abd_stats.abdstat_scatter_orders[i].data_type =
			    KSTAT_DATA_UINT64;
		}
		abd_ksp->ks_data = &abd_stats;
		abd_ksp->ks_update = abd_kstats_update;
		kstat_install(abd_ksp);
	}

	abd_alloc_zero_scatter();
}

void
abd_fini(void)
{
	abd_free_zero_scatter();

	if (abd_ksp != NULL) {
		kstat_delete(abd_ksp);
		abd_ksp = NULL;
	}

	wmsum_fini(&abd_sums.abdstat_struct_size);
	wmsum_fini(&abd_sums.abdstat_linear_cnt);
	wmsum_fini(&abd_sums.abdstat_linear_data_size);
	wmsum_fini(&abd_sums.abdstat_scatter_cnt);
	wmsum_fini(&abd_sums.abdstat_scatter_data_size);
	wmsum_fini(&abd_sums.abdstat_scatter_chunk_waste);
	for (int i = 0; i < ABD_MAX_ORDER; i++)
		wmsum_fini(&abd_sums.abdstat_scatter_orders[i]);
	wmsum_fini(&abd_sums.abdstat_scatter_page_multi_chunk);
	wmsum_fini(&abd_sums.abdstat_scatter_page_multi_zone);
	wmsum_fini(&abd_sums.abdstat_scatter_page_alloc_retry);
	wmsum_fini(&abd_sums.abdstat_scatter_sg_table_retry);

	if (abd_cache) {
		kmem_cache_destroy(abd_cache);
		abd_cache = NULL;
	}
}

void
abd_free_linear_page(abd_t *abd)
{
	/* Transform it back into a scatter ABD for freeing */
	struct scatterlist *sg = abd->abd_u.abd_linear.abd_sgl;

	/* When backed by a user page, unmap it */
	if (abd_is_from_pages(abd))
		zfs_kunmap(sg_page(sg));
	else
		abd_update_scatter_stats(abd, ABDSTAT_DECR);

	abd->abd_flags &= ~ABD_FLAG_LINEAR;
	abd->abd_flags &= ~ABD_FLAG_LINEAR_PAGE;
	ABD_SCATTER(abd).abd_nents = 1;
	ABD_SCATTER(abd).abd_offset = 0;
	ABD_SCATTER(abd).abd_sgl = sg;
	abd_free_chunks(abd);
}

/*
 * Allocate a scatter ABD structure from user pages. The pages must be
 * pinned with get_user_pages, or similar, but need not be mapped via
 * the kmap interfaces.
 */
abd_t *
abd_alloc_from_pages(struct page **pages, unsigned long offset, uint64_t size)
{
	uint_t npages = DIV_ROUND_UP(size, PAGE_SIZE);
	struct sg_table table;

	VERIFY3U(size, <=, DMU_MAX_ACCESS);
	ASSERT3U(offset, <, PAGE_SIZE);
	ASSERT3P(pages, !=, NULL);

	/*
	 * Even if this buf is filesystem metadata, we only track that we
	 * own the underlying data buffer, which is not true in this case.
	 * Therefore, we don't ever use ABD_FLAG_META here.
	 */
	abd_t *abd = abd_alloc_struct(0);
	abd->abd_flags |= ABD_FLAG_FROM_PAGES | ABD_FLAG_OWNER;
	abd->abd_size = size;

	while (sg_alloc_table_from_pages(&table, pages, npages, offset,
	    size, __GFP_NOWARN | GFP_NOIO) != 0) {
		ABDSTAT_BUMP(abdstat_scatter_sg_table_retry);
		schedule_timeout_interruptible(1);
	}

	if ((offset + size) <= PAGE_SIZE) {
		/*
		 * Since there is only one entry, this ABD can be represented
		 * as a linear buffer. All single-page (4K) ABD's constructed
		 * from a user page can be represented this way as long as the
		 * page is mapped to a virtual address. This allows us to
		 * apply an offset into the mapped page.
		 *
		 * Note that kmap() must be used, not kmap_atomic(), because
		 * the mapping needs to be set up on all CPUs. Using kmap()
		 * also enables the use of highmem pages when required.
		 */
		abd->abd_flags |= ABD_FLAG_LINEAR | ABD_FLAG_LINEAR_PAGE;
		abd->abd_u.abd_linear.abd_sgl = table.sgl;
		zfs_kmap(sg_page(table.sgl));
		ABD_LINEAR_BUF(abd) = sg_virt(table.sgl);
	} else {
		ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk);
		abd->abd_flags |= ABD_FLAG_MULTI_CHUNK;

		ABD_SCATTER(abd).abd_offset = offset;
		ABD_SCATTER(abd).abd_sgl = table.sgl;
		ABD_SCATTER(abd).abd_nents = table.nents;

		ASSERT0(ABD_SCATTER(abd).abd_offset);
	}

	return (abd);
}

/*
 * If we're going to use this ABD for doing I/O using the block layer, the
 * consumer of the ABD data doesn't care if it's scattered or not, and we don't
 * plan to store this ABD in memory for a long period of time, we should
 * allocate the ABD type that requires the least data copying to do the I/O.
 *
 * On Linux the optimal thing to do would be to use abd_get_offset() and
 * construct a new ABD which shares the original pages thereby eliminating
 * the copy. But for the moment a new linear ABD is allocated until this
 * performance optimization can be implemented.
 */
abd_t *
abd_alloc_for_io(size_t size, boolean_t is_metadata)
{
	return (abd_alloc(size, is_metadata));
}

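/*
 * Set up (or allocate) an ABD that references a sub-range of sabd without
 * copying any data: walk the source scatterlist to the entry containing the
 * requested offset and point the new ABD at it.
 */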
abd_t *
abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off,
    size_t size)
{
	(void) size;
	int i = 0;
	struct scatterlist *sg = NULL;

	abd_verify(sabd);
	ASSERT3U(off, <=, sabd->abd_size);

	size_t new_offset = ABD_SCATTER(sabd).abd_offset + off;

	if (abd == NULL)
		abd = abd_alloc_struct(0);

	/*
	 * Even if this buf is filesystem metadata, we only track that
	 * if we own the underlying data buffer, which is not true in
	 * this case. Therefore, we don't ever use ABD_FLAG_META here.
	 */

	abd_for_each_sg(sabd, sg, ABD_SCATTER(sabd).abd_nents, i) {
		if (new_offset < sg->length)
			break;
		new_offset -= sg->length;
	}

	ABD_SCATTER(abd).abd_sgl = sg;
	ABD_SCATTER(abd).abd_offset = new_offset;
	ABD_SCATTER(abd).abd_nents = ABD_SCATTER(sabd).abd_nents - i;

	if (abd_is_from_pages(sabd))
		abd->abd_flags |= ABD_FLAG_FROM_PAGES;

	return (abd);
}

/*
 * Initialize the abd_iter.
 */
void
abd_iter_init(struct abd_iter *aiter, abd_t *abd)
{
	ASSERT(!abd_is_gang(abd));
	abd_verify(abd);
	memset(aiter, 0, sizeof (struct abd_iter));
	aiter->iter_abd = abd;
	if (!abd_is_linear(abd)) {
		aiter->iter_offset = ABD_SCATTER(abd).abd_offset;
		aiter->iter_sg = ABD_SCATTER(abd).abd_sgl;
	}
}

/*
 * This is just a helper function to see if we have exhausted the
 * abd_iter and reached the end.
 */
boolean_t
abd_iter_at_end(struct abd_iter *aiter)
{
	ASSERT3U(aiter->iter_pos, <=, aiter->iter_abd->abd_size);
	return (aiter->iter_pos == aiter->iter_abd->abd_size);
}

/*
 * Advance the iterator by a certain amount. Cannot be called when a chunk is
 * in use. This can be safely called when the aiter has already exhausted, in
 * which case this does nothing.
 */
void
abd_iter_advance(struct abd_iter *aiter, size_t amount)
{
	/*
	 * Ensure that last chunk is not in use. abd_iterate_*() must clear
	 * this state (directly or abd_iter_unmap()) before advancing.
	 */
	ASSERT0P(aiter->iter_mapaddr);
	ASSERT0(aiter->iter_mapsize);
	ASSERT0P(aiter->iter_page);
	ASSERT0(aiter->iter_page_doff);
	ASSERT0(aiter->iter_page_dsize);

	/* There's nothing left to advance to, so do nothing */
	if (abd_iter_at_end(aiter))
		return;

	aiter->iter_pos += amount;
	aiter->iter_offset += amount;
	if (!abd_is_linear(aiter->iter_abd)) {
		while (aiter->iter_offset >= aiter->iter_sg->length) {
			aiter->iter_offset -= aiter->iter_sg->length;
			aiter->iter_sg = sg_next(aiter->iter_sg);
			if (aiter->iter_sg == NULL) {
				ASSERT0(aiter->iter_offset);
				break;
			}
		}
	}
}

#ifndef nth_page
/*
 * Since 6.18 nth_page() no longer exists, and is no longer required to iterate
 * within a single SG entry, so we replace it with a simple addition.
 */
#define	nth_page(p, n)	((p)+(n))
#endif

/*
 * Map the current chunk into aiter. This can be safely called when the aiter
 * has already exhausted, in which case this does nothing.
 */
void
abd_iter_map(struct abd_iter *aiter)
{
	void *paddr;
	size_t offset = 0;

	ASSERT0P(aiter->iter_mapaddr);
	ASSERT0(aiter->iter_mapsize);

	/* There's nothing left to iterate over, so do nothing */
	if (abd_iter_at_end(aiter))
		return;

	if (abd_is_linear(aiter->iter_abd)) {
		ASSERT3U(aiter->iter_pos, ==, aiter->iter_offset);
		offset = aiter->iter_offset;
		aiter->iter_mapsize = aiter->iter_abd->abd_size - offset;
		paddr = ABD_LINEAR_BUF(aiter->iter_abd);
	} else {
		offset = aiter->iter_offset;
		aiter->iter_mapsize = MIN(aiter->iter_sg->length - offset,
		    aiter->iter_abd->abd_size - aiter->iter_pos);

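		/*
		 * Highmem pages must be mapped one PAGESIZE page at a time
		 * with zfs_kmap_local(), so step to the page containing the
		 * current offset and clamp the mapping to the end of that
		 * page.
		 */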
		struct page *page = sg_page(aiter->iter_sg);
		if (PageHighMem(page)) {
			page = nth_page(page, offset / PAGE_SIZE);
			offset &= PAGE_SIZE - 1;
			aiter->iter_mapsize = MIN(aiter->iter_mapsize,
			    PAGE_SIZE - offset);
		}
		paddr = zfs_kmap_local(page);
	}

	aiter->iter_mapaddr = (char *)paddr + offset;
}

/*
 * Unmap the current chunk from aiter. This can be safely called when the aiter
 * has already exhausted, in which case this does nothing.
 */
void
abd_iter_unmap(struct abd_iter *aiter)
{
	/* There's nothing left to unmap, so do nothing */
	if (abd_iter_at_end(aiter))
		return;

	if (!abd_is_linear(aiter->iter_abd)) {
		size_t offset = aiter->iter_offset;

		struct page *page = sg_page(aiter->iter_sg);
		if (PageHighMem(page))
			offset &= PAGE_SIZE - 1;

		/* LINTED E_FUNC_SET_NOT_USED */
		zfs_kunmap_local(aiter->iter_mapaddr - offset);
	}

	ASSERT3P(aiter->iter_mapaddr, !=, NULL);
	ASSERT3U(aiter->iter_mapsize, >, 0);

	aiter->iter_mapaddr = NULL;
	aiter->iter_mapsize = 0;
}

void
abd_cache_reap_now(void)
{
}

/*
 * Borrow a raw buffer from an ABD without copying the contents of the ABD
 * into the buffer. If the ABD is scattered, this will allocate a raw buffer
 * whose contents are undefined. To copy over the existing data in the ABD, use
 * abd_borrow_buf_copy() instead.
 */
void *
abd_borrow_buf(abd_t *abd, size_t n)
{
	void *buf;
	abd_verify(abd);
	ASSERT3U(abd->abd_size, >=, 0);
	/*
	 * In the event the ABD is composed of a single user page from Direct
	 * I/O we can not directly return the raw buffer. This is a consequence
	 * of not being able to write protect the page and the contents of the
	 * page can be changed at any time by the user.
	 */
	if (abd_is_from_pages(abd)) {
		buf = zio_buf_alloc(n);
	} else if (abd_is_linear(abd)) {
		buf = abd_to_buf(abd);
	} else {
		buf = zio_buf_alloc(n);
	}

#ifdef ZFS_DEBUG
	(void) zfs_refcount_add_many(&abd->abd_children, n, buf);
#endif
	return (buf);
}

void *
abd_borrow_buf_copy(abd_t *abd, size_t n)
{
	void *buf = abd_borrow_buf(abd, n);

	/*
	 * In the event the ABD is composed of a single user page from Direct
	 * I/O we must make sure to copy the data over into the newly allocated
	 * buffer. This is a consequence of the fact that we can not write
	 * protect the user page and there is a risk the contents of the page
	 * could be changed by the user at any moment.
	 */
	if (!abd_is_linear(abd) || abd_is_from_pages(abd)) {
		abd_copy_to_buf(buf, abd, n);
	}
	return (buf);
}

/*
 * Return a borrowed raw buffer to an ABD. If the ABD is scattered, this will
 * not change the contents of the ABD. If you want any changes you made to
 * buf to be copied back to abd, use abd_return_buf_copy() instead. If the
 * ABD is not constructed from user pages for Direct I/O then an ASSERT
 * checks to make sure the contents of the buffer have not changed since it
 * was borrowed. We can not ASSERT that the contents of the buffer have not
 * changed if it is composed of user pages because the pages can not be placed
 * under write protection and the user could have possibly changed the
 * contents in the pages at any time. This is also an issue for Direct I/O
 * reads. Checksum verifications in the ZIO pipeline check for this issue and
 * handle it by returning an error on checksum verification failure.
 */
void
abd_return_buf(abd_t *abd, void *buf, size_t n)
{
	abd_verify(abd);
	ASSERT3U(abd->abd_size, >=, n);
#ifdef ZFS_DEBUG
	(void) zfs_refcount_remove_many(&abd->abd_children, n, buf);
#endif
	if (abd_is_from_pages(abd)) {
		zio_buf_free(buf, n);
	} else if (abd_is_linear(abd)) {
		ASSERT3P(buf, ==, abd_to_buf(abd));
	} else if (abd_is_gang(abd)) {
#ifdef ZFS_DEBUG
		/*
		 * We have to be careful with gang ABD's that we do not ASSERT0
		 * for any ABD's that contain user pages from Direct I/O. In
		 * order to handle this, we just iterate through the gang ABD
		 * and only verify ABDs that are not from user pages.
		 */
		void *cmp_buf = buf;

		for (abd_t *cabd = list_head(&ABD_GANG(abd).abd_gang_chain);
		    cabd != NULL;
		    cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd)) {
			if (!abd_is_from_pages(cabd)) {
				ASSERT0(abd_cmp_buf(cabd, cmp_buf,
				    cabd->abd_size));
			}
			cmp_buf = (char *)cmp_buf + cabd->abd_size;
		}
#endif
		zio_buf_free(buf, n);
	} else {
		ASSERT0(abd_cmp_buf(abd, buf, n));
		zio_buf_free(buf, n);
	}
}

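/*
 * Return a borrowed buffer, first copying any modifications back into the
 * ABD.  The copy is needed whenever buf was a separate allocation, i.e. for
 * scatter ABDs and for ABDs backed by user pages.
 */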
void
abd_return_buf_copy(abd_t *abd, void *buf, size_t n)
{
	if (!abd_is_linear(abd) || abd_is_from_pages(abd)) {
		abd_copy_from_buf(abd, buf, n);
	}
	abd_return_buf(abd, buf, n);
}

/*
 * This is abd_iter_page(), the function underneath abd_iterate_page_func().
 * It yields the next page struct and data offset and size within it, without
 * mapping it into the address space.
 */

/*
 * "Compound pages" are a group of pages that can be referenced from a single
 * struct page *. It's organised as a "head" page, followed by a series of
 * "tail" pages.
 *
 * In OpenZFS, compound pages are allocated using the __GFP_COMP flag, which we
 * get from scatter ABDs and SPL vmalloc slabs (ie >16K allocations). So a
 * great many of the IO buffers we get are going to be of this type.
 *
 * The tail pages are just regular PAGESIZE pages, and can be safely used
 * as-is. However, the head page has length covering itself and all the tail
 * pages. If the ABD chunk spans multiple pages, then we can use the head page
 * and a >PAGESIZE length, which is far more efficient.
 *
 * Before kernel 4.5 however, compound page heads were refcounted separately
 * from tail pages, such that moving back to the head page would require us to
 * take a reference to it and release it once we're completely finished with
 * it. In practice, that meant when our caller is done with the ABD, which we
 * have no insight into from here. Rather than contort this API to track head
 * page references on such ancient kernels, we disabled this special compound
 * page handling on kernels before 4.5, instead just treating each page
 * within it as a regular PAGESIZE page (which it is). This is slightly less
 * efficient, but makes everything far simpler.
 *
 * We no longer support kernels before 4.5, so in theory none of this is
 * necessary. However, this code is still relatively new in the grand scheme of
 * things, so I'm leaving the ability to compile this out for the moment.
 *
 * Setting/clearing ABD_ITER_COMPOUND_PAGES below enables/disables the special
 * handling, by defining the ABD_ITER_PAGE_SIZE(page) macro to understand
 * compound pages, or not, and compiling in/out the support to detect compound
 * tail pages and move back to the start.
 */

/* On by default */
#define	ABD_ITER_COMPOUND_PAGES

#ifdef ABD_ITER_COMPOUND_PAGES
#define	ABD_ITER_PAGE_SIZE(page)	\
	(PageCompound(page) ? page_size(page) : PAGESIZE)
#else
#define	ABD_ITER_PAGE_SIZE(page)	(PAGESIZE)
#endif

void
abd_iter_page(struct abd_iter *aiter)
{
	if (abd_iter_at_end(aiter)) {
		aiter->iter_page = NULL;
		aiter->iter_page_doff = 0;
		aiter->iter_page_dsize = 0;
		return;
	}

	struct page *page;
	size_t doff, dsize;

	/*
	 * Find the page, and the start of the data within it. This is computed
	 * differently for linear and scatter ABDs; linear is referenced by
	 * virtual memory location, while scatter is referenced by page
	 * pointer.
	 */
	if (abd_is_linear(aiter->iter_abd)) {
		ASSERT3U(aiter->iter_pos, ==, aiter->iter_offset);

		/* memory address at iter_pos */
		void *paddr = ABD_LINEAR_BUF(aiter->iter_abd) + aiter->iter_pos;

		/* struct page for address */
		page = is_vmalloc_addr(paddr) ?
		    vmalloc_to_page(paddr) : virt_to_page(paddr);

		/* offset of address within the page */
		doff = offset_in_page(paddr);
	} else {
		ASSERT(!abd_is_gang(aiter->iter_abd));

		/* current scatter page */
		page = nth_page(sg_page(aiter->iter_sg),
		    aiter->iter_offset >> PAGE_SHIFT);

		/* position within page */
		doff = aiter->iter_offset & (PAGESIZE - 1);
	}

#ifdef ABD_ITER_COMPOUND_PAGES
	if (PageTail(page)) {
		/*
		 * If this is a compound tail page, move back to the head, and
		 * adjust the offset to match. This may let us yield a much
		 * larger amount of data from a single logical page, and so
		 * leave our caller with fewer pages to process.
		 */
		struct page *head = compound_head(page);
		doff += ((page - head) * PAGESIZE);
		page = head;
	}
#endif

	ASSERT(page);

	/*
	 * Compute the maximum amount of data we can take from this page. This
	 * is the smaller of:
	 * - the remaining space in the page
	 * - the remaining space in this scatterlist entry (which may not cover
	 *   the entire page)
	 * - the remaining space in the abd (which may not cover the entire
	 *   scatterlist entry)
	 */
	dsize = MIN(ABD_ITER_PAGE_SIZE(page) - doff,
	    aiter->iter_abd->abd_size - aiter->iter_pos);
	if (!abd_is_linear(aiter->iter_abd))
		dsize = MIN(dsize, aiter->iter_sg->length - aiter->iter_offset);
	ASSERT3U(dsize, >, 0);

	/* final iterator outputs */
	aiter->iter_page = page;
	aiter->iter_page_doff = doff;
	aiter->iter_page_dsize = dsize;
}

/*
 * Note: ABD BIO functions only needed to support vdev_classic. See comments in
 * vdev_disk.c.
 */

/*
 * bio_nr_pages for ABD.
 * @off is the offset in @abd
 */
unsigned long
abd_nr_pages_off(abd_t *abd, unsigned int size, size_t off)
{
	unsigned long pos;

	if (abd_is_gang(abd)) {
		unsigned long count = 0;

		for (abd_t *cabd = abd_gang_get_offset(abd, &off);
		    cabd != NULL && size != 0;
		    cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd)) {
			ASSERT3U(off, <, cabd->abd_size);
			int mysize = MIN(size, cabd->abd_size - off);
			count += abd_nr_pages_off(cabd, mysize, off);
			size -= mysize;
			off = 0;
		}
		return (count);
	}

	if (abd_is_linear(abd))
		pos = (unsigned long)abd_to_buf(abd) + off;
	else
		pos = ABD_SCATTER(abd).abd_offset + off;

	return (((pos + size + PAGESIZE - 1) >> PAGE_SHIFT) -
	    (pos >> PAGE_SHIFT));
}

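/*
 * Add the pages backing the virtually contiguous buffer buf_ptr to the bio,
 * stopping when the bio is full or bio_size bytes have been added.  The
 * number of bytes that did not fit is returned.
 */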
static unsigned int
bio_map(struct bio *bio, void *buf_ptr, unsigned int bio_size)
{
	unsigned int offset, size, i;
	struct page *page;

	offset = offset_in_page(buf_ptr);
	for (i = 0; i < bio->bi_max_vecs; i++) {
		size = PAGE_SIZE - offset;

		if (bio_size <= 0)
			break;

		if (size > bio_size)
			size = bio_size;

		if (is_vmalloc_addr(buf_ptr))
			page = vmalloc_to_page(buf_ptr);
		else
			page = virt_to_page(buf_ptr);

		/*
		 * Some network related block devices use tcp_sendpage, which
		 * doesn't behave well when using a 0-count page; this is a
		 * safety net to catch them.
		 */
		ASSERT3S(page_count(page), >, 0);

		if (bio_add_page(bio, page, size, offset) != size)
			break;

		buf_ptr += size;
		bio_size -= size;
		offset = 0;
	}

	return (bio_size);
}

/*
 * bio_map for gang ABD.
 */
static unsigned int
abd_gang_bio_map_off(struct bio *bio, abd_t *abd,
    unsigned int io_size, size_t off)
{
	ASSERT(abd_is_gang(abd));

	for (abd_t *cabd = abd_gang_get_offset(abd, &off);
	    cabd != NULL;
	    cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd)) {
		ASSERT3U(off, <, cabd->abd_size);
		int size = MIN(io_size, cabd->abd_size - off);
		int remainder = abd_bio_map_off(bio, cabd, size, off);
		io_size -= (size - remainder);
		if (io_size == 0 || remainder > 0)
			return (io_size);
		off = 0;
	}
	ASSERT0(io_size);
	return (io_size);
}

/*
 * bio_map for ABD.
 * @off is the offset in @abd
 * Remaining IO size is returned
 */
unsigned int
abd_bio_map_off(struct bio *bio, abd_t *abd,
    unsigned int io_size, size_t off)
{
	struct abd_iter aiter;

	ASSERT3U(io_size, <=, abd->abd_size - off);
	if (abd_is_linear(abd))
		return (bio_map(bio, ((char *)abd_to_buf(abd)) + off, io_size));

	ASSERT(!abd_is_linear(abd));
	if (abd_is_gang(abd))
		return (abd_gang_bio_map_off(bio, abd, io_size, off));

	abd_iter_init(&aiter, abd);
	abd_iter_advance(&aiter, off);

	for (int i = 0; i < bio->bi_max_vecs; i++) {
		struct page *pg;
		size_t len, sgoff, pgoff;
		struct scatterlist *sg;

		if (io_size <= 0)
			break;

		sg = aiter.iter_sg;
		sgoff = aiter.iter_offset;
		pgoff = sgoff & (PAGESIZE - 1);
		len = MIN(io_size, PAGESIZE - pgoff);
		ASSERT(len > 0);

		pg = nth_page(sg_page(sg), sgoff >> PAGE_SHIFT);
		if (bio_add_page(bio, pg, len, pgoff) != len)
			break;

		io_size -= len;
		abd_iter_advance(&aiter, len);
	}

	return (io_size);
}

EXPORT_SYMBOL(abd_alloc_from_pages);

/* Tunable Parameters */
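/*
 * zfs_abd_scatter_enabled toggles scatter ABDs entirely,
 * zfs_abd_scatter_min_size is the smallest allocation that will be built as
 * a scatter ABD, and zfs_abd_scatter_max_order caps the compound page order
 * requested by abd_alloc_chunks() on kernels without HIGHMEM.
 */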
module_param(zfs_abd_scatter_enabled, int, 0644);
MODULE_PARM_DESC(zfs_abd_scatter_enabled,
	"Toggle whether ABD allocations must be linear.");
module_param(zfs_abd_scatter_min_size, int, 0644);
MODULE_PARM_DESC(zfs_abd_scatter_min_size,
	"Minimum size of scatter allocations.");
module_param(zfs_abd_scatter_max_order, uint, 0644);
MODULE_PARM_DESC(zfs_abd_scatter_max_order,
	"Maximum order allocation used for a scatter ABD.");