/*
 * Path: blob/main/sys/contrib/openzfs/module/zfs/ddt_zap.c
 * 48383 views
 */
// SPDX-License-Identifier: CDDL-1.01/*2* CDDL HEADER START3*4* The contents of this file are subject to the terms of the5* Common Development and Distribution License (the "License").6* You may not use this file except in compliance with the License.7*8* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE9* or https://opensource.org/licenses/CDDL-1.0.10* See the License for the specific language governing permissions11* and limitations under the License.12*13* When distributing Covered Code, include this CDDL HEADER in each14* file and include the License file at usr/src/OPENSOLARIS.LICENSE.15* If applicable, add the following below this CDDL HEADER, with the16* fields enclosed by brackets "[]" replaced with your own identifying17* information: Portions Copyright [yyyy] [name of copyright owner]18*19* CDDL HEADER END20*/2122/*23* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.24* Copyright (c) 2018 by Delphix. All rights reserved.25* Copyright (c) 2023, Klara Inc.26*/2728#include <sys/zfs_context.h>29#include <sys/spa.h>30#include <sys/zio.h>31#include <sys/ddt.h>32#include <sys/ddt_impl.h>33#include <sys/zap.h>34#include <sys/dmu_tx.h>35#include <sys/zio_compress.h>3637static unsigned int ddt_zap_default_bs = 15;38static unsigned int ddt_zap_default_ibs = 15;3940#define DDT_ZAP_COMPRESS_BYTEORDER_MASK 0x8041#define DDT_ZAP_COMPRESS_FUNCTION_MASK 0x7f4243#define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t))4445static size_t46ddt_zap_compress(const void *src, uchar_t *dst, size_t s_len, size_t d_len)47{48uchar_t *version = dst++;49int cpfunc = ZIO_COMPRESS_ZLE;50zio_compress_info_t *ci = &zio_compress_table[cpfunc];51size_t c_len;5253ASSERT3U(d_len, >=, s_len + 1); /* no compression plus version byte */5455/* Call compress function directly to avoid hole detection. 
*/56abd_t sabd, dabd;57abd_get_from_buf_struct(&sabd, (void *)src, s_len);58abd_get_from_buf_struct(&dabd, dst, d_len);59c_len = ci->ci_compress(&sabd, &dabd, s_len, d_len - 1, ci->ci_level);60abd_free(&dabd);61abd_free(&sabd);6263if (c_len == s_len) {64cpfunc = ZIO_COMPRESS_OFF;65memcpy(dst, src, s_len);66}6768*version = cpfunc;69if (ZFS_HOST_BYTEORDER)70*version |= DDT_ZAP_COMPRESS_BYTEORDER_MASK;7172return (c_len + 1);73}7475static void76ddt_zap_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len)77{78uchar_t version = *src++;79int cpfunc = version & DDT_ZAP_COMPRESS_FUNCTION_MASK;8081if (zio_compress_table[cpfunc].ci_decompress == NULL) {82memcpy(dst, src, d_len);83return;84}8586abd_t sabd, dabd;87abd_get_from_buf_struct(&sabd, src, s_len);88abd_get_from_buf_struct(&dabd, dst, d_len);89VERIFY0(zio_decompress_data(cpfunc, &sabd, &dabd, s_len, d_len, NULL));90abd_free(&dabd);91abd_free(&sabd);9293if (((version & DDT_ZAP_COMPRESS_BYTEORDER_MASK) != 0) !=94(ZFS_HOST_BYTEORDER != 0))95byteswap_uint64_array(dst, d_len);96}9798static int99ddt_zap_create(objset_t *os, uint64_t *objectp, dmu_tx_t *tx, boolean_t prehash)100{101zap_flags_t flags = ZAP_FLAG_HASH64 | ZAP_FLAG_UINT64_KEY;102103if (prehash)104flags |= ZAP_FLAG_PRE_HASHED_KEY;105106*objectp = zap_create_flags(os, 0, flags, DMU_OT_DDT_ZAP,107ddt_zap_default_bs, ddt_zap_default_ibs,108DMU_OT_NONE, 0, tx);109if (*objectp == 0)110return (SET_ERROR(ENOTSUP));111112return (0);113}114115static int116ddt_zap_destroy(objset_t *os, uint64_t object, dmu_tx_t *tx)117{118return (zap_destroy(os, object, tx));119}120121static int122ddt_zap_lookup(objset_t *os, uint64_t object,123const ddt_key_t *ddk, void *phys, size_t psize)124{125uchar_t *cbuf;126uint64_t one, csize;127int error;128129error = zap_length_uint64(os, object, (uint64_t *)ddk,130DDT_KEY_WORDS, &one, &csize);131if (error)132return (error);133134ASSERT3U(one, ==, 1);135ASSERT3U(csize, <=, psize + 1);136137cbuf = kmem_alloc(csize, KM_SLEEP);138139error 
= zap_lookup_uint64(os, object, (uint64_t *)ddk,140DDT_KEY_WORDS, 1, csize, cbuf);141if (error == 0)142ddt_zap_decompress(cbuf, phys, csize, psize);143144kmem_free(cbuf, csize);145146return (error);147}148149static int150ddt_zap_contains(objset_t *os, uint64_t object, const ddt_key_t *ddk)151{152return (zap_length_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS,153NULL, NULL));154}155156static void157ddt_zap_prefetch(objset_t *os, uint64_t object, const ddt_key_t *ddk)158{159(void) zap_prefetch_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS);160}161162static void163ddt_zap_prefetch_all(objset_t *os, uint64_t object)164{165(void) zap_prefetch_object(os, object);166}167168static int169ddt_zap_update(objset_t *os, uint64_t object, const ddt_key_t *ddk,170const void *phys, size_t psize, dmu_tx_t *tx)171{172const size_t cbuf_size = psize + 1;173174uchar_t *cbuf = kmem_alloc(cbuf_size, KM_SLEEP);175176uint64_t csize = ddt_zap_compress(phys, cbuf, psize, cbuf_size);177178int error = zap_update_uint64(os, object, (uint64_t *)ddk,179DDT_KEY_WORDS, 1, csize, cbuf, tx);180181kmem_free(cbuf, cbuf_size);182183return (error);184}185186static int187ddt_zap_remove(objset_t *os, uint64_t object, const ddt_key_t *ddk,188dmu_tx_t *tx)189{190return (zap_remove_uint64(os, object, (uint64_t *)ddk,191DDT_KEY_WORDS, tx));192}193194static int195ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk,196void *phys, size_t psize)197{198zap_cursor_t zc;199zap_attribute_t *za;200int error;201202za = zap_attribute_alloc();203if (*walk == 0) {204/*205* We don't want to prefetch the entire ZAP object, because206* it can be enormous. 
Also the primary use of DDT iteration207* is for scrubbing, in which case we will be issuing many208* scrub I/Os for each ZAP block that we read in, so209* reading the ZAP is unlikely to be the bottleneck.210*/211zap_cursor_init_noprefetch(&zc, os, object);212} else {213zap_cursor_init_serialized(&zc, os, object, *walk);214}215if ((error = zap_cursor_retrieve(&zc, za)) == 0) {216uint64_t csize = za->za_num_integers;217218ASSERT3U(za->za_integer_length, ==, 1);219ASSERT3U(csize, <=, psize + 1);220221uchar_t *cbuf = kmem_alloc(csize, KM_SLEEP);222223error = zap_lookup_uint64(os, object, (uint64_t *)za->za_name,224DDT_KEY_WORDS, 1, csize, cbuf);225ASSERT0(error);226if (error == 0) {227ddt_zap_decompress(cbuf, phys, csize, psize);228*ddk = *(ddt_key_t *)za->za_name;229}230231kmem_free(cbuf, csize);232233zap_cursor_advance(&zc);234*walk = zap_cursor_serialize(&zc);235}236zap_cursor_fini(&zc);237zap_attribute_free(za);238return (error);239}240241static int242ddt_zap_count(objset_t *os, uint64_t object, uint64_t *count)243{244return (zap_count(os, object, count));245}246247const ddt_ops_t ddt_zap_ops = {248"zap",249ddt_zap_create,250ddt_zap_destroy,251ddt_zap_lookup,252ddt_zap_contains,253ddt_zap_prefetch,254ddt_zap_prefetch_all,255ddt_zap_update,256ddt_zap_remove,257ddt_zap_walk,258ddt_zap_count,259};260261ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_bs, UINT, ZMOD_RW,262"DDT ZAP leaf blockshift");263ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_ibs, UINT, ZMOD_RW,264"DDT ZAP indirect blockshift");265266267