/*
 * Path: blob/main/sys/contrib/openzfs/module/zfs/ddt_zap.c
 * 108609 views
 */
// SPDX-License-Identifier: CDDL-1.01/*2* CDDL HEADER START3*4* The contents of this file are subject to the terms of the5* Common Development and Distribution License (the "License").6* You may not use this file except in compliance with the License.7*8* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE9* or https://opensource.org/licenses/CDDL-1.0.10* See the License for the specific language governing permissions11* and limitations under the License.12*13* When distributing Covered Code, include this CDDL HEADER in each14* file and include the License file at usr/src/OPENSOLARIS.LICENSE.15* If applicable, add the following below this CDDL HEADER, with the16* fields enclosed by brackets "[]" replaced with your own identifying17* information: Portions Copyright [yyyy] [name of copyright owner]18*19* CDDL HEADER END20*/2122/*23* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.24* Copyright (c) 2018 by Delphix. All rights reserved.25* Copyright (c) 2023, Klara Inc.26*/2728#include <sys/zfs_context.h>29#include <sys/spa.h>30#include <sys/zio.h>31#include <sys/ddt.h>32#include <sys/ddt_impl.h>33#include <sys/zap.h>34#include <sys/dmu_tx.h>35#include <sys/dnode.h>36#include <sys/zio_compress.h>3738static unsigned int ddt_zap_default_bs = 15;39static unsigned int ddt_zap_default_ibs = 15;4041#define DDT_ZAP_COMPRESS_BYTEORDER_MASK 0x8042#define DDT_ZAP_COMPRESS_FUNCTION_MASK 0x7f4344#define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t))4546static size_t47ddt_zap_compress(const void *src, uchar_t *dst, size_t s_len, size_t d_len)48{49uchar_t *version = dst++;50int cpfunc = ZIO_COMPRESS_ZLE;51zio_compress_info_t *ci = &zio_compress_table[cpfunc];52size_t c_len;5354ASSERT3U(d_len, >=, s_len + 1); /* no compression plus version byte */5556/* Call compress function directly to avoid hole detection. 
*/57abd_t sabd, dabd;58abd_get_from_buf_struct(&sabd, (void *)src, s_len);59abd_get_from_buf_struct(&dabd, dst, d_len - 1);60c_len = ci->ci_compress(&sabd, &dabd, s_len, d_len - 1, ci->ci_level);61abd_free(&dabd);62abd_free(&sabd);6364if (c_len == s_len) {65cpfunc = ZIO_COMPRESS_OFF;66memcpy(dst, src, s_len);67}6869*version = cpfunc;70if (ZFS_HOST_BYTEORDER)71*version |= DDT_ZAP_COMPRESS_BYTEORDER_MASK;7273return (c_len + 1);74}7576static void77ddt_zap_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len)78{79uchar_t version = *src++;80int cpfunc = version & DDT_ZAP_COMPRESS_FUNCTION_MASK;8182if (zio_compress_table[cpfunc].ci_decompress == NULL) {83memcpy(dst, src, d_len);84return;85}8687abd_t sabd, dabd;88size_t c_len = s_len - 1;89abd_get_from_buf_struct(&sabd, src, c_len);90abd_get_from_buf_struct(&dabd, dst, d_len);91VERIFY0(zio_decompress_data(cpfunc, &sabd, &dabd, c_len, d_len, NULL));92abd_free(&dabd);93abd_free(&sabd);9495if (((version & DDT_ZAP_COMPRESS_BYTEORDER_MASK) != 0) !=96(ZFS_HOST_BYTEORDER != 0))97byteswap_uint64_array(dst, d_len);98}99100static int101ddt_zap_create(objset_t *os, uint64_t *objectp, dmu_tx_t *tx, boolean_t prehash)102{103zap_flags_t flags = ZAP_FLAG_HASH64 | ZAP_FLAG_UINT64_KEY;104105if (prehash)106flags |= ZAP_FLAG_PRE_HASHED_KEY;107108*objectp = zap_create_flags(os, 0, flags, DMU_OT_DDT_ZAP,109ddt_zap_default_bs, ddt_zap_default_ibs,110DMU_OT_NONE, 0, tx);111if (*objectp == 0)112return (SET_ERROR(ENOTSUP));113114return (0);115}116117static int118ddt_zap_destroy(objset_t *os, uint64_t object, dmu_tx_t *tx)119{120return (zap_destroy(os, object, tx));121}122123static int124ddt_zap_lookup(dnode_t *dn, const ddt_key_t *ddk, void *phys, size_t psize)125{126uchar_t *cbuf;127uint64_t csize;128int error;129130cbuf = kmem_alloc(psize + 1, KM_SLEEP);131132error = zap_lookup_length_uint64_by_dnode(dn, (uint64_t *)ddk,133DDT_KEY_WORDS, 1, psize + 1, cbuf, &csize);134if (error == 0) {135ASSERT3U(csize, <=, psize + 
1);136ddt_zap_decompress(cbuf, phys, csize, psize);137}138139kmem_free(cbuf, psize + 1);140141return (error);142}143144static int145ddt_zap_contains(dnode_t *dn, const ddt_key_t *ddk)146{147return (zap_length_uint64_by_dnode(dn, (uint64_t *)ddk,148DDT_KEY_WORDS, NULL, NULL));149}150151static void152ddt_zap_prefetch(dnode_t *dn, const ddt_key_t *ddk)153{154(void) zap_prefetch_uint64_by_dnode(dn, (uint64_t *)ddk,155DDT_KEY_WORDS);156}157158static void159ddt_zap_prefetch_all(dnode_t *dn)160{161(void) zap_prefetch_object(dn->dn_objset, dn->dn_object);162}163164static int165ddt_zap_update(dnode_t *dn, const ddt_key_t *ddk,166const void *phys, size_t psize, dmu_tx_t *tx)167{168const size_t cbuf_size = psize + 1;169170uchar_t *cbuf = kmem_alloc(cbuf_size, KM_SLEEP);171172uint64_t csize = ddt_zap_compress(phys, cbuf, psize, cbuf_size);173174int error = zap_update_uint64_by_dnode(dn, (uint64_t *)ddk,175DDT_KEY_WORDS, 1, csize, cbuf, tx);176177kmem_free(cbuf, cbuf_size);178179return (error);180}181182static int183ddt_zap_remove(dnode_t *dn, const ddt_key_t *ddk, dmu_tx_t *tx)184{185return (zap_remove_uint64_by_dnode(dn, (uint64_t *)ddk,186DDT_KEY_WORDS, tx));187}188189static int190ddt_zap_walk(dnode_t *dn, uint64_t *walk, ddt_key_t *ddk,191void *phys, size_t psize)192{193zap_cursor_t zc;194zap_attribute_t *za;195int error;196197za = zap_attribute_alloc();198if (*walk == 0) {199/*200* We don't want to prefetch the entire ZAP object, because201* it can be enormous. 
Also the primary use of DDT iteration202* is for scrubbing, in which case we will be issuing many203* scrub I/Os for each ZAP block that we read in, so204* reading the ZAP is unlikely to be the bottleneck.205*/206zap_cursor_init_noprefetch(&zc, dn->dn_objset, dn->dn_object);207} else {208zap_cursor_init_serialized(&zc, dn->dn_objset, dn->dn_object,209*walk);210}211if ((error = zap_cursor_retrieve(&zc, za)) == 0) {212uint64_t csize = za->za_num_integers;213214ASSERT3U(za->za_integer_length, ==, 1);215ASSERT3U(csize, <=, psize + 1);216217uchar_t *cbuf = kmem_alloc(csize, KM_SLEEP);218219error = zap_lookup_uint64_by_dnode(dn, (uint64_t *)za->za_name,220DDT_KEY_WORDS, 1, csize, cbuf);221ASSERT0(error);222if (error == 0) {223ddt_zap_decompress(cbuf, phys, csize, psize);224*ddk = *(ddt_key_t *)za->za_name;225}226227kmem_free(cbuf, csize);228229zap_cursor_advance(&zc);230*walk = zap_cursor_serialize(&zc);231}232zap_cursor_fini(&zc);233zap_attribute_free(za);234return (error);235}236237static int238ddt_zap_count(dnode_t *dn, uint64_t *count)239{240return (zap_count_by_dnode(dn, count));241}242243const ddt_ops_t ddt_zap_ops = {244"zap",245ddt_zap_create,246ddt_zap_destroy,247ddt_zap_lookup,248ddt_zap_contains,249ddt_zap_prefetch,250ddt_zap_prefetch_all,251ddt_zap_update,252ddt_zap_remove,253ddt_zap_walk,254ddt_zap_count,255};256257ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_bs, UINT, ZMOD_RW,258"DDT ZAP leaf blockshift");259ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_ibs, UINT, ZMOD_RW,260"DDT ZAP indirect blockshift");261262263