Path: blob/main/sys/compat/linuxkpi/common/src/linux_skbuff.c
39586 views
/*-1* Copyright (c) 2020-2025 The FreeBSD Foundation2* Copyright (c) 2021-2022 Bjoern A. Zeeb3*4* This software was developed by Björn Zeeb under sponsorship from5* the FreeBSD Foundation.6*7* Redistribution and use in source and binary forms, with or without8* modification, are permitted provided that the following conditions9* are met:10* 1. Redistributions of source code must retain the above copyright11* notice, this list of conditions and the following disclaimer.12* 2. Redistributions in binary form must reproduce the above copyright13* notice, this list of conditions and the following disclaimer in the14* documentation and/or other materials provided with the distribution.15*16* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND17* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE18* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE19* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE20* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL21* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS22* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)23* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT24* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY25* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF26* SUCH DAMAGE.27*/2829/*30* NOTE: this socket buffer compatibility code is highly EXPERIMENTAL.31* Do not rely on the internals of this implementation. They are highly32* likely to change as we will improve the integration to FreeBSD mbufs.33*/3435#include <sys/cdefs.h>36#include "opt_ddb.h"3738#include <sys/param.h>39#include <sys/types.h>40#include <sys/kernel.h>41#include <sys/malloc.h>42#include <sys/sysctl.h>4344#include <vm/uma.h>4546#ifdef DDB47#include <ddb/ddb.h>48#endif4950#include <linux/skbuff.h>51#include <linux/slab.h>52#include <linux/gfp.h>53#ifdef __LP64__54#include <linux/log2.h>55#endif5657SYSCTL_DECL(_compat_linuxkpi);58SYSCTL_NODE(_compat_linuxkpi, OID_AUTO, skb, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,59"LinuxKPI skbuff");6061#ifdef SKB_DEBUG62int linuxkpi_debug_skb;63SYSCTL_INT(_compat_linuxkpi_skb, OID_AUTO, debug, CTLFLAG_RWTUN,64&linuxkpi_debug_skb, 0, "SKB debug level");65#endif6667static uma_zone_t skbzone;6869#define SKB_DMA32_MALLOC70#ifdef SKB_DMA32_MALLOC71/*72* Realtek wireless drivers (e.g., rtw88) require 32bit DMA in a single segment.73* busdma(9) has a hard time providing this currently for 3-ish pages at large74* quantities (see lkpi_pci_nseg1_fail in linux_pci.c).75* Work around this for now by allowing a tunable to enforce physical addresses76* allocation limits using "old-school" contigmalloc(9) to avoid bouncing.77* Note: with the malloc/contigmalloc + kmalloc changes also providing physical78* contiguous memory, and the nseg=1 limit for bouncing we should in theory be79* fine now and not need any of this anymore, however busdma still has troubles80* boncing three contiguous pages so for now this stays.81*/82static int linuxkpi_skb_memlimit;83SYSCTL_INT(_compat_linuxkpi_skb, OID_AUTO, mem_limit, CTLFLAG_RDTUN,84&linuxkpi_skb_memlimit, 0, "SKB memory limit: 0=no limit, "85"1=32bit, 2=36bit, other=undef (currently 32bit)");8687static MALLOC_DEFINE(M_LKPISKB, "lkpiskb", "Linux KPI skbuff compat");88#endif8990struct sk_buff *91linuxkpi_alloc_skb(size_t size, gfp_t gfp)92{93struct sk_buff *skb;94void *p;95size_t len;9697skb = uma_zalloc(skbzone, linux_check_m_flags(gfp) | M_ZERO);98if (skb == NULL)99return (NULL);100101skb->prev = skb->next = skb;102skb->truesize = size;103skb->shinfo = (struct skb_shared_info *)(skb + 1);104105if (size == 0)106return (skb);107108len = size;109#ifdef SKB_DMA32_MALLOC110/*111* Using our own type here not backing my kmalloc.112* We assume no one calls kfree directly on the skb.113*/114if (__predict_false(linuxkpi_skb_memlimit != 0)) {115vm_paddr_t high;116117switch (linuxkpi_skb_memlimit) {118#ifdef __LP64__119case 2:120high = (0xfffffffff); /* 1<<36 really. */121break;122#endif123case 1:124default:125high = (0xffffffff); /* 1<<32 really. */126break;127}128len = roundup_pow_of_two(len);129p = contigmalloc(len, M_LKPISKB,130linux_check_m_flags(gfp) | M_ZERO, 0, high, PAGE_SIZE, 0);131} else132#endif133p = __kmalloc(len, linux_check_m_flags(gfp) | M_ZERO);134if (p == NULL) {135uma_zfree(skbzone, skb);136return (NULL);137}138139skb->head = skb->data = (uint8_t *)p;140skb_reset_tail_pointer(skb);141skb->end = skb->head + size;142143SKB_TRACE_FMT(skb, "data %p size %zu", (skb) ? skb->data : NULL, size);144return (skb);145}146147struct sk_buff *148linuxkpi_dev_alloc_skb(size_t size, gfp_t gfp)149{150struct sk_buff *skb;151size_t len;152153len = size + NET_SKB_PAD;154skb = linuxkpi_alloc_skb(len, gfp);155156if (skb != NULL)157skb_reserve(skb, NET_SKB_PAD);158159SKB_TRACE_FMT(skb, "data %p size %zu len %zu",160(skb) ? skb->data : NULL, size, len);161return (skb);162}163164struct sk_buff *165linuxkpi_build_skb(void *data, size_t fragsz)166{167struct sk_buff *skb;168169if (data == NULL || fragsz == 0)170return (NULL);171172/* Just allocate a skb without data area. */173skb = linuxkpi_alloc_skb(0, GFP_KERNEL);174if (skb == NULL)175return (NULL);176177skb->_flags |= _SKB_FLAGS_SKBEXTFRAG;178skb->truesize = fragsz;179skb->head = skb->data = data;180skb_reset_tail_pointer(skb);181skb->end = skb->head + fragsz;182183return (skb);184}185186struct sk_buff *187linuxkpi_skb_copy(const struct sk_buff *skb, gfp_t gfp)188{189struct sk_buff *new;190struct skb_shared_info *shinfo;191size_t len;192unsigned int headroom;193194/* Full buffer size + any fragments. */195len = skb->end - skb->head + skb->data_len;196197new = linuxkpi_alloc_skb(len, gfp);198if (new == NULL)199return (NULL);200201headroom = skb_headroom(skb);202/* Fixup head and end. */203skb_reserve(new, headroom); /* data and tail move headroom forward. */204skb_put(new, skb->len); /* tail and len get adjusted */205206/* Copy data. */207memcpy(new->head, skb->data - headroom, headroom + skb->len);208209/* Deal with fragments. */210shinfo = skb->shinfo;211if (shinfo->nr_frags > 0) {212printf("%s:%d: NOT YET SUPPORTED; missing %d frags\n",213__func__, __LINE__, shinfo->nr_frags);214SKB_TODO();215}216217/* Deal with header fields. */218memcpy(new->cb, skb->cb, sizeof(skb->cb));219SKB_IMPROVE("more header fields to copy?");220221return (new);222}223224void225linuxkpi_kfree_skb(struct sk_buff *skb)226{227struct skb_shared_info *shinfo;228uint16_t fragno, count;229230SKB_TRACE(skb);231if (skb == NULL)232return;233234/*235* XXX TODO this will go away once we have skb backed by mbuf.236* currently we allow the mbuf to stay around and use a private237* free function to allow secondary resources to be freed along.238*/239if (skb->m != NULL) {240void *m;241242m = skb->m;243skb->m = NULL;244245KASSERT(skb->m_free_func != NULL, ("%s: skb %p has m %p but no "246"m_free_func %p\n", __func__, skb, m, skb->m_free_func));247skb->m_free_func(m);248}249KASSERT(skb->m == NULL,250("%s: skb %p m %p != NULL\n", __func__, skb, skb->m));251252shinfo = skb->shinfo;253for (count = fragno = 0;254count < shinfo->nr_frags && fragno < nitems(shinfo->frags);255fragno++) {256257if (shinfo->frags[fragno].page != NULL) {258struct page *p;259260p = shinfo->frags[fragno].page;261shinfo->frags[fragno].size = 0;262shinfo->frags[fragno].offset = 0;263shinfo->frags[fragno].page = NULL;264__free_page(p);265count++;266}267}268269if ((skb->_flags & _SKB_FLAGS_SKBEXTFRAG) != 0) {270void *p;271272p = skb->head;273skb_free_frag(p);274skb->head = NULL;275}276277#ifdef SKB_DMA32_MALLOC278if (__predict_false(linuxkpi_skb_memlimit != 0))279free(skb->head, M_LKPISKB);280else281#endif282kfree(skb->head);283uma_zfree(skbzone, skb);284}285286static void287lkpi_skbuff_init(void *arg __unused)288{289skbzone = uma_zcreate("skbuff",290sizeof(struct sk_buff) + sizeof(struct skb_shared_info),291NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);292/* Do we need to apply limits? */293}294SYSINIT(linuxkpi_skbuff, SI_SUB_DRIVERS, SI_ORDER_FIRST, lkpi_skbuff_init, NULL);295296static void297lkpi_skbuff_destroy(void *arg __unused)298{299uma_zdestroy(skbzone);300}301SYSUNINIT(linuxkpi_skbuff, SI_SUB_DRIVERS, SI_ORDER_SECOND, lkpi_skbuff_destroy, NULL);302303#ifdef DDB304DB_SHOW_COMMAND(skb, db_show_skb)305{306struct sk_buff *skb;307int i;308309if (!have_addr) {310db_printf("usage: show skb <addr>\n");311return;312}313314skb = (struct sk_buff *)addr;315316db_printf("skb %p\n", skb);317db_printf("\tnext %p prev %p\n", skb->next, skb->prev);318db_printf("\tlist %p\n", &skb->list);319db_printf("\tlen %u data_len %u truesize %u mac_len %u\n",320skb->len, skb->data_len, skb->truesize, skb->mac_len);321db_printf("\tcsum %#06x l3hdroff %u l4hdroff %u priority %u qmap %u\n",322skb->csum, skb->l3hdroff, skb->l4hdroff, skb->priority, skb->qmap);323db_printf("\tpkt_type %d dev %p sk %p\n",324skb->pkt_type, skb->dev, skb->sk);325db_printf("\tcsum_offset %d csum_start %d ip_summed %d protocol %d\n",326skb->csum_offset, skb->csum_start, skb->ip_summed, skb->protocol);327db_printf("\t_flags %#06x\n", skb->_flags); /* XXX-BZ print names? */328db_printf("\thead %p data %p tail %p end %p\n",329skb->head, skb->data, skb->tail, skb->end);330db_printf("\tshinfo %p m %p m_free_func %p\n",331skb->shinfo, skb->m, skb->m_free_func);332333if (skb->shinfo != NULL) {334struct skb_shared_info *shinfo;335336shinfo = skb->shinfo;337db_printf("\t\tgso_type %d gso_size %u nr_frags %u\n",338shinfo->gso_type, shinfo->gso_size, shinfo->nr_frags);339for (i = 0; i < nitems(shinfo->frags); i++) {340struct skb_frag *frag;341342frag = &shinfo->frags[i];343if (frag == NULL || frag->page == NULL)344continue;345db_printf("\t\t\tfrag %p fragno %d page %p %p "346"offset %ju size %zu\n",347frag, i, frag->page, linux_page_address(frag->page),348(uintmax_t)frag->offset, frag->size);349}350}351db_printf("\tcb[] %p {", skb->cb);352for (i = 0; i < nitems(skb->cb); i++) {353db_printf("%#04x%s",354skb->cb[i], (i < (nitems(skb->cb)-1)) ? ", " : "");355}356db_printf("}\n");357358db_printf("\t__scratch[0] %p\n", skb->__scratch);359};360#endif361362363