GitHub Repository: torvalds/linux
Path: blob/master/kernel/bpf/bpf_local_storage.c
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <linux/rculist.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/bpf.h>
#include <linux/btf_ids.h>
#include <linux/bpf_local_storage.h>
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate.h>
#include <linux/rcupdate_trace.h>
#include <linux/rcupdate_wait.h>

#define BPF_LOCAL_STORAGE_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_CLONE)

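/* Pick the map-side bucket for a selem by hashing the selem pointer
 * itself.  A bucket's lock only serializes linking and unlinking the
 * selem on that bucket's list (see bpf_selem_link_map() and
 * bpf_selem_unlink_map() below).
 */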
static struct bpf_local_storage_map_bucket *
select_bucket(struct bpf_local_storage_map *smap,
              struct bpf_local_storage_elem *selem)
{
        return &smap->buckets[hash_ptr(selem, smap->bucket_log)];
}

static int mem_charge(struct bpf_local_storage_map *smap, void *owner, u32 size)
{
        struct bpf_map *map = &smap->map;

        if (!map->ops->map_local_storage_charge)
                return 0;

        return map->ops->map_local_storage_charge(smap, owner, size);
}

static void mem_uncharge(struct bpf_local_storage_map *smap, void *owner,
                         u32 size)
{
        struct bpf_map *map = &smap->map;

        if (map->ops->map_local_storage_uncharge)
                map->ops->map_local_storage_uncharge(smap, owner, size);
}

static struct bpf_local_storage __rcu **
owner_storage(struct bpf_local_storage_map *smap, void *owner)
{
        struct bpf_map *map = &smap->map;

        return map->ops->map_owner_storage_ptr(owner);
}

static bool selem_linked_to_storage_lockless(const struct bpf_local_storage_elem *selem)
{
        return !hlist_unhashed_lockless(&selem->snode);
}

static bool selem_linked_to_storage(const struct bpf_local_storage_elem *selem)
{
        return !hlist_unhashed(&selem->snode);
}

static bool selem_linked_to_map_lockless(const struct bpf_local_storage_elem *selem)
{
        return !hlist_unhashed_lockless(&selem->map_node);
}

static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
{
        return !hlist_unhashed(&selem->map_node);
}

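/* Allocate a zero-initialized selem for @owner, charging smap->elem_size
 * to the owner first.  Maps with use_kmalloc_nolock set allocate with
 * bpf_map_kmalloc_nolock(); otherwise bpf_map_kzalloc() with the
 * caller's gfp_flags is used.  If @value is given it is copied into the
 * new selem, and @swap_uptrs additionally moves the uptr fields from
 * @value into the selem.  On failure the charge is reverted and NULL is
 * returned.
 */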
struct bpf_local_storage_elem *
bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
                void *value, bool swap_uptrs, gfp_t gfp_flags)
{
        struct bpf_local_storage_elem *selem;

        if (mem_charge(smap, owner, smap->elem_size))
                return NULL;

        if (smap->use_kmalloc_nolock) {
                selem = bpf_map_kmalloc_nolock(&smap->map, smap->elem_size,
                                               __GFP_ZERO, NUMA_NO_NODE);
        } else {
                selem = bpf_map_kzalloc(&smap->map, smap->elem_size,
                                        gfp_flags | __GFP_NOWARN);
        }

        if (selem) {
                RCU_INIT_POINTER(SDATA(selem)->smap, smap);

                if (value) {
                        /* No need to call check_and_init_map_value as memory is zero init */
                        copy_map_value(&smap->map, SDATA(selem)->data, value);
                        if (swap_uptrs)
                                bpf_obj_swap_uptrs(smap->map.record, SDATA(selem)->data, value);
                }
                return selem;
        }

        mem_uncharge(smap, owner, smap->elem_size);

        return NULL;
}

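/* Freeing selems and local_storages:
 *
 * "reuse_now == true" means the object may be reused after a regular RCU
 * grace period, while "reuse_now == false" additionally waits for an RCU
 * Tasks Trace grace period because sleepable bpf programs may still hold
 * a reference.  Storages and selems of maps with use_kmalloc_nolock set
 * are freed with kfree_nolock(); the others go through the
 * __bpf_*_free() helpers below.
 */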
/* rcu tasks trace callback for use_kmalloc_nolock == false */
static void __bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
{
        struct bpf_local_storage *local_storage;

        /* If RCU Tasks Trace grace period implies RCU grace period, do
         * kfree(), else do kfree_rcu().
         */
        local_storage = container_of(rcu, struct bpf_local_storage, rcu);
        if (rcu_trace_implies_rcu_gp())
                kfree(local_storage);
        else
                kfree_rcu(local_storage, rcu);
}

/* Handle use_kmalloc_nolock == false */
static void __bpf_local_storage_free(struct bpf_local_storage *local_storage,
                                     bool vanilla_rcu)
{
        if (vanilla_rcu)
                kfree_rcu(local_storage, rcu);
        else
                call_rcu_tasks_trace(&local_storage->rcu,
                                     __bpf_local_storage_free_trace_rcu);
}

static void bpf_local_storage_free_rcu(struct rcu_head *rcu)
{
        struct bpf_local_storage *local_storage;

        local_storage = container_of(rcu, struct bpf_local_storage, rcu);
        kfree_nolock(local_storage);
}

static void bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
{
        if (rcu_trace_implies_rcu_gp())
                bpf_local_storage_free_rcu(rcu);
        else
                call_rcu(rcu, bpf_local_storage_free_rcu);
}

static void bpf_local_storage_free(struct bpf_local_storage *local_storage,
                                   bool reuse_now)
{
        if (!local_storage)
                return;

        if (!local_storage->use_kmalloc_nolock) {
                __bpf_local_storage_free(local_storage, reuse_now);
                return;
        }

        if (reuse_now) {
                call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu);
                return;
        }

        call_rcu_tasks_trace(&local_storage->rcu,
                             bpf_local_storage_free_trace_rcu);
}

/* rcu tasks trace callback for use_kmalloc_nolock == false */
static void __bpf_selem_free_trace_rcu(struct rcu_head *rcu)
{
        struct bpf_local_storage_elem *selem;

        selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
        if (rcu_trace_implies_rcu_gp())
                kfree(selem);
        else
                kfree_rcu(selem, rcu);
}

/* Handle use_kmalloc_nolock == false */
static void __bpf_selem_free(struct bpf_local_storage_elem *selem,
                             bool vanilla_rcu)
{
        if (vanilla_rcu)
                kfree_rcu(selem, rcu);
        else
                call_rcu_tasks_trace(&selem->rcu, __bpf_selem_free_trace_rcu);
}

static void bpf_selem_free_rcu(struct rcu_head *rcu)
{
        struct bpf_local_storage_elem *selem;
        struct bpf_local_storage_map *smap;

        selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
        /* The bpf_local_storage_map_free will wait for rcu_barrier */
        smap = rcu_dereference_check(SDATA(selem)->smap, 1);

        migrate_disable();
        bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
        migrate_enable();
        kfree_nolock(selem);
}

static void bpf_selem_free_trace_rcu(struct rcu_head *rcu)
{
        if (rcu_trace_implies_rcu_gp())
                bpf_selem_free_rcu(rcu);
        else
                call_rcu(rcu, bpf_selem_free_rcu);
}

void bpf_selem_free(struct bpf_local_storage_elem *selem,
                    bool reuse_now)
{
        struct bpf_local_storage_map *smap;

        smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());

        if (!smap->use_kmalloc_nolock) {
                /*
                 * No uptr will be unpinned even when reuse_now == false, since
                 * uptr is only supported in task local storage, where
                 * smap->use_kmalloc_nolock == true.
                 */
                bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
                __bpf_selem_free(selem, reuse_now);
                return;
        }

        if (reuse_now) {
                /*
                 * While it is okay to call bpf_obj_free_fields() that unpins uptr
                 * when reuse_now == true, keep it in bpf_selem_free_rcu() for
                 * simplicity.
                 */
                call_rcu(&selem->rcu, bpf_selem_free_rcu);
                return;
        }

        call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu);
}

static void bpf_selem_free_list(struct hlist_head *list, bool reuse_now)
{
        struct bpf_local_storage_elem *selem;
        struct hlist_node *n;

        /* The "_safe" iteration is needed.
         * The loop is not removing the selem from the list
         * but bpf_selem_free will use the selem->rcu_head
         * which is union-ized with the selem->free_node.
         */
        hlist_for_each_entry_safe(selem, n, list, free_node)
                bpf_selem_free(selem, reuse_now);
}

/* local_storage->lock must be held and selem->local_storage == local_storage.
 * The caller must ensure selem->smap is still valid to be
 * dereferenced for its smap->elem_size and smap->cache_idx.
 */
static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
                                            struct bpf_local_storage_elem *selem,
                                            struct hlist_head *free_selem_list)
{
        struct bpf_local_storage_map *smap;
        bool free_local_storage;
        void *owner;

        smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
        owner = local_storage->owner;

        /* All uncharging on the owner must be done first.
         * The owner may be freed once the last selem is unlinked
         * from local_storage.
         */
        mem_uncharge(smap, owner, smap->elem_size);

        free_local_storage = hlist_is_singular_node(&selem->snode,
                                                    &local_storage->list);
        if (free_local_storage) {
                mem_uncharge(smap, owner, sizeof(struct bpf_local_storage));
                local_storage->owner = NULL;

                /* After this RCU_INIT, owner may be freed and cannot be used */
                RCU_INIT_POINTER(*owner_storage(smap, owner), NULL);

                /* local_storage is not freed now.  local_storage->lock is
                 * still held and raw_spin_unlock_bh(&local_storage->lock)
                 * will be done by the caller.
                 *
                 * Although the unlock will be done under
                 * rcu_read_lock(), it is more intuitive to
                 * read if the freeing of the storage is done
                 * after the raw_spin_unlock_bh(&local_storage->lock).
                 *
                 * Hence, a "bool free_local_storage" is returned
                 * to the caller, which then frees the storage after
                 * all the RCU grace periods have expired.
                 */
        }
        hlist_del_init_rcu(&selem->snode);
        if (rcu_access_pointer(local_storage->cache[smap->cache_idx]) ==
            SDATA(selem))
                RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);

        hlist_add_head(&selem->free_node, free_selem_list);

        if (rcu_access_pointer(local_storage->smap) == smap)
                RCU_INIT_POINTER(local_storage->smap, NULL);

        return free_local_storage;
}

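/* Unlink @selem from its local_storage under local_storage->lock and
 * queue it on a local free list.  If it was the last selem, the
 * local_storage itself is freed as well once the lock is dropped.
 */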
static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem,
                                     bool reuse_now)
{
        struct bpf_local_storage *local_storage;
        bool free_local_storage = false;
        HLIST_HEAD(selem_free_list);
        unsigned long flags;

        if (unlikely(!selem_linked_to_storage_lockless(selem)))
                /* selem has already been unlinked from sk */
                return;

        local_storage = rcu_dereference_check(selem->local_storage,
                                              bpf_rcu_lock_held());

        raw_spin_lock_irqsave(&local_storage->lock, flags);
        if (likely(selem_linked_to_storage(selem)))
                free_local_storage = bpf_selem_unlink_storage_nolock(
                        local_storage, selem, &selem_free_list);
        raw_spin_unlock_irqrestore(&local_storage->lock, flags);

        bpf_selem_free_list(&selem_free_list, reuse_now);

        if (free_local_storage)
                bpf_local_storage_free(local_storage, reuse_now);
}

void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
                                   struct bpf_local_storage_elem *selem)
{
        RCU_INIT_POINTER(selem->local_storage, local_storage);
        hlist_add_head_rcu(&selem->snode, &local_storage->list);
}

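/* Link/unlink a selem on the map side.  The selem is kept on one of the
 * map's bucket lists so that bpf_local_storage_map_free() can find and
 * unlink every selem that still belongs to the map.  Both operations are
 * serialized by the bucket's lock.
 */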
static void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
{
        struct bpf_local_storage_map *smap;
        struct bpf_local_storage_map_bucket *b;
        unsigned long flags;

        if (unlikely(!selem_linked_to_map_lockless(selem)))
                /* selem has already been unlinked from smap */
                return;

        smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
        b = select_bucket(smap, selem);
        raw_spin_lock_irqsave(&b->lock, flags);
        if (likely(selem_linked_to_map(selem)))
                hlist_del_init_rcu(&selem->map_node);
        raw_spin_unlock_irqrestore(&b->lock, flags);
}

void bpf_selem_link_map(struct bpf_local_storage_map *smap,
                        struct bpf_local_storage_elem *selem)
{
        struct bpf_local_storage_map_bucket *b = select_bucket(smap, selem);
        unsigned long flags;

        raw_spin_lock_irqsave(&b->lock, flags);
        hlist_add_head_rcu(&selem->map_node, &b->list);
        raw_spin_unlock_irqrestore(&b->lock, flags);
}

void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now)
{
        /* Always unlink from map before unlinking from local_storage
         * because selem will be freed after successfully unlinked from
         * the local_storage.
         */
        bpf_selem_unlink_map(selem);
        bpf_selem_unlink_storage(selem, reuse_now);
}

void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage,
                                      struct bpf_local_storage_map *smap,
                                      struct bpf_local_storage_elem *selem)
{
        unsigned long flags;

        /* spinlock is needed to avoid racing with the
         * parallel delete.  Otherwise, publishing an already
         * deleted sdata to the cache will become a use-after-free
         * problem in the next bpf_local_storage_lookup().
         */
        raw_spin_lock_irqsave(&local_storage->lock, flags);
        if (selem_linked_to_storage(selem))
                rcu_assign_pointer(local_storage->cache[smap->cache_idx], SDATA(selem));
        raw_spin_unlock_irqrestore(&local_storage->lock, flags);
}

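/* Validate the update flags against the current state of the element:
 * BPF_NOEXIST fails with -EEXIST if an element is already there, and
 * BPF_EXIST fails with -ENOENT if there is none.  BPF_F_LOCK is masked
 * out because it may be combined with either flag.
 */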
static int check_flags(const struct bpf_local_storage_data *old_sdata,
                       u64 map_flags)
{
        if (old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
                /* elem already exists */
                return -EEXIST;

        if (!old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
                /* elem doesn't exist, cannot update it */
                return -ENOENT;

        return 0;
}

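/* Allocate the per-owner bpf_local_storage, link @first_selem into it and
 * publish it to the owner with a cmpxchg on the owner's storage pointer.
 * If another CPU has already published a storage, the new one is
 * discarded, the charge is reverted and -EAGAIN is returned.
 */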
int bpf_local_storage_alloc(void *owner,
                            struct bpf_local_storage_map *smap,
                            struct bpf_local_storage_elem *first_selem,
                            gfp_t gfp_flags)
{
        struct bpf_local_storage *prev_storage, *storage;
        struct bpf_local_storage **owner_storage_ptr;
        int err;

        err = mem_charge(smap, owner, sizeof(*storage));
        if (err)
                return err;

        if (smap->use_kmalloc_nolock)
                storage = bpf_map_kmalloc_nolock(&smap->map, sizeof(*storage),
                                                 __GFP_ZERO, NUMA_NO_NODE);
        else
                storage = bpf_map_kzalloc(&smap->map, sizeof(*storage),
                                          gfp_flags | __GFP_NOWARN);
        if (!storage) {
                err = -ENOMEM;
                goto uncharge;
        }

        RCU_INIT_POINTER(storage->smap, smap);
        INIT_HLIST_HEAD(&storage->list);
        raw_spin_lock_init(&storage->lock);
        storage->owner = owner;
        storage->use_kmalloc_nolock = smap->use_kmalloc_nolock;

        bpf_selem_link_storage_nolock(storage, first_selem);
        bpf_selem_link_map(smap, first_selem);

        owner_storage_ptr =
                (struct bpf_local_storage **)owner_storage(smap, owner);
        /* Publish storage to the owner.
         * Instead of using any lock of the kernel object (i.e. owner),
         * cmpxchg will work with any kernel object regardless what
         * the running context is, bh, irq...etc.
         *
         * From now on, the owner->storage pointer (e.g. sk->sk_bpf_storage)
         * is protected by the storage->lock.  Hence, when freeing
         * the owner->storage, the storage->lock must be held before
         * setting owner->storage ptr to NULL.
         */
        prev_storage = cmpxchg(owner_storage_ptr, NULL, storage);
        if (unlikely(prev_storage)) {
                bpf_selem_unlink_map(first_selem);
                err = -EAGAIN;
                goto uncharge;
        }

        return 0;

uncharge:
        bpf_local_storage_free(storage, true);
        mem_uncharge(smap, owner, sizeof(*storage));
        return err;
}

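/* Create or update the selem of @smap in the owner's local storage.
 * The very first element also allocates and publishes the
 * bpf_local_storage itself.  With BPF_F_LOCK and an existing element,
 * the value is copied in place under the bpf_spin_lock without
 * relinking anything.  Otherwise a new selem is linked to the map and
 * the local_storage, and the old selem, if any, is unlinked and queued
 * for freeing.
 */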
/* sk cannot be going away because it is linking new elem
 * to sk->sk_bpf_storage. (i.e. sk->sk_refcnt cannot be 0).
 * Otherwise, it will become a leak (and other memory issues
 * during map destruction).
 */
struct bpf_local_storage_data *
bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
                         void *value, u64 map_flags, bool swap_uptrs, gfp_t gfp_flags)
{
        struct bpf_local_storage_data *old_sdata = NULL;
        struct bpf_local_storage_elem *alloc_selem, *selem = NULL;
        struct bpf_local_storage *local_storage;
        HLIST_HEAD(old_selem_free_list);
        unsigned long flags;
        int err;

        /* BPF_EXIST and BPF_NOEXIST cannot be both set */
        if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST) ||
            /* BPF_F_LOCK can only be used in a value with spin_lock */
            unlikely((map_flags & BPF_F_LOCK) &&
                     !btf_record_has_field(smap->map.record, BPF_SPIN_LOCK)))
                return ERR_PTR(-EINVAL);

        if (gfp_flags == GFP_KERNEL && (map_flags & ~BPF_F_LOCK) != BPF_NOEXIST)
                return ERR_PTR(-EINVAL);

        local_storage = rcu_dereference_check(*owner_storage(smap, owner),
                                              bpf_rcu_lock_held());
        if (!local_storage || hlist_empty(&local_storage->list)) {
                /* Very first elem for the owner */
                err = check_flags(NULL, map_flags);
                if (err)
                        return ERR_PTR(err);

                selem = bpf_selem_alloc(smap, owner, value, swap_uptrs, gfp_flags);
                if (!selem)
                        return ERR_PTR(-ENOMEM);

                err = bpf_local_storage_alloc(owner, smap, selem, gfp_flags);
                if (err) {
                        bpf_selem_free(selem, true);
                        mem_uncharge(smap, owner, smap->elem_size);
                        return ERR_PTR(err);
                }

                return SDATA(selem);
        }

        if ((map_flags & BPF_F_LOCK) && !(map_flags & BPF_NOEXIST)) {
                /* Hoping to find an old_sdata to do inline update
                 * such that it can avoid taking the local_storage->lock
                 * and changing the lists.
                 */
                old_sdata =
                        bpf_local_storage_lookup(local_storage, smap, false);
                err = check_flags(old_sdata, map_flags);
                if (err)
                        return ERR_PTR(err);
                if (old_sdata && selem_linked_to_storage_lockless(SELEM(old_sdata))) {
                        copy_map_value_locked(&smap->map, old_sdata->data,
                                              value, false);
                        return old_sdata;
                }
        }

        /* A lookup has just been done before and concluded a new selem is
         * needed.  The chance of an unnecessary alloc is unlikely.
         */
        alloc_selem = selem = bpf_selem_alloc(smap, owner, value, swap_uptrs, gfp_flags);
        if (!alloc_selem)
                return ERR_PTR(-ENOMEM);

        raw_spin_lock_irqsave(&local_storage->lock, flags);

        /* Recheck local_storage->list under local_storage->lock */
        if (unlikely(hlist_empty(&local_storage->list))) {
                /* A parallel del is happening and local_storage is going
                 * away.  It has just been checked before, so very
                 * unlikely.  Return instead of retry to keep things
                 * simple.
                 */
                err = -EAGAIN;
                goto unlock;
        }

        old_sdata = bpf_local_storage_lookup(local_storage, smap, false);
        err = check_flags(old_sdata, map_flags);
        if (err)
                goto unlock;

        if (old_sdata && (map_flags & BPF_F_LOCK)) {
                copy_map_value_locked(&smap->map, old_sdata->data, value,
                                      false);
                selem = SELEM(old_sdata);
                goto unlock;
        }

        alloc_selem = NULL;
        /* First, link the new selem to the map */
        bpf_selem_link_map(smap, selem);

        /* Second, link (and publish) the new selem to local_storage */
        bpf_selem_link_storage_nolock(local_storage, selem);

        /* Third, remove old selem, SELEM(old_sdata) */
        if (old_sdata) {
                bpf_selem_unlink_map(SELEM(old_sdata));
                bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata),
                                                &old_selem_free_list);
        }

unlock:
        raw_spin_unlock_irqrestore(&local_storage->lock, flags);
        bpf_selem_free_list(&old_selem_free_list, false);
        if (alloc_selem) {
                mem_uncharge(smap, owner, smap->elem_size);
                bpf_selem_free(alloc_selem, true);
        }
        return err ? ERR_PTR(err) : SDATA(selem);
}

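/* Assign a slot in the owner's storage->cache[] array to a new map.
 * The least-used slot is picked and its usage count is bumped; the
 * count is dropped again in bpf_local_storage_cache_idx_free() when
 * the map is freed.
 */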
static u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
{
        u64 min_usage = U64_MAX;
        u16 i, res = 0;

        spin_lock(&cache->idx_lock);

        for (i = 0; i < BPF_LOCAL_STORAGE_CACHE_SIZE; i++) {
                if (cache->idx_usage_counts[i] < min_usage) {
                        min_usage = cache->idx_usage_counts[i];
                        res = i;

                        /* Found a free cache_idx */
                        if (!min_usage)
                                break;
                }
        }
        cache->idx_usage_counts[res]++;

        spin_unlock(&cache->idx_lock);

        return res;
}

static void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
                                             u16 idx)
{
        spin_lock(&cache->idx_lock);
        cache->idx_usage_counts[idx]--;
        spin_unlock(&cache->idx_lock);
}

int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
{
        if (attr->map_flags & ~BPF_LOCAL_STORAGE_CREATE_FLAG_MASK ||
            !(attr->map_flags & BPF_F_NO_PREALLOC) ||
            attr->max_entries ||
            attr->key_size != sizeof(int) || !attr->value_size ||
            /* Enforce BTF for userspace sk dumping */
            !attr->btf_key_type_id || !attr->btf_value_type_id)
                return -EINVAL;

        if (attr->value_size > BPF_LOCAL_STORAGE_MAX_VALUE_SIZE)
                return -E2BIG;

        return 0;
}

int bpf_local_storage_map_check_btf(const struct bpf_map *map,
                                    const struct btf *btf,
                                    const struct btf_type *key_type,
                                    const struct btf_type *value_type)
{
        if (!btf_type_is_i32(key_type))
                return -EINVAL;

        return 0;
}

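/* Called when the owner itself is being destroyed (e.g. by
 * bpf_sk_storage_free() during __sk_destruct()).  Every selem is
 * unlinked from its map and from the local_storage, and the storage is
 * freed once the last selem has been removed.
 */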
void bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
{
        struct bpf_local_storage_elem *selem;
        bool free_storage = false;
        HLIST_HEAD(free_selem_list);
        struct hlist_node *n;
        unsigned long flags;

        /* Neither the bpf_prog nor the bpf_map's syscall
         * could be modifying the local_storage->list now.
         * Thus, no elem can be added to or deleted from the
         * local_storage->list by the bpf_prog or by the bpf_map's syscall.
         *
         * It is racing with bpf_local_storage_map_free() alone
         * when unlinking elem from the local_storage->list and
         * the map's bucket->list.
         */
        raw_spin_lock_irqsave(&local_storage->lock, flags);
        hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
                /* Always unlink from map before unlinking from
                 * local_storage.
                 */
                bpf_selem_unlink_map(selem);
                /* If local_storage list has only one element, the
                 * bpf_selem_unlink_storage_nolock() will return true.
                 * Otherwise, it will return false.  The current loop iteration
                 * intends to remove all local storage.  So the last iteration
                 * of the loop will set free_storage to true.
                 */
                free_storage = bpf_selem_unlink_storage_nolock(
                        local_storage, selem, &free_selem_list);
        }
        raw_spin_unlock_irqrestore(&local_storage->lock, flags);

        bpf_selem_free_list(&free_selem_list, true);

        if (free_storage)
                bpf_local_storage_free(local_storage, true);
}

u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map)
{
        struct bpf_local_storage_map *smap = (struct bpf_local_storage_map *)map;
        u64 usage = sizeof(*smap);

        /* The dynamically allocated selems are not counted currently. */
        usage += sizeof(*smap->buckets) * (1ULL << smap->bucket_log);
        return usage;
}

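/* Common map_alloc path for local storage maps.  The number of buckets
 * scales with num_possible_cpus() (minimum two), and @use_kmalloc_nolock
 * selects the allocation/freeing scheme for the storages and selems of
 * this map.
 */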
struct bpf_map *
bpf_local_storage_map_alloc(union bpf_attr *attr,
                            struct bpf_local_storage_cache *cache,
                            bool use_kmalloc_nolock)
{
        struct bpf_local_storage_map *smap;
        unsigned int i;
        u32 nbuckets;
        int err;

        smap = bpf_map_area_alloc(sizeof(*smap), NUMA_NO_NODE);
        if (!smap)
                return ERR_PTR(-ENOMEM);
        bpf_map_init_from_attr(&smap->map, attr);

        nbuckets = roundup_pow_of_two(num_possible_cpus());
        /* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */
        nbuckets = max_t(u32, 2, nbuckets);
        smap->bucket_log = ilog2(nbuckets);

        smap->buckets = bpf_map_kvcalloc(&smap->map, nbuckets,
                                         sizeof(*smap->buckets), GFP_USER | __GFP_NOWARN);
        if (!smap->buckets) {
                err = -ENOMEM;
                goto free_smap;
        }

        for (i = 0; i < nbuckets; i++) {
                INIT_HLIST_HEAD(&smap->buckets[i].list);
                raw_spin_lock_init(&smap->buckets[i].lock);
        }

        smap->elem_size = offsetof(struct bpf_local_storage_elem,
                                   sdata.data[attr->value_size]);

        /* In PREEMPT_RT, kmalloc(GFP_ATOMIC) is still not safe in non
         * preemptible context. Thus, enforce all storages to use
         * kmalloc_nolock() when CONFIG_PREEMPT_RT is enabled.
         */
        smap->use_kmalloc_nolock = IS_ENABLED(CONFIG_PREEMPT_RT) ? true : use_kmalloc_nolock;

        smap->cache_idx = bpf_local_storage_cache_idx_get(cache);
        return &smap->map;

free_smap:
        kvfree(smap->buckets);
        bpf_map_area_free(smap);
        return ERR_PTR(err);
}

void bpf_local_storage_map_free(struct bpf_map *map,
                                struct bpf_local_storage_cache *cache,
                                int __percpu *busy_counter)
{
        struct bpf_local_storage_map_bucket *b;
        struct bpf_local_storage_elem *selem;
        struct bpf_local_storage_map *smap;
        unsigned int i;

        smap = (struct bpf_local_storage_map *)map;
        bpf_local_storage_cache_idx_free(cache, smap->cache_idx);

        /* Note that this map might be concurrently cloned from
         * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
         * RCU read section to finish before proceeding. New RCU
         * read sections should be prevented via bpf_map_inc_not_zero.
         */
        synchronize_rcu();

        /* bpf prog and the userspace can no longer access this map
         * now.  No new selem (of this map) can be added
         * to the owner->storage or to the map bucket's list.
         *
         * The elem of this map can be cleaned up here
         * or when the storage is freed e.g.
         * by bpf_sk_storage_free() during __sk_destruct().
         */
        for (i = 0; i < (1U << smap->bucket_log); i++) {
                b = &smap->buckets[i];

                rcu_read_lock();
                /* No one is adding to b->list now */
                while ((selem = hlist_entry_safe(
                                rcu_dereference_raw(hlist_first_rcu(&b->list)),
                                struct bpf_local_storage_elem, map_node))) {
                        if (busy_counter)
                                this_cpu_inc(*busy_counter);
                        bpf_selem_unlink(selem, true);
                        if (busy_counter)
                                this_cpu_dec(*busy_counter);
                        cond_resched_rcu();
                }
                rcu_read_unlock();
        }

        /* While freeing the storage we may still need to access the map.
         *
         * e.g. when bpf_sk_storage_free() has unlinked selem from the map
         * which then made the above while((selem = ...)) loop
         * exit immediately.
         *
         * However, while freeing the storage one still needs to access the
         * smap->elem_size to do the uncharging in
         * bpf_selem_unlink_storage_nolock().
         *
         * Hence, wait another rcu grace period for the storage to be freed.
         */
        synchronize_rcu();

        if (smap->use_kmalloc_nolock) {
                rcu_barrier_tasks_trace();
                rcu_barrier();
        }
        kvfree(smap->buckets);
        bpf_map_area_free(smap);
}