GitHub Repository: torvalds/linux
Path: blob/master/kernel/bpf/arraymap.c
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016,2017 Facebook
 */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/filter.h>
#include <linux/perf_event.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>
#include <linux/btf_ids.h>
#include <crypto/sha2.h>

#include "map_in_map.h"

#define ARRAY_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK | \
	 BPF_F_PRESERVE_ELEMS | BPF_F_INNER_MAP)

static void bpf_array_free_percpu(struct bpf_array *array)
{
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		free_percpu(array->pptrs[i]);
		cond_resched();
	}
}

static int bpf_array_alloc_percpu(struct bpf_array *array)
{
	void __percpu *ptr;
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		ptr = bpf_map_alloc_percpu(&array->map, array->elem_size, 8,
					   GFP_USER | __GFP_NOWARN);
		if (!ptr) {
			bpf_array_free_percpu(array);
			return -ENOMEM;
		}
		array->pptrs[i] = ptr;
		cond_resched();
	}

	return 0;
}

/* Called from syscall */
int array_map_alloc_check(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int numa_node = bpf_map_attr_numa_node(attr);

	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    attr->value_size == 0 ||
	    attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
	    !bpf_map_flags_access_ok(attr->map_flags) ||
	    (percpu && numa_node != NUMA_NO_NODE))
		return -EINVAL;

	if (attr->map_type != BPF_MAP_TYPE_ARRAY &&
	    attr->map_flags & (BPF_F_MMAPABLE | BPF_F_INNER_MAP))
		return -EINVAL;

	if (attr->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
	    attr->map_flags & BPF_F_PRESERVE_ELEMS)
		return -EINVAL;

	/* avoid overflow on round_up(map->value_size) */
	if (attr->value_size > INT_MAX)
		return -E2BIG;
	/* percpu map value size is bound by PCPU_MIN_UNIT_SIZE */
	if (percpu && round_up(attr->value_size, 8) > PCPU_MIN_UNIT_SIZE)
		return -E2BIG;

	return 0;
}
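
/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * assuming libbpf's bpf_map_create() API, a map that passes the checks in
 * array_map_alloc_check() above could be created from userspace roughly as
 * (the map name is hypothetical):
 *
 *	int fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "my_array",
 *				sizeof(__u32), sizeof(__u64), 256, NULL);
 *
 * key_size must be exactly 4, value_size non-zero and at most INT_MAX, and
 * max_entries non-zero, matching the conditions enforced above.
 */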

static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int numa_node = bpf_map_attr_numa_node(attr);
	u32 elem_size, index_mask, max_entries;
	bool bypass_spec_v1 = bpf_bypass_spec_v1(NULL);
	u64 array_size, mask64;
	struct bpf_array *array;

	elem_size = round_up(attr->value_size, 8);

	max_entries = attr->max_entries;

	/* On 32 bit archs roundup_pow_of_two() with max_entries that has
	 * upper most bit set in u32 space is undefined behavior due to
	 * resulting 1U << 32, so do it manually here in u64 space.
	 */
	mask64 = fls_long(max_entries - 1);
	mask64 = 1ULL << mask64;
	mask64 -= 1;

	index_mask = mask64;
	if (!bypass_spec_v1) {
		/* round up array size to nearest power of 2,
		 * since cpu will speculate within index_mask limits
		 */
		max_entries = index_mask + 1;
		/* Check for overflows. */
		if (max_entries < attr->max_entries)
			return ERR_PTR(-E2BIG);
	}

	array_size = sizeof(*array);
	if (percpu) {
		array_size += (u64) max_entries * sizeof(void *);
	} else {
		/* rely on vmalloc() to return page-aligned memory and
		 * ensure array->value is exactly page-aligned
		 */
		if (attr->map_flags & BPF_F_MMAPABLE) {
			array_size = PAGE_ALIGN(array_size);
			array_size += PAGE_ALIGN((u64) max_entries * elem_size);
		} else {
			array_size += (u64) max_entries * elem_size;
		}
	}

	/* allocate all map elements and zero-initialize them */
	if (attr->map_flags & BPF_F_MMAPABLE) {
		void *data;

		/* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */
		data = bpf_map_area_mmapable_alloc(array_size, numa_node);
		if (!data)
			return ERR_PTR(-ENOMEM);
		array = data + PAGE_ALIGN(sizeof(struct bpf_array))
			- offsetof(struct bpf_array, value);
	} else {
		array = bpf_map_area_alloc(array_size, numa_node);
	}
	if (!array)
		return ERR_PTR(-ENOMEM);
	array->index_mask = index_mask;
	array->map.bypass_spec_v1 = bypass_spec_v1;

	/* copy mandatory map attributes */
	bpf_map_init_from_attr(&array->map, attr);
	array->elem_size = elem_size;

	if (percpu && bpf_array_alloc_percpu(array)) {
		bpf_map_area_free(array);
		return ERR_PTR(-ENOMEM);
	}

	return &array->map;
}
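
/*
 * Editor's note (not part of the original file): for BPF_F_MMAPABLE maps the
 * element area comes from bpf_map_area_mmapable_alloc() and stays
 * page-aligned, while the struct bpf_array header is placed so that
 * array->value starts exactly on a page boundary; array_map_vmalloc_addr()
 * below recovers the original vmalloc address by rounding the array pointer
 * down to PAGE_SIZE.
 */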

static void *array_map_elem_ptr(struct bpf_array* array, u32 index)
{
	return array->value + (u64)array->elem_size * index;
}

/* Called from syscall or from eBPF program */
static void *array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return array->value + (u64)array->elem_size * (index & array->index_mask);
}

static int array_map_get_hash(struct bpf_map *map, u32 hash_buf_size,
			      void *hash_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);

	sha256(array->value, (u64)array->elem_size * array->map.max_entries,
	       hash_buf);
	memcpy(array->map.sha, hash_buf, sizeof(array->map.sha));
	return 0;
}

static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
				       u32 off)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);

	if (map->max_entries != 1)
		return -ENOTSUPP;
	if (off >= map->value_size)
		return -EINVAL;

	*imm = (unsigned long)array->value;
	return 0;
}

static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm,
				       u32 *off)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u64 base = (unsigned long)array->value;
	u64 range = array->elem_size;

	if (map->max_entries != 1)
		return -ENOTSUPP;
	if (imm < base || imm >= base + range)
		return -ENOENT;

	*off = imm - base;
	return 0;
}

/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
static int array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_insn *insn = insn_buf;
	u32 elem_size = array->elem_size;
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	if (map->map_flags & BPF_F_INNER_MAP)
		return -EOPNOTSUPP;

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (!map->bypass_spec_v1) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
	}

	if (is_power_of_2(elem_size)) {
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	} else {
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	}
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);
	return insn - insn_buf;
}
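
/*
 * Editor's sketch (not part of the original file): the instruction sequence
 * emitted above behaves roughly like the following C, with the AND only
 * present when the Spectre v1 mitigation is active (!map->bypass_spec_v1):
 *
 *	u32 idx = *(u32 *)key;
 *	if (idx >= map->max_entries)
 *		return NULL;
 *	idx &= array->index_mask;
 *	return array->value + (u64)elem_size * idx;
 */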

/* Called from eBPF program */
static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}

/* emit BPF instructions equivalent to C code of percpu_array_map_lookup_elem() */
static int percpu_array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_insn *insn = insn_buf;

	if (!bpf_jit_supports_percpu_insn())
		return -EOPNOTSUPP;

	if (map->map_flags & BPF_F_INNER_MAP)
		return -EOPNOTSUPP;

	BUILD_BUG_ON(offsetof(struct bpf_array, map) != 0);
	*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct bpf_array, pptrs));

	*insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0);
	if (!map->bypass_spec_v1) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 6);
		*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_0, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 5);
	}

	*insn++ = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
	*insn++ = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
	*insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0);
	*insn++ = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(BPF_REG_0, 0);
	return insn - insn_buf;
}

static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (cpu >= nr_cpu_ids)
		return NULL;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return per_cpu_ptr(array->pptrs[index & array->index_mask], cpu);
}

int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(index >= array->map.max_entries))
		return -ENOENT;

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
	size = array->elem_size;
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
		check_and_init_map_value(map, value + off);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

/* Called from syscall */
int bpf_array_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	u32 index = key ? *(u32 *)key : U32_MAX;
	u32 *next = (u32 *)next_key;

	if (index >= map->max_entries) {
		*next = 0;
		return 0;
	}

	if (index == map->max_entries - 1)
		return -ENOENT;

	*next = index + 1;
	return 0;
}
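
/*
 * Editor's sketch (not part of the original file): from userspace this is
 * what drives a full-map walk, for example with libbpf's
 * bpf_map_get_next_key() (assumed API, names hypothetical):
 *
 *	__u32 key, next;
 *	int err = bpf_map_get_next_key(map_fd, NULL, &next);
 *	while (!err) {
 *		key = next;
 *		err = bpf_map_get_next_key(map_fd, &key, &next);
 *	}
 *
 * A NULL or out-of-range key restarts the walk at index 0, and -ENOENT from
 * the last index terminates it, matching the logic above.
 */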

/* Called from syscall or from eBPF program */
static long array_map_update_elem(struct bpf_map *map, void *key, void *value,
				  u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	char *val;

	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags & BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	if (unlikely((map_flags & BPF_F_LOCK) &&
		     !btf_record_has_field(map->record, BPF_SPIN_LOCK)))
		return -EINVAL;

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		val = this_cpu_ptr(array->pptrs[index & array->index_mask]);
		copy_map_value(map, val, value);
		bpf_obj_free_fields(array->map.record, val);
	} else {
		val = array->value +
			(u64)array->elem_size * (index & array->index_mask);
		if (map_flags & BPF_F_LOCK)
			copy_map_value_locked(map, val, value, false);
		else
			copy_map_value(map, val, value);
		bpf_obj_free_fields(array->map.record, val);
	}
	return 0;
}
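
/*
 * Editor's sketch (not part of the original file): from a BPF program the
 * same path is reached through the bpf_map_update_elem() helper; note that
 * BPF_NOEXIST can never succeed here because every slot is pre-allocated:
 *
 *	__u32 key = 0;
 *	__u64 val = 42;
 *	bpf_map_update_elem(&my_array, &key, &val, BPF_ANY);
 *
 * "my_array" is a hypothetical map defined in the program's .maps section.
 */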

int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
			    u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags == BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	/* the user space will provide round_up(value_size, 8) bytes that
	 * will be copied into per-cpu area. bpf programs can only access
	 * value_size of it. During lookup the same extra bytes will be
	 * returned or zeros which were zero-filled by percpu_alloc,
	 * so no kernel data leaks possible
	 */
	size = array->elem_size;
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value + off);
		bpf_obj_free_fields(array->map.record, per_cpu_ptr(pptr, cpu));
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

/* Called from syscall or from eBPF program */
static long array_map_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

static void *array_map_vmalloc_addr(struct bpf_array *array)
{
	return (void *)round_down((unsigned long)array, PAGE_SIZE);
}

static void array_map_free_internal_structs(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	/* We only free internal structs on uref dropping to zero */
	if (!bpf_map_has_internal_structs(map))
		return;

	for (i = 0; i < array->map.max_entries; i++)
		bpf_map_free_internal_structs(map, array_map_elem_ptr(array, i));
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	if (!IS_ERR_OR_NULL(map->record)) {
		if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
			for (i = 0; i < array->map.max_entries; i++) {
				void __percpu *pptr = array->pptrs[i & array->index_mask];
				int cpu;

				for_each_possible_cpu(cpu) {
					bpf_obj_free_fields(map->record, per_cpu_ptr(pptr, cpu));
					cond_resched();
				}
			}
		} else {
			for (i = 0; i < array->map.max_entries; i++)
				bpf_obj_free_fields(map->record, array_map_elem_ptr(array, i));
		}
	}

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		bpf_array_free_percpu(array);

	if (array->map.map_flags & BPF_F_MMAPABLE)
		bpf_map_area_free(array_map_vmalloc_addr(array));
	else
		bpf_map_area_free(array);
}

static void array_map_seq_show_elem(struct bpf_map *map, void *key,
				    struct seq_file *m)
{
	void *value;

	rcu_read_lock();

	value = array_map_lookup_elem(map, key);
	if (!value) {
		rcu_read_unlock();
		return;
	}

	if (map->btf_key_type_id)
		seq_printf(m, "%u: ", *(u32 *)key);
	btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
	seq_putc(m, '\n');

	rcu_read_unlock();
}

static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key,
					   struct seq_file *m)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu;

	rcu_read_lock();

	seq_printf(m, "%u: {\n", *(u32 *)key);
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		seq_printf(m, "\tcpu%d: ", cpu);
		btf_type_seq_show(map->btf, map->btf_value_type_id,
				  per_cpu_ptr(pptr, cpu), m);
		seq_putc(m, '\n');
	}
	seq_puts(m, "}\n");

	rcu_read_unlock();
}

static int array_map_check_btf(const struct bpf_map *map,
			       const struct btf *btf,
			       const struct btf_type *key_type,
			       const struct btf_type *value_type)
{
	/* One exception for keyless BTF: .bss/.data/.rodata map */
	if (btf_type_is_void(key_type)) {
		if (map->map_type != BPF_MAP_TYPE_ARRAY ||
		    map->max_entries != 1)
			return -EINVAL;

		if (BTF_INFO_KIND(value_type->info) != BTF_KIND_DATASEC)
			return -EINVAL;

		return 0;
	}

	/*
	 * Bpf array can only take a u32 key. This check makes sure
	 * that the btf matches the attr used during map_create.
	 */
	if (!btf_type_is_i32(key_type))
		return -EINVAL;

	return 0;
}

static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	pgoff_t pgoff = PAGE_ALIGN(sizeof(*array)) >> PAGE_SHIFT;

	if (!(map->map_flags & BPF_F_MMAPABLE))
		return -EINVAL;

	if (vma->vm_pgoff * PAGE_SIZE + (vma->vm_end - vma->vm_start) >
	    PAGE_ALIGN((u64)array->map.max_entries * array->elem_size))
		return -EINVAL;

	return remap_vmalloc_range(vma, array_map_vmalloc_addr(array),
				   vma->vm_pgoff + pgoff);
}
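
/*
 * Editor's sketch (not part of the original file): a BPF_F_MMAPABLE array can
 * be mapped directly into userspace, roughly:
 *
 *	void *base = mmap(NULL, value_area_size, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, map_fd, 0);
 *
 * where value_area_size stands for PAGE_ALIGN(max_entries * elem_size) and
 * map_fd is the map's file descriptor (both names hypothetical). The
 * offset/length check above rejects mappings that would extend past the
 * page-aligned value area.
 */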

static bool array_map_meta_equal(const struct bpf_map *meta0,
				 const struct bpf_map *meta1)
{
	if (!bpf_map_meta_equal(meta0, meta1))
		return false;
	return meta0->map_flags & BPF_F_INNER_MAP ? true :
	       meta0->max_entries == meta1->max_entries;
}

struct bpf_iter_seq_array_map_info {
	struct bpf_map *map;
	void *percpu_value_buf;
	u32 index;
};

static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_array_map_info *info = seq->private;
	struct bpf_map *map = info->map;
	struct bpf_array *array;
	u32 index;

	if (info->index >= map->max_entries)
		return NULL;

	if (*pos == 0)
		++*pos;
	array = container_of(map, struct bpf_array, map);
	index = info->index & array->index_mask;
	if (info->percpu_value_buf)
		return (void *)(uintptr_t)array->pptrs[index];
	return array_map_elem_ptr(array, index);
}

static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_array_map_info *info = seq->private;
	struct bpf_map *map = info->map;
	struct bpf_array *array;
	u32 index;

	++*pos;
	++info->index;
	if (info->index >= map->max_entries)
		return NULL;

	array = container_of(map, struct bpf_array, map);
	index = info->index & array->index_mask;
	if (info->percpu_value_buf)
		return (void *)(uintptr_t)array->pptrs[index];
	return array_map_elem_ptr(array, index);
}

static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
{
	struct bpf_iter_seq_array_map_info *info = seq->private;
	struct bpf_iter__bpf_map_elem ctx = {};
	struct bpf_map *map = info->map;
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;
	int off = 0, cpu = 0;
	void __percpu *pptr;
	u32 size;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, v == NULL);
	if (!prog)
		return 0;

	ctx.meta = &meta;
	ctx.map = info->map;
	if (v) {
		ctx.key = &info->index;

		if (!info->percpu_value_buf) {
			ctx.value = v;
		} else {
			pptr = (void __percpu *)(uintptr_t)v;
			size = array->elem_size;
			for_each_possible_cpu(cpu) {
				copy_map_value_long(map, info->percpu_value_buf + off,
						    per_cpu_ptr(pptr, cpu));
				check_and_init_map_value(map, info->percpu_value_buf + off);
				off += size;
			}
			ctx.value = info->percpu_value_buf;
		}
	}

	return bpf_iter_run_prog(prog, &ctx);
}

static int bpf_array_map_seq_show(struct seq_file *seq, void *v)
{
	return __bpf_array_map_seq_show(seq, v);
}

static void bpf_array_map_seq_stop(struct seq_file *seq, void *v)
{
	if (!v)
		(void)__bpf_array_map_seq_show(seq, NULL);
}

static int bpf_iter_init_array_map(void *priv_data,
				   struct bpf_iter_aux_info *aux)
{
	struct bpf_iter_seq_array_map_info *seq_info = priv_data;
	struct bpf_map *map = aux->map;
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *value_buf;
	u32 buf_size;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		buf_size = array->elem_size * num_possible_cpus();
		value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
		if (!value_buf)
			return -ENOMEM;

		seq_info->percpu_value_buf = value_buf;
	}

	/* bpf_iter_attach_map() acquires a map uref, and the uref may be
	 * released before or in the middle of iterating map elements, so
	 * acquire an extra map uref for iterator.
	 */
	bpf_map_inc_with_uref(map);
	seq_info->map = map;
	return 0;
}

static void bpf_iter_fini_array_map(void *priv_data)
{
	struct bpf_iter_seq_array_map_info *seq_info = priv_data;

	bpf_map_put_with_uref(seq_info->map);
	kfree(seq_info->percpu_value_buf);
}

static const struct seq_operations bpf_array_map_seq_ops = {
	.start	= bpf_array_map_seq_start,
	.next	= bpf_array_map_seq_next,
	.stop	= bpf_array_map_seq_stop,
	.show	= bpf_array_map_seq_show,
};

static const struct bpf_iter_seq_info iter_seq_info = {
	.seq_ops		= &bpf_array_map_seq_ops,
	.init_seq_private	= bpf_iter_init_array_map,
	.fini_seq_private	= bpf_iter_fini_array_map,
	.seq_priv_size		= sizeof(struct bpf_iter_seq_array_map_info),
};

static long bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_fn,
				    void *callback_ctx, u64 flags)
{
	u32 i, key, num_elems = 0;
	struct bpf_array *array;
	bool is_percpu;
	u64 ret = 0;
	void *val;

	cant_migrate();

	if (flags != 0)
		return -EINVAL;

	is_percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	array = container_of(map, struct bpf_array, map);
	for (i = 0; i < map->max_entries; i++) {
		if (is_percpu)
			val = this_cpu_ptr(array->pptrs[i]);
		else
			val = array_map_elem_ptr(array, i);
		num_elems++;
		key = i;
		ret = callback_fn((u64)(long)map, (u64)(long)&key,
				  (u64)(long)val, (u64)(long)callback_ctx, 0);
		/* return value: 0 - continue, 1 - stop and return */
		if (ret)
			break;
	}

	return num_elems;
}
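
/*
 * Editor's note (not part of the original file): this is the backend of the
 * bpf_for_each_map_elem() helper. The callback invoked above is expected to
 * have roughly this shape on the BPF side:
 *
 *	static long cb(struct bpf_map *map, __u32 *key, void *value, void *ctx);
 *
 * returning 0 to continue and 1 to stop, as the comment in the loop states.
 */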

static u64 array_map_mem_usage(const struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	bool percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	u32 elem_size = array->elem_size;
	u64 entries = map->max_entries;
	u64 usage = sizeof(*array);

	if (percpu) {
		usage += entries * sizeof(void *);
		usage += entries * elem_size * num_possible_cpus();
	} else {
		if (map->map_flags & BPF_F_MMAPABLE) {
			usage = PAGE_ALIGN(usage);
			usage += PAGE_ALIGN(entries * elem_size);
		} else {
			usage += entries * elem_size;
		}
	}
	return usage;
}

BTF_ID_LIST_SINGLE(array_map_btf_ids, struct, bpf_array)
const struct bpf_map_ops array_map_ops = {
	.map_meta_equal = array_map_meta_equal,
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = bpf_array_get_next_key,
	.map_release_uref = array_map_free_internal_structs,
	.map_lookup_elem = array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_gen_lookup = array_map_gen_lookup,
	.map_direct_value_addr = array_map_direct_value_addr,
	.map_direct_value_meta = array_map_direct_value_meta,
	.map_mmap = array_map_mmap,
	.map_seq_show_elem = array_map_seq_show_elem,
	.map_check_btf = array_map_check_btf,
	.map_lookup_batch = generic_map_lookup_batch,
	.map_update_batch = generic_map_update_batch,
	.map_set_for_each_callback_args = map_set_for_each_callback_args,
	.map_for_each_callback = bpf_for_each_array_elem,
	.map_mem_usage = array_map_mem_usage,
	.map_btf_id = &array_map_btf_ids[0],
	.iter_seq_info = &iter_seq_info,
	.map_get_hash = &array_map_get_hash,
};

const struct bpf_map_ops percpu_array_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = bpf_array_get_next_key,
	.map_lookup_elem = percpu_array_map_lookup_elem,
	.map_gen_lookup = percpu_array_map_gen_lookup,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_lookup_percpu_elem = percpu_array_map_lookup_percpu_elem,
	.map_seq_show_elem = percpu_array_map_seq_show_elem,
	.map_check_btf = array_map_check_btf,
	.map_lookup_batch = generic_map_lookup_batch,
	.map_update_batch = generic_map_update_batch,
	.map_set_for_each_callback_args = map_set_for_each_callback_args,
	.map_for_each_callback = bpf_for_each_array_elem,
	.map_mem_usage = array_map_mem_usage,
	.map_btf_id = &array_map_btf_ids[0],
	.iter_seq_info = &iter_seq_info,
};
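
/*
 * Editor's sketch (not part of the original file): a per-CPU array as it is
 * typically declared in a BPF program with libbpf's BTF-defined map syntax
 * (all names hypothetical):
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
 *		__uint(max_entries, 1);
 *		__type(key, __u32);
 *		__type(value, __u64);
 *	} scratch SEC(".maps");
 *
 * Lookups from the program return the current CPU's copy (see
 * percpu_array_map_lookup_elem() above), while the syscall path copies every
 * CPU's value via bpf_percpu_array_copy().
 */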

static int fd_array_map_alloc_check(union bpf_attr *attr)
{
	/* only file descriptors can be stored in this type of map */
	if (attr->value_size != sizeof(u32))
		return -EINVAL;
	/* Program read-only/write-only not supported for special maps yet. */
	if (attr->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG))
		return -EINVAL;
	return array_map_alloc_check(attr);
}

static void fd_array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	/* make sure it's empty */
	for (i = 0; i < array->map.max_entries; i++)
		BUG_ON(array->ptrs[i] != NULL);

	bpf_map_area_free(array);
}

static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	return ERR_PTR(-EOPNOTSUPP);
}

/* only called from syscall */
int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
{
	void **elem, *ptr;
	int ret = 0;

	if (!map->ops->map_fd_sys_lookup_elem)
		return -ENOTSUPP;

	rcu_read_lock();
	elem = array_map_lookup_elem(map, key);
	if (elem && (ptr = READ_ONCE(*elem)))
		*value = map->ops->map_fd_sys_lookup_elem(ptr);
	else
		ret = -ENOENT;
	rcu_read_unlock();

	return ret;
}

/* only called from syscall */
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
				 void *key, void *value, u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *new_ptr, *old_ptr;
	u32 index = *(u32 *)key, ufd;

	if (map_flags != BPF_ANY)
		return -EINVAL;

	if (index >= array->map.max_entries)
		return -E2BIG;

	ufd = *(u32 *)value;
	new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
	if (IS_ERR(new_ptr))
		return PTR_ERR(new_ptr);

	if (map->ops->map_poke_run) {
		mutex_lock(&array->aux->poke_mutex);
		old_ptr = xchg(array->ptrs + index, new_ptr);
		map->ops->map_poke_run(map, index, old_ptr, new_ptr);
		mutex_unlock(&array->aux->poke_mutex);
	} else {
		old_ptr = xchg(array->ptrs + index, new_ptr);
	}

	if (old_ptr)
		map->ops->map_fd_put_ptr(map, old_ptr, true);
	return 0;
}

static long __fd_array_map_delete_elem(struct bpf_map *map, void *key, bool need_defer)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *old_ptr;
	u32 index = *(u32 *)key;

	if (index >= array->map.max_entries)
		return -E2BIG;

	if (map->ops->map_poke_run) {
		mutex_lock(&array->aux->poke_mutex);
		old_ptr = xchg(array->ptrs + index, NULL);
		map->ops->map_poke_run(map, index, old_ptr, NULL);
		mutex_unlock(&array->aux->poke_mutex);
	} else {
		old_ptr = xchg(array->ptrs + index, NULL);
	}

	if (old_ptr) {
		map->ops->map_fd_put_ptr(map, old_ptr, need_defer);
		return 0;
	} else {
		return -ENOENT;
	}
}

static long fd_array_map_delete_elem(struct bpf_map *map, void *key)
{
	return __fd_array_map_delete_elem(map, key, true);
}

static void *prog_fd_array_get_ptr(struct bpf_map *map,
				   struct file *map_file, int fd)
{
	struct bpf_prog *prog = bpf_prog_get(fd);
	bool is_extended;

	if (IS_ERR(prog))
		return prog;

	if (prog->type == BPF_PROG_TYPE_EXT ||
	    !bpf_prog_map_compatible(map, prog)) {
		bpf_prog_put(prog);
		return ERR_PTR(-EINVAL);
	}

	mutex_lock(&prog->aux->ext_mutex);
	is_extended = prog->aux->is_extended;
	if (!is_extended)
		prog->aux->prog_array_member_cnt++;
	mutex_unlock(&prog->aux->ext_mutex);
	if (is_extended) {
		/* Extended prog can not be tail callee. It's to prevent a
		 * potential infinite loop like:
		 * tail callee prog entry -> tail callee prog subprog ->
		 * freplace prog entry --tailcall-> tail callee prog entry.
		 */
		bpf_prog_put(prog);
		return ERR_PTR(-EBUSY);
	}

	return prog;
}

static void prog_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
	struct bpf_prog *prog = ptr;

	mutex_lock(&prog->aux->ext_mutex);
	prog->aux->prog_array_member_cnt--;
	mutex_unlock(&prog->aux->ext_mutex);
	/* bpf_prog is freed after one RCU or tasks trace grace period */
	bpf_prog_put(prog);
}

static u32 prog_fd_array_sys_lookup_elem(void *ptr)
{
	return ((struct bpf_prog *)ptr)->aux->id;
}

/* decrement refcnt of all bpf_progs that are stored in this map */
static void bpf_fd_array_map_clear(struct bpf_map *map, bool need_defer)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	for (i = 0; i < array->map.max_entries; i++)
		__fd_array_map_delete_elem(map, &i, need_defer);
}

static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
					 struct seq_file *m)
{
	void **elem, *ptr;
	u32 prog_id;

	rcu_read_lock();

	elem = array_map_lookup_elem(map, key);
	if (elem) {
		ptr = READ_ONCE(*elem);
		if (ptr) {
			seq_printf(m, "%u: ", *(u32 *)key);
			prog_id = prog_fd_array_sys_lookup_elem(ptr);
			btf_type_seq_show(map->btf, map->btf_value_type_id,
					  &prog_id, m);
			seq_putc(m, '\n');
		}
	}

	rcu_read_unlock();
}

struct prog_poke_elem {
	struct list_head list;
	struct bpf_prog_aux *aux;
};

static int prog_array_map_poke_track(struct bpf_map *map,
				     struct bpf_prog_aux *prog_aux)
{
	struct prog_poke_elem *elem;
	struct bpf_array_aux *aux;
	int ret = 0;

	aux = container_of(map, struct bpf_array, map)->aux;
	mutex_lock(&aux->poke_mutex);
	list_for_each_entry(elem, &aux->poke_progs, list) {
		if (elem->aux == prog_aux)
			goto out;
	}

	elem = kmalloc(sizeof(*elem), GFP_KERNEL);
	if (!elem) {
		ret = -ENOMEM;
		goto out;
	}

	INIT_LIST_HEAD(&elem->list);
	/* We must track the program's aux info at this point in time
	 * since the program pointer itself may not be stable yet, see
	 * also comment in prog_array_map_poke_run().
	 */
	elem->aux = prog_aux;

	list_add_tail(&elem->list, &aux->poke_progs);
out:
	mutex_unlock(&aux->poke_mutex);
	return ret;
}

static void prog_array_map_poke_untrack(struct bpf_map *map,
					struct bpf_prog_aux *prog_aux)
{
	struct prog_poke_elem *elem, *tmp;
	struct bpf_array_aux *aux;

	aux = container_of(map, struct bpf_array, map)->aux;
	mutex_lock(&aux->poke_mutex);
	list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
		if (elem->aux == prog_aux) {
			list_del_init(&elem->list);
			kfree(elem);
			break;
		}
	}
	mutex_unlock(&aux->poke_mutex);
}

void __weak bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
				      struct bpf_prog *new, struct bpf_prog *old)
{
	WARN_ON_ONCE(1);
}

static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
				    struct bpf_prog *old,
				    struct bpf_prog *new)
{
	struct prog_poke_elem *elem;
	struct bpf_array_aux *aux;

	aux = container_of(map, struct bpf_array, map)->aux;
	WARN_ON_ONCE(!mutex_is_locked(&aux->poke_mutex));

	list_for_each_entry(elem, &aux->poke_progs, list) {
		struct bpf_jit_poke_descriptor *poke;
		int i;

		for (i = 0; i < elem->aux->size_poke_tab; i++) {
			poke = &elem->aux->poke_tab[i];

			/* Few things to be aware of:
			 *
			 * 1) We can only ever access aux in this context, but
			 *    not aux->prog since it might not be stable yet and
			 *    there could be danger of use after free otherwise.
			 * 2) Initially when we start tracking aux, the program
			 *    is not JITed yet and also does not have a kallsyms
			 *    entry. We skip these as poke->tailcall_target_stable
			 *    is not active yet. The JIT will do the final fixup
			 *    before setting it stable. The various
			 *    poke->tailcall_target_stable are successively
			 *    activated, so tail call updates can arrive from here
			 *    while JIT is still finishing its final fixup for
			 *    non-activated poke entries.
			 * 3) Also programs reaching refcount of zero while patching
			 *    is in progress is okay since we're protected under
			 *    poke_mutex and untrack the programs before the JIT
			 *    buffer is freed.
			 */
			if (!READ_ONCE(poke->tailcall_target_stable))
				continue;
			if (poke->reason != BPF_POKE_REASON_TAIL_CALL)
				continue;
			if (poke->tail_call.map != map ||
			    poke->tail_call.key != key)
				continue;

			bpf_arch_poke_desc_update(poke, new, old);
		}
	}
}

static void prog_array_map_clear_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_array_aux,
					   work)->map;
	bpf_fd_array_map_clear(map, true);
	bpf_map_put(map);
}

static void prog_array_map_clear(struct bpf_map *map)
{
	struct bpf_array_aux *aux = container_of(map, struct bpf_array,
						 map)->aux;
	bpf_map_inc(map);
	schedule_work(&aux->work);
}

static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
{
	struct bpf_array_aux *aux;
	struct bpf_map *map;

	aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT);
	if (!aux)
		return ERR_PTR(-ENOMEM);

	INIT_WORK(&aux->work, prog_array_map_clear_deferred);
	INIT_LIST_HEAD(&aux->poke_progs);
	mutex_init(&aux->poke_mutex);

	map = array_map_alloc(attr);
	if (IS_ERR(map)) {
		kfree(aux);
		return map;
	}

	container_of(map, struct bpf_array, map)->aux = aux;
	aux->map = map;

	return map;
}

static void prog_array_map_free(struct bpf_map *map)
{
	struct prog_poke_elem *elem, *tmp;
	struct bpf_array_aux *aux;

	aux = container_of(map, struct bpf_array, map)->aux;
	list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
		list_del_init(&elem->list);
		kfree(elem);
	}
	kfree(aux);
	fd_array_map_free(map);
}

/* prog_array->aux->{type,jited} is a runtime binding.
 * Doing static check alone in the verifier is not enough.
 * Thus, prog_array_map cannot be used as an inner_map
 * and map_meta_equal is not implemented.
 */
const struct bpf_map_ops prog_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = prog_array_map_alloc,
	.map_free = prog_array_map_free,
	.map_poke_track = prog_array_map_poke_track,
	.map_poke_untrack = prog_array_map_poke_untrack,
	.map_poke_run = prog_array_map_poke_run,
	.map_get_next_key = bpf_array_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = prog_fd_array_get_ptr,
	.map_fd_put_ptr = prog_fd_array_put_ptr,
	.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
	.map_release_uref = prog_array_map_clear,
	.map_seq_show_elem = prog_array_map_seq_show_elem,
	.map_mem_usage = array_map_mem_usage,
	.map_btf_id = &array_map_btf_ids[0],
};
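
/*
 * Editor's sketch (not part of the original file): a prog array is consumed
 * from BPF programs via the tail-call helper, e.g.:
 *
 *	bpf_tail_call(ctx, &jmp_table, slot);
 *
 * On success the call does not return, so code after it only runs when the
 * slot is empty or the call fails; "jmp_table" and "slot" are hypothetical.
 * The poke infrastructure above patches the JITed call targets whenever a
 * prog array slot is updated or cleared.
 */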

static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
						   struct file *map_file)
{
	struct bpf_event_entry *ee;

	ee = kzalloc(sizeof(*ee), GFP_KERNEL);
	if (ee) {
		ee->event = perf_file->private_data;
		ee->perf_file = perf_file;
		ee->map_file = map_file;
	}

	return ee;
}

static void __bpf_event_entry_free(struct rcu_head *rcu)
{
	struct bpf_event_entry *ee;

	ee = container_of(rcu, struct bpf_event_entry, rcu);
	fput(ee->perf_file);
	kfree(ee);
}

static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee)
{
	call_rcu(&ee->rcu, __bpf_event_entry_free);
}

static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
					 struct file *map_file, int fd)
{
	struct bpf_event_entry *ee;
	struct perf_event *event;
	struct file *perf_file;
	u64 value;

	perf_file = perf_event_get(fd);
	if (IS_ERR(perf_file))
		return perf_file;

	ee = ERR_PTR(-EOPNOTSUPP);
	event = perf_file->private_data;
	if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP)
		goto err_out;

	ee = bpf_event_entry_gen(perf_file, map_file);
	if (ee)
		return ee;
	ee = ERR_PTR(-ENOMEM);
err_out:
	fput(perf_file);
	return ee;
}

static void perf_event_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
	/* bpf_perf_event is freed after one RCU grace period */
	bpf_event_entry_free_rcu(ptr);
}

static void perf_event_fd_array_release(struct bpf_map *map,
					struct file *map_file)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_event_entry *ee;
	int i;

	if (map->map_flags & BPF_F_PRESERVE_ELEMS)
		return;

	rcu_read_lock();
	for (i = 0; i < array->map.max_entries; i++) {
		ee = READ_ONCE(array->ptrs[i]);
		if (ee && ee->map_file == map_file)
			__fd_array_map_delete_elem(map, &i, true);
	}
	rcu_read_unlock();
}

static void perf_event_fd_array_map_free(struct bpf_map *map)
{
	if (map->map_flags & BPF_F_PRESERVE_ELEMS)
		bpf_fd_array_map_clear(map, false);
	fd_array_map_free(map);
}

const struct bpf_map_ops perf_event_array_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = perf_event_fd_array_map_free,
	.map_get_next_key = bpf_array_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = perf_event_fd_array_get_ptr,
	.map_fd_put_ptr = perf_event_fd_array_put_ptr,
	.map_release = perf_event_fd_array_release,
	.map_check_btf = map_check_no_btf,
	.map_mem_usage = array_map_mem_usage,
	.map_btf_id = &array_map_btf_ids[0],
};
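
/*
 * Editor's sketch (not part of the original file): a perf event array is the
 * map type behind bpf_perf_event_output(); a typical BPF-side use is:
 *
 *	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
 *			      &data, sizeof(data));
 *
 * where "events" is a hypothetical BPF_MAP_TYPE_PERF_EVENT_ARRAY whose slots
 * userspace has populated with perf event FDs (see
 * perf_event_fd_array_get_ptr() above).
 */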

#ifdef CONFIG_CGROUPS
static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
				     struct file *map_file /* not used */,
				     int fd)
{
	return cgroup_get_from_fd(fd);
}

static void cgroup_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
	/* cgroup_put free cgrp after a rcu grace period */
	cgroup_put(ptr);
}

static void cgroup_fd_array_free(struct bpf_map *map)
{
	bpf_fd_array_map_clear(map, false);
	fd_array_map_free(map);
}

const struct bpf_map_ops cgroup_array_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = cgroup_fd_array_free,
	.map_get_next_key = bpf_array_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = cgroup_fd_array_get_ptr,
	.map_fd_put_ptr = cgroup_fd_array_put_ptr,
	.map_check_btf = map_check_no_btf,
	.map_mem_usage = array_map_mem_usage,
	.map_btf_id = &array_map_btf_ids[0],
};
#endif

static struct bpf_map *array_of_map_alloc(union bpf_attr *attr)
{
	struct bpf_map *map, *inner_map_meta;

	inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
	if (IS_ERR(inner_map_meta))
		return inner_map_meta;

	map = array_map_alloc(attr);
	if (IS_ERR(map)) {
		bpf_map_meta_free(inner_map_meta);
		return map;
	}

	map->inner_map_meta = inner_map_meta;

	return map;
}

static void array_of_map_free(struct bpf_map *map)
{
	/* map->inner_map_meta is only accessed by syscall which
	 * is protected by fdget/fdput.
	 */
	bpf_map_meta_free(map->inner_map_meta);
	bpf_fd_array_map_clear(map, false);
	fd_array_map_free(map);
}

static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_map **inner_map = array_map_lookup_elem(map, key);

	if (!inner_map)
		return NULL;

	return READ_ONCE(*inner_map);
}

static int array_of_map_gen_lookup(struct bpf_map *map,
				   struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 elem_size = array->elem_size;
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (!map->bypass_spec_v1) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
	}
	if (is_power_of_2(elem_size))
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	else
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);

	return insn - insn_buf;
}

const struct bpf_map_ops array_of_maps_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_of_map_alloc,
	.map_free = array_of_map_free,
	.map_get_next_key = bpf_array_get_next_key,
	.map_lookup_elem = array_of_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = bpf_map_fd_get_ptr,
	.map_fd_put_ptr = bpf_map_fd_put_ptr,
	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
	.map_gen_lookup = array_of_map_gen_lookup,
	.map_lookup_batch = generic_map_lookup_batch,
	.map_update_batch = generic_map_update_batch,
	.map_check_btf = map_check_no_btf,
	.map_mem_usage = array_map_mem_usage,
	.map_btf_id = &array_map_btf_ids[0],
};
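
/*
 * Editor's sketch (not part of the original file): with an array of maps, a
 * BPF-side lookup first returns the inner map pointer and a second lookup
 * indexes into it (names hypothetical):
 *
 *	void *inner = bpf_map_lookup_elem(&outer_array, &outer_key);
 *	if (inner)
 *		val = bpf_map_lookup_elem(inner, &inner_key);
 *
 * array_of_map_gen_lookup() above inlines the first step, ending with a
 * BPF_LDX_MEM(BPF_DW, ...) that loads the stored inner map pointer.
 */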