diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index 9c49ae53deaf81..1c22b90e754ac4 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -459,8 +459,7 @@ static void notrace irq_work_raise(struct bpf_mem_cache *c) * Typical case will be between 11K and 116K closer to 11K. * bpf progs can and should share bpf_mem_cache when possible. */ - -static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu) +static void init_refill_work(struct bpf_mem_cache *c) { init_irq_work(&c->refill_work, bpf_mem_refill); if (c->unit_size <= 256) { @@ -476,7 +475,10 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu) c->high_watermark = max(96 * 256 / c->unit_size, 3); } c->batch = max((c->high_watermark - c->low_watermark) / 4 * 3, 1); +} +static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu) +{ /* To avoid consuming memory assume that 1st run of bpf * prog won't be doing more than 4 map_update_elem from * irq disabled region @@ -484,6 +486,24 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu) alloc_bulk(c, c->unit_size <= 256 ? 4 : 1, cpu_to_node(cpu), false); } +static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx) +{ + struct llist_node *first; + unsigned int obj_size; + + first = c->free_llist.first; + if (!first) + return 0; + + obj_size = ksize(first); + if (obj_size != c->unit_size) { + WARN_ONCE(1, "bpf_mem_cache[%u]: unexpected object size %u, expect %u\n", + idx, obj_size, c->unit_size); + return -EINVAL; + } + return 0; +} + /* When size != 0 bpf_mem_cache for each cpu. * This is typical bpf hash map use case when all elements have equal size. * @@ -494,10 +514,10 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu) int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) { static u16 sizes[NUM_CACHES] = {96, 192, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096}; + int cpu, i, err, unit_size, percpu_size = 0; struct bpf_mem_caches *cc, __percpu *pcc; struct bpf_mem_cache *c, __percpu *pc; struct obj_cgroup *objcg = NULL; - int cpu, i, unit_size, percpu_size = 0; if (size) { pc = __alloc_percpu_gfp(sizeof(*pc), 8, GFP_KERNEL); @@ -521,6 +541,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) c->objcg = objcg; c->percpu_size = percpu_size; c->tgt = c; + init_refill_work(c); prefill_mem_cache(c, cpu); } ma->cache = pc; @@ -534,6 +555,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) pcc = __alloc_percpu_gfp(sizeof(*cc), 8, GFP_KERNEL); if (!pcc) return -ENOMEM; + err = 0; #ifdef CONFIG_MEMCG_KMEM objcg = get_obj_cgroup_from_current(); #endif @@ -544,11 +566,30 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) c->unit_size = sizes[i]; c->objcg = objcg; c->tgt = c; + + init_refill_work(c); + /* Another bpf_mem_cache will be used when allocating + * c->unit_size in bpf_mem_alloc(), so doesn't prefill + * for the bpf_mem_cache because these free objects will + * never be used. + */ + if (i != bpf_mem_cache_idx(c->unit_size)) + continue; prefill_mem_cache(c, cpu); + err = check_obj_size(c, i); + if (err) + goto out; } } + +out: ma->caches = pcc; - return 0; + /* refill_work is either zeroed or initialized, so it is safe to + * call irq_work_sync(). + */ + if (err) + bpf_mem_alloc_destroy(ma); + return err; } static void drain_mem_cache(struct bpf_mem_cache *c) @@ -916,3 +957,41 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags) return !ret ? NULL : ret + LLIST_NODE_SZ; } + +/* Most of the logic is taken from setup_kmalloc_cache_index_table() */ +static __init int bpf_mem_cache_adjust_size(void) +{ + unsigned int size, index; + + /* Normally KMALLOC_MIN_SIZE is 8-bytes, but it can be + * up-to 256-bytes. + */ + size = KMALLOC_MIN_SIZE; + if (size <= 192) + index = size_index[(size - 1) / 8]; + else + index = fls(size - 1) - 1; + for (size = 8; size < KMALLOC_MIN_SIZE && size <= 192; size += 8) + size_index[(size - 1) / 8] = index; + + /* The minimal alignment is 64-bytes, so disable 96-bytes cache and + * use 128-bytes cache instead. + */ + if (KMALLOC_MIN_SIZE >= 64) { + index = size_index[(128 - 1) / 8]; + for (size = 64 + 8; size <= 96; size += 8) + size_index[(size - 1) / 8] = index; + } + + /* The minimal alignment is 128-bytes, so disable 192-bytes cache and + * use 256-bytes cache instead. + */ + if (KMALLOC_MIN_SIZE >= 128) { + index = fls(256 - 1) - 1; + for (size = 128 + 8; size <= 192; size += 8) + size_index[(size - 1) / 8] = index; + } + + return 0; +} +subsys_initcall(bpf_mem_cache_adjust_size); diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c new file mode 100644 index 00000000000000..0cca4e8ae38e0f --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include + +#include "test_bpf_ma.skel.h" + +void test_test_bpf_ma(void) +{ + struct test_bpf_ma *skel; + struct btf *btf; + int i, err; + + skel = test_bpf_ma__open(); + if (!ASSERT_OK_PTR(skel, "open")) + return; + + btf = bpf_object__btf(skel->obj); + if (!ASSERT_OK_PTR(btf, "btf")) + goto out; + + for (i = 0; i < ARRAY_SIZE(skel->rodata->data_sizes); i++) { + char name[32]; + int id; + + snprintf(name, sizeof(name), "bin_data_%u", skel->rodata->data_sizes[i]); + id = btf__find_by_name_kind(btf, name, BTF_KIND_STRUCT); + if (!ASSERT_GT(id, 0, "bin_data")) + goto out; + skel->rodata->data_btf_ids[i] = id; + } + + err = test_bpf_ma__load(skel); + if (!ASSERT_OK(err, "load")) + goto out; + + err = test_bpf_ma__attach(skel); + if (!ASSERT_OK(err, "attach")) + goto out; + + skel->bss->pid = getpid(); + usleep(1); + ASSERT_OK(skel->bss->err, "test error"); +out: + test_bpf_ma__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_bpf_ma.c b/tools/testing/selftests/bpf/progs/test_bpf_ma.c new file mode 100644 index 00000000000000..ecde41ae0fc87c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_bpf_ma.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#include +#include +#include + +#include "bpf_experimental.h" +#include "bpf_misc.h" + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +struct generic_map_value { + void *data; +}; + +char _license[] SEC("license") = "GPL"; + +const unsigned int data_sizes[] = {8, 16, 32, 64, 96, 128, 192, 256, 512, 1024, 2048, 4096}; +const volatile unsigned int data_btf_ids[ARRAY_SIZE(data_sizes)] = {}; + +int err = 0; +int pid = 0; + +#define DEFINE_ARRAY_WITH_KPTR(_size) \ + struct bin_data_##_size { \ + char data[_size - sizeof(void *)]; \ + }; \ + struct map_value_##_size { \ + struct bin_data_##_size __kptr * data; \ + /* To emit BTF info for bin_data_xx */ \ + struct bin_data_##_size not_used; \ + }; \ + struct { \ + __uint(type, BPF_MAP_TYPE_ARRAY); \ + __type(key, int); \ + __type(value, struct map_value_##_size); \ + __uint(max_entries, 128); \ + } array_##_size SEC(".maps"); + +static __always_inline void batch_alloc_free(struct bpf_map *map, unsigned int batch, + unsigned int idx) +{ + struct generic_map_value *value; + unsigned int i, key; + void *old, *new; + + for (i = 0; i < batch; i++) { + key = i; + value = bpf_map_lookup_elem(map, &key); + if (!value) { + err = 1; + return; + } + new = bpf_obj_new_impl(data_btf_ids[idx], NULL); + if (!new) { + err = 2; + return; + } + old = bpf_kptr_xchg(&value->data, new); + if (old) { + bpf_obj_drop(old); + err = 3; + return; + } + } + for (i = 0; i < batch; i++) { + key = i; + value = bpf_map_lookup_elem(map, &key); + if (!value) { + err = 4; + return; + } + old = bpf_kptr_xchg(&value->data, NULL); + if (!old) { + err = 5; + return; + } + bpf_obj_drop(old); + } +} + +#define CALL_BATCH_ALLOC_FREE(size, batch, idx) \ + batch_alloc_free((struct bpf_map *)(&array_##size), batch, idx) + +DEFINE_ARRAY_WITH_KPTR(8); +DEFINE_ARRAY_WITH_KPTR(16); +DEFINE_ARRAY_WITH_KPTR(32); +DEFINE_ARRAY_WITH_KPTR(64); +DEFINE_ARRAY_WITH_KPTR(96); +DEFINE_ARRAY_WITH_KPTR(128); +DEFINE_ARRAY_WITH_KPTR(192); +DEFINE_ARRAY_WITH_KPTR(256); +DEFINE_ARRAY_WITH_KPTR(512); +DEFINE_ARRAY_WITH_KPTR(1024); +DEFINE_ARRAY_WITH_KPTR(2048); +DEFINE_ARRAY_WITH_KPTR(4096); + +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int test_bpf_mem_alloc_free(void *ctx) +{ + if ((u32)bpf_get_current_pid_tgid() != pid) + return 0; + + /* Alloc 128 8-bytes objects in batch to trigger refilling, + * then free 128 8-bytes objects in batch to trigger freeing. + */ + CALL_BATCH_ALLOC_FREE(8, 128, 0); + CALL_BATCH_ALLOC_FREE(16, 128, 1); + CALL_BATCH_ALLOC_FREE(32, 128, 2); + CALL_BATCH_ALLOC_FREE(64, 128, 3); + CALL_BATCH_ALLOC_FREE(96, 128, 4); + CALL_BATCH_ALLOC_FREE(128, 128, 5); + CALL_BATCH_ALLOC_FREE(192, 128, 6); + CALL_BATCH_ALLOC_FREE(256, 128, 7); + CALL_BATCH_ALLOC_FREE(512, 64, 8); + CALL_BATCH_ALLOC_FREE(1024, 32, 9); + CALL_BATCH_ALLOC_FREE(2048, 16, 10); + CALL_BATCH_ALLOC_FREE(4096, 8, 11); + + return 0; +}