From f36f3010f67611a45d66e773bc91e4c66a9abab5 Mon Sep 17 00:00:00 2001 From: Chunhai Guo Date: Tue, 2 Apr 2024 04:00:36 -0600 Subject: [PATCH] erofs: rename per-CPU buffers to global buffer pool and make it configurable Allocating compressed buffers on demand costs extra time for low-latency algorithms (like lz4), so EROFS uses per-CPU buffers to keep compressed data if in-place decompression is unfulfilled. However, this is wasteful of memory on a device with hundreds of CPUs, since only a small number of CPUs decompress concurrently most of the time. This patch renames the per-CPU buffers to a 'global buffer pool' and makes the number of buffers configurable. This allows two or more CPUs to share a common buffer to reduce memory occupation. Suggested-by: Gao Xiang Reviewed-by: Gao Xiang Signed-off-by: Chunhai Guo Link: https://lore.kernel.org/r/20240402100036.2673604-1-guochunhai@vivo.com Signed-off-by: Sandeep Dhavale Link: https://lore.kernel.org/r/20240408215231.3376659-1-dhavale@google.com Signed-off-by: Gao Xiang --- fs/erofs/Makefile | 2 +- fs/erofs/decompressor.c | 6 +- fs/erofs/internal.h | 14 ++-- fs/erofs/pcpubuf.c | 148 ---------------------------------------- fs/erofs/super.c | 9 ++- fs/erofs/zutil.c | 148 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 166 insertions(+), 161 deletions(-) delete mode 100644 fs/erofs/pcpubuf.c diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile index 845eafdcee4a1..20d1ec4224435 100644 --- a/fs/erofs/Makefile +++ b/fs/erofs/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_EROFS_FS) += erofs.o erofs-objs := super.o inode.o data.o namei.o dir.o sysfs.o erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o -erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o pcpubuf.o zutil.o +erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o zutil.o erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o erofs-$(CONFIG_EROFS_FS_ZIP_DEFLATE) += decompressor_deflate.o erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o diff --git a/fs/erofs/decompressor.c 
b/fs/erofs/decompressor.c index 2ec9b2bb628d6..e1239d8869844 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -54,7 +54,7 @@ static int z_erofs_load_lz4_config(struct super_block *sb, sbi->lz4.max_distance_pages = distance ? DIV_ROUND_UP(distance, PAGE_SIZE) + 1 : LZ4_MAX_DISTANCE_PAGES; - return erofs_pcpubuf_growsize(sbi->lz4.max_pclusterblks); + return z_erofs_gbuf_growsize(sbi->lz4.max_pclusterblks); } /* @@ -159,7 +159,7 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx, docopy: /* Or copy compressed data which can be overlapped to per-CPU buffer */ in = rq->in; - src = erofs_get_pcpubuf(ctx->inpages); + src = z_erofs_get_gbuf(ctx->inpages); if (!src) { DBG_BUGON(1); kunmap_local(inpage); @@ -260,7 +260,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx, } else if (maptype == 1) { vm_unmap_ram(src, ctx->inpages); } else if (maptype == 2) { - erofs_put_pcpubuf(src); + z_erofs_put_gbuf(src); } else if (maptype != 3) { DBG_BUGON(1); return -EFAULT; diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index d28ccfc0352b1..ee080d042ab34 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -463,11 +463,11 @@ int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi, struct erofs_workgroup *egrp); int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map, int flags); -void *erofs_get_pcpubuf(unsigned int requiredpages); -void erofs_put_pcpubuf(void *ptr); -int erofs_pcpubuf_growsize(unsigned int nrpages); -void __init erofs_pcpubuf_init(void); -void erofs_pcpubuf_exit(void); +void *z_erofs_get_gbuf(unsigned int requiredpages); +void z_erofs_put_gbuf(void *ptr); +int z_erofs_gbuf_growsize(unsigned int nrpages); +int __init z_erofs_gbuf_init(void); +void z_erofs_gbuf_exit(void); int erofs_init_managed_cache(struct super_block *sb); int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb); #else @@ -477,8 +477,8 @@ static inline 
int erofs_init_shrinker(void) { return 0; } static inline void erofs_exit_shrinker(void) {} static inline int z_erofs_init_zip_subsystem(void) { return 0; } static inline void z_erofs_exit_zip_subsystem(void) {} -static inline void erofs_pcpubuf_init(void) {} -static inline void erofs_pcpubuf_exit(void) {} +static inline int z_erofs_gbuf_init(void) { return 0; } +static inline void z_erofs_gbuf_exit(void) {} static inline int erofs_init_managed_cache(struct super_block *sb) { return 0; } #endif /* !CONFIG_EROFS_FS_ZIP */ diff --git a/fs/erofs/pcpubuf.c b/fs/erofs/pcpubuf.c deleted file mode 100644 index c7a4b1d77069d..0000000000000 --- a/fs/erofs/pcpubuf.c +++ /dev/null @@ -1,148 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) Gao Xiang - * - * For low-latency decompression algorithms (e.g. lz4), reserve consecutive - * per-CPU virtual memory (in pages) in advance to store such inplace I/O - * data if inplace decompression is failed (due to unmet inplace margin for - * example). 
- */ -#include "internal.h" - -struct erofs_pcpubuf { - raw_spinlock_t lock; - void *ptr; - struct page **pages; - unsigned int nrpages; -}; - -static DEFINE_PER_CPU(struct erofs_pcpubuf, erofs_pcb); - -void *erofs_get_pcpubuf(unsigned int requiredpages) - __acquires(pcb->lock) -{ - struct erofs_pcpubuf *pcb = &get_cpu_var(erofs_pcb); - - raw_spin_lock(&pcb->lock); - /* check if the per-CPU buffer is too small */ - if (requiredpages > pcb->nrpages) { - raw_spin_unlock(&pcb->lock); - put_cpu_var(erofs_pcb); - /* (for sparse checker) pretend pcb->lock is still taken */ - __acquire(pcb->lock); - return NULL; - } - return pcb->ptr; -} - -void erofs_put_pcpubuf(void *ptr) __releases(pcb->lock) -{ - struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, smp_processor_id()); - - DBG_BUGON(pcb->ptr != ptr); - raw_spin_unlock(&pcb->lock); - put_cpu_var(erofs_pcb); -} - -/* the next step: support per-CPU page buffers hotplug */ -int erofs_pcpubuf_growsize(unsigned int nrpages) -{ - static DEFINE_MUTEX(pcb_resize_mutex); - static unsigned int pcb_nrpages; - struct page *pagepool = NULL; - int delta, cpu, ret, i; - - mutex_lock(&pcb_resize_mutex); - delta = nrpages - pcb_nrpages; - ret = 0; - /* avoid shrinking pcpubuf, since no idea how many fses rely on */ - if (delta <= 0) - goto out; - - for_each_possible_cpu(cpu) { - struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu); - struct page **pages, **oldpages; - void *ptr, *old_ptr; - - pages = kmalloc_array(nrpages, sizeof(*pages), GFP_KERNEL); - if (!pages) { - ret = -ENOMEM; - break; - } - - for (i = 0; i < nrpages; ++i) { - pages[i] = erofs_allocpage(&pagepool, GFP_KERNEL); - if (!pages[i]) { - ret = -ENOMEM; - oldpages = pages; - goto free_pagearray; - } - } - ptr = vmap(pages, nrpages, VM_MAP, PAGE_KERNEL); - if (!ptr) { - ret = -ENOMEM; - oldpages = pages; - goto free_pagearray; - } - raw_spin_lock(&pcb->lock); - old_ptr = pcb->ptr; - pcb->ptr = ptr; - oldpages = pcb->pages; - pcb->pages = pages; - i = pcb->nrpages; - 
pcb->nrpages = nrpages; - raw_spin_unlock(&pcb->lock); - - if (!oldpages) { - DBG_BUGON(old_ptr); - continue; - } - - if (old_ptr) - vunmap(old_ptr); -free_pagearray: - while (i) - erofs_pagepool_add(&pagepool, oldpages[--i]); - kfree(oldpages); - if (ret) - break; - } - pcb_nrpages = nrpages; - erofs_release_pages(&pagepool); -out: - mutex_unlock(&pcb_resize_mutex); - return ret; -} - -void __init erofs_pcpubuf_init(void) -{ - int cpu; - - for_each_possible_cpu(cpu) { - struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu); - - raw_spin_lock_init(&pcb->lock); - } -} - -void erofs_pcpubuf_exit(void) -{ - int cpu, i; - - for_each_possible_cpu(cpu) { - struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu); - - if (pcb->ptr) { - vunmap(pcb->ptr); - pcb->ptr = NULL; - } - if (!pcb->pages) - continue; - - for (i = 0; i < pcb->nrpages; ++i) - if (pcb->pages[i]) - put_page(pcb->pages[i]); - kfree(pcb->pages); - pcb->pages = NULL; - } -} diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 30b49b2eee534..c1dae1fb949b2 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -859,7 +859,10 @@ static int __init erofs_module_init(void) if (err) goto deflate_err; - erofs_pcpubuf_init(); + err = z_erofs_gbuf_init(); + if (err) + goto gbuf_err; + err = z_erofs_init_zip_subsystem(); if (err) goto zip_err; @@ -879,6 +882,8 @@ static int __init erofs_module_init(void) sysfs_err: z_erofs_exit_zip_subsystem(); zip_err: + z_erofs_gbuf_exit(); +gbuf_err: z_erofs_deflate_exit(); deflate_err: z_erofs_lzma_exit(); @@ -902,7 +907,7 @@ static void __exit erofs_module_exit(void) z_erofs_lzma_exit(); erofs_exit_shrinker(); kmem_cache_destroy(erofs_inode_cachep); - erofs_pcpubuf_exit(); + z_erofs_gbuf_exit(); } static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf) diff --git a/fs/erofs/zutil.c b/fs/erofs/zutil.c index 8cd30ac2091fe..2fa90b10b985c 100644 --- a/fs/erofs/zutil.c +++ b/fs/erofs/zutil.c @@ -5,6 +5,18 @@ */ #include "internal.h" +struct z_erofs_gbuf { + spinlock_t lock; 
+ void *ptr; /* address returned by vmap() for this buffer */ + struct page **pages; /* page array that ptr maps */ + unsigned int nrpages; /* number of pages currently in the buffer */ +}; + +static struct z_erofs_gbuf *z_erofs_gbufpool; +static unsigned int z_erofs_gbuf_count, z_erofs_gbuf_nrpages; + +module_param_named(global_buffers, z_erofs_gbuf_count, uint, 0444); /* pool size; 0 selects one buffer per possible CPU at init */ + static atomic_long_t erofs_global_shrink_cnt; /* for all mounted instances */ /* protected by 'erofs_sb_list_lock' */ static unsigned int shrinker_run_no; @@ -14,6 +26,142 @@ static DEFINE_SPINLOCK(erofs_sb_list_lock); static LIST_HEAD(erofs_sb_list); static struct shrinker *erofs_shrinker_info; +static unsigned int z_erofs_gbuf_id(void) +{ + return raw_smp_processor_id() % z_erofs_gbuf_count; /* CPUs share a slot when the pool is smaller than the CPU count */ +} + +void *z_erofs_get_gbuf(unsigned int requiredpages) + __acquires(gbuf->lock) +{ + struct z_erofs_gbuf *gbuf; + + gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()]; + spin_lock(&gbuf->lock); + /* check if the buffer is too small */ + if (requiredpages > gbuf->nrpages) { + spin_unlock(&gbuf->lock); + /* (for sparse checker) pretend gbuf->lock is still taken */ + __acquire(gbuf->lock); + return NULL; /* the lock is NOT held on this path */ + } + return gbuf->ptr; /* returned with gbuf->lock held; release via z_erofs_put_gbuf() */ +} + +void z_erofs_put_gbuf(void *ptr) __releases(gbuf->lock) +{ + struct z_erofs_gbuf *gbuf; + + gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()]; /* NOTE(review): slot is re-derived from the current CPU; assumes the caller has not migrated since z_erofs_get_gbuf() - confirm */ + DBG_BUGON(gbuf->ptr != ptr); + spin_unlock(&gbuf->lock); +} + +int z_erofs_gbuf_growsize(unsigned int nrpages) +{ + static DEFINE_MUTEX(gbuf_resize_mutex); /* serializes concurrent resizes */ + struct page *pagepool = NULL; + int delta, ret, i, j; + + mutex_lock(&gbuf_resize_mutex); + delta = nrpages - z_erofs_gbuf_nrpages; + ret = 0; + /* avoid shrinking gbufs, since no idea how many fses rely on */ + if (delta <= 0) + goto out; + + for (i = 0; i < z_erofs_gbuf_count; ++i) { + struct z_erofs_gbuf *gbuf = &z_erofs_gbufpool[i]; + struct page **pages, **tmp_pages; + void *ptr, *old_ptr = NULL; + + ret = -ENOMEM; /* assume failure until the new buffer is installed */ + tmp_pages = kcalloc(nrpages, sizeof(*tmp_pages), GFP_KERNEL); + if (!tmp_pages) + break; + for (j = 0; j < nrpages; ++j) { + tmp_pages[j] = erofs_allocpage(&pagepool, GFP_KERNEL); + if (!tmp_pages[j]) 
+ goto free_pagearray; + } + ptr = vmap(tmp_pages, nrpages, VM_MAP, PAGE_KERNEL); + if (!ptr) + goto free_pagearray; + + pages = tmp_pages; + spin_lock(&gbuf->lock); + old_ptr = gbuf->ptr; + gbuf->ptr = ptr; + tmp_pages = gbuf->pages; + gbuf->pages = pages; + j = gbuf->nrpages; + gbuf->nrpages = nrpages; + spin_unlock(&gbuf->lock); + ret = 0; + if (!tmp_pages) { + DBG_BUGON(old_ptr); + continue; + } + + if (old_ptr) + vunmap(old_ptr); +free_pagearray: + while (j) + erofs_pagepool_add(&pagepool, tmp_pages[--j]); + kfree(tmp_pages); + if (ret) + break; + } + z_erofs_gbuf_nrpages = nrpages; + erofs_release_pages(&pagepool); +out: + mutex_unlock(&gbuf_resize_mutex); + return ret; +} + +int __init z_erofs_gbuf_init(void) +{ + unsigned int i = num_possible_cpus(); + + if (!z_erofs_gbuf_count) + z_erofs_gbuf_count = i; + else + z_erofs_gbuf_count = min(z_erofs_gbuf_count, i); + + z_erofs_gbufpool = kcalloc(z_erofs_gbuf_count, + sizeof(*z_erofs_gbufpool), GFP_KERNEL); + if (!z_erofs_gbufpool) + return -ENOMEM; + + for (i = 0; i < z_erofs_gbuf_count; ++i) + spin_lock_init(&z_erofs_gbufpool[i].lock); + return 0; +} + +void z_erofs_gbuf_exit(void) +{ + int i; + + for (i = 0; i < z_erofs_gbuf_count; ++i) { + struct z_erofs_gbuf *gbuf = &z_erofs_gbufpool[i]; + + if (gbuf->ptr) { + vunmap(gbuf->ptr); + gbuf->ptr = NULL; + } + + if (!gbuf->pages) + continue; + + for (i = 0; i < gbuf->nrpages; ++i) + if (gbuf->pages[i]) + put_page(gbuf->pages[i]); + kfree(gbuf->pages); + gbuf->pages = NULL; + } + kfree(z_erofs_gbufpool); +} + struct page *erofs_allocpage(struct page **pagepool, gfp_t gfp) { struct page *page = *pagepool;