linux-stable-mirror/mm/cma_debug.c
Frank van der Linden c009da4258 mm, cma: support multiple contiguous ranges, if requested
Currently, CMA manages one range of physically contiguous memory.
Creating larger CMA areas with hugetlb_cma may run into gaps in
physical memory, so that no single contiguous physical range can be
allocated from memblock when the CMA area is created.

This can happen, for example, on an AMD system with > 1TB of memory, where
there will be a gap just below the 1TB (40-bit DMA) line.  If you have set
aside most of the memory for potential hugetlb CMA allocation,
cma_declare_contiguous_nid will fail.

hugetlb_cma doesn't need the entire area to be one physically contiguous
range.  It just cares about being able to get physically contiguous chunks
of a certain size (e.g.  1G), and it is fine to have the CMA area backed
by multiple physical ranges, as long as it gets 1G contiguous allocations.

Multi-range support is implemented by introducing an array of ranges,
instead of just one big one.  Each range has its own bitmap.  Effectively,
the allocate and release operations work as before, just per-range.  So,
instead of going through one large bitmap, they now go through a number of
smaller ones.

The maximum number of supported ranges is 8, as defined in CMA_MAX_RANGES.
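
For orientation, here is a hedged sketch of the resulting per-area
layout.  The field names follow what mm/cma_debug.c below dereferences
(nranges, ranges[], base_pfn, bitmap, count, available_count,
order_per_bit); the exact layout in the kernel headers is not
reproduced and the remaining fields are assumptions:

#define CMA_MAX_RANGES  8

struct cma_memrange {
        unsigned long base_pfn;         /* first PFN of this physical range */
        unsigned long *bitmap;          /* one bit per 2^order_per_bit pages */
        /* ... per-range page count, debugfs bitmap wrapper, etc. ... */
};

struct cma {
        unsigned long count;            /* total pages across all ranges */
        unsigned long available_count;  /* pages currently free */
        unsigned int order_per_bit;     /* allocation granularity */
        int nranges;                    /* number of populated ranges */
        struct cma_memrange ranges[CMA_MAX_RANGES];
        /* ... name, locks, debug allocation list ... */
};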

Since some current users of CMA expect a CMA area to consist of exactly
one physically contiguous range, only allow multiple ranges when a new
interface, cma_declare_contiguous_nid_multi, is used.  The other
interfaces behave as before, creating CMA areas with a single range.
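
A hedged usage sketch: the prototype of cma_declare_contiguous_nid_multi
is not spelled out in this message, so the argument list below simply
mirrors cma_declare_contiguous_nid and is illustrative only; the
"example" names are placeholders.

#include <linux/cma.h>

static struct cma *example_area;        /* hypothetical result pointer */

static int __init example_reserve(phys_addr_t size, int nid)
{
        /*
         * Same call shape as cma_declare_contiguous_nid(); the _multi
         * variant may back example_area with up to CMA_MAX_RANGES
         * physical ranges instead of failing when no single free
         * range is large enough.
         */
        return cma_declare_contiguous_nid_multi(0, size, 0, PAGE_SIZE,
                                                0, false, "example-multi",
                                                &example_area, nid);
}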

cma_declare_contiguous_nid_multi works as follows, mimicking the
default "bottom-up, above 4G" reservation approach (an illustrative
sketch follows the list):

0) Try cma_declare_contiguous_nid, which will use only one
   region. If this succeeds, return. This makes sure that for
   all the cases that currently work, the behavior remains
   unchanged even if the caller switches from
   cma_declare_contiguous_nid to cma_declare_contiguous_nid_multi.
1) Select the largest free memblock ranges above 4G, up to
   a maximum of CMA_MAX_RANGES ranges.
2) If the ranges selected in step 1 do not add up to the
   total size requested, return -ENOMEM.
3) Sort the selected ranges by base address.
4) Reserve them bottom-up until we get what we wanted.
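
The sketch below illustrates that flow; it is not the actual mm/cma.c
code.  All example_* names are hypothetical, step 0 (the plain
cma_declare_contiguous_nid attempt) is omitted, the "largest ranges
first" selection is simplified to "first ranges found", and error
unwinding after a partial reservation is left out.

#include <linux/memblock.h>
#include <linux/minmax.h>
#include <linux/sizes.h>
#include <linux/sort.h>

#define CMA_MAX_RANGES  8       /* as introduced by this patch */

struct example_range {
        phys_addr_t base;
        phys_addr_t size;
};

static int example_range_cmp(const void *a, const void *b)
{
        const struct example_range *ra = a, *rb = b;

        if (ra->base < rb->base)
                return -1;
        return ra->base > rb->base;
}

static int __init example_reserve_multi(phys_addr_t total, int nid)
{
        struct example_range sel[CMA_MAX_RANGES];
        phys_addr_t start, end, avail = 0, got = 0;
        int i, nsel = 0;
        u64 idx;

        /* 1) Collect free memblock ranges above 4G. */
        for_each_free_mem_range(idx, nid, MEMBLOCK_NONE, &start, &end, NULL) {
                if (nsel == CMA_MAX_RANGES)
                        break;
                start = max_t(phys_addr_t, start, SZ_4G);
                if (start >= end)
                        continue;
                sel[nsel].base = start;
                sel[nsel].size = end - start;
                avail += sel[nsel].size;
                nsel++;
        }

        /* 2) Give up if the selected ranges cannot cover the request. */
        if (avail < total)
                return -ENOMEM;

        /* 3) Sort the selected ranges by base address. */
        sort(sel, nsel, sizeof(sel[0]), example_range_cmp, NULL);

        /* 4) Reserve bottom-up until the requested size is covered. */
        for (i = 0; i < nsel && got < total; i++) {
                phys_addr_t take = min(sel[i].size, total - got);

                if (memblock_reserve(sel[i].base, take))
                        return -ENOMEM;
                got += take;
        }

        return 0;
}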

Link: https://lkml.kernel.org/r/20250228182928.2645936-3-fvdl@google.com
Signed-off-by: Frank van der Linden <fvdl@google.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Dan Carpenter <dan.carpenter@linaro.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Joao Martins <joao.m.martins@oracle.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Roman Gushchin (Cruise) <roman.gushchin@linux.dev>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-03-16 22:06:25 -07:00

// SPDX-License-Identifier: GPL-2.0
/*
 * CMA DebugFS Interface
 *
 * Copyright (c) 2015 Sasha Levin <sasha.levin@oracle.com>
 */

#include <linux/debugfs.h>
#include <linux/cma.h>
#include <linux/list.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm_types.h>

#include "cma.h"

struct cma_mem {
        struct hlist_node node;
        struct page *p;
        unsigned long n;
};

static int cma_debugfs_get(void *data, u64 *val)
{
        unsigned long *p = data;

        *val = *p;

        return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(cma_debugfs_fops, cma_debugfs_get, NULL, "%llu\n");

static int cma_used_get(void *data, u64 *val)
{
        struct cma *cma = data;

        spin_lock_irq(&cma->lock);
        *val = cma->count - cma->available_count;
        spin_unlock_irq(&cma->lock);

        return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(cma_used_fops, cma_used_get, NULL, "%llu\n");

static int cma_maxchunk_get(void *data, u64 *val)
{
        struct cma *cma = data;
        struct cma_memrange *cmr;
        unsigned long maxchunk = 0;
        unsigned long start, end;
        unsigned long bitmap_maxno;
        int r;

        spin_lock_irq(&cma->lock);
        /* Find the longest run of free (zero) bits in each range's bitmap. */
        for (r = 0; r < cma->nranges; r++) {
                cmr = &cma->ranges[r];
                bitmap_maxno = cma_bitmap_maxno(cma, cmr);
                end = 0;
                for (;;) {
                        start = find_next_zero_bit(cmr->bitmap,
                                                   bitmap_maxno, end);
                        if (start >= bitmap_maxno)
                                break;
                        end = find_next_bit(cmr->bitmap, bitmap_maxno, start);
                        maxchunk = max(end - start, maxchunk);
                }
        }
        spin_unlock_irq(&cma->lock);
        *val = (u64)maxchunk << cma->order_per_bit;

        return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(cma_maxchunk_fops, cma_maxchunk_get, NULL, "%llu\n");

static void cma_add_to_cma_mem_list(struct cma *cma, struct cma_mem *mem)
{
        spin_lock(&cma->mem_head_lock);
        hlist_add_head(&mem->node, &cma->mem_head);
        spin_unlock(&cma->mem_head_lock);
}

static struct cma_mem *cma_get_entry_from_list(struct cma *cma)
{
        struct cma_mem *mem = NULL;

        spin_lock(&cma->mem_head_lock);
        if (!hlist_empty(&cma->mem_head)) {
                mem = hlist_entry(cma->mem_head.first, struct cma_mem, node);
                hlist_del_init(&mem->node);
        }
        spin_unlock(&cma->mem_head_lock);

        return mem;
}

static int cma_free_mem(struct cma *cma, int count)
{
        struct cma_mem *mem = NULL;

        while (count) {
                mem = cma_get_entry_from_list(cma);
                if (mem == NULL)
                        return 0;

                if (mem->n <= count) {
                        cma_release(cma, mem->p, mem->n);
                        count -= mem->n;
                        kfree(mem);
                } else if (cma->order_per_bit == 0) {
                        cma_release(cma, mem->p, count);
                        mem->p += count;
                        mem->n -= count;
                        count = 0;
                        cma_add_to_cma_mem_list(cma, mem);
                } else {
                        pr_debug("cma: cannot release partial block when order_per_bit != 0\n");
                        cma_add_to_cma_mem_list(cma, mem);
                        break;
                }
        }

        return 0;
}

static int cma_free_write(void *data, u64 val)
{
        int pages = val;
        struct cma *cma = data;

        return cma_free_mem(cma, pages);
}
DEFINE_DEBUGFS_ATTRIBUTE(cma_free_fops, NULL, cma_free_write, "%llu\n");

static int cma_alloc_mem(struct cma *cma, int count)
{
        struct cma_mem *mem;
        struct page *p;

        mem = kzalloc(sizeof(*mem), GFP_KERNEL);
        if (!mem)
                return -ENOMEM;

        p = cma_alloc(cma, count, 0, false);
        if (!p) {
                kfree(mem);
                return -ENOMEM;
        }

        mem->p = p;
        mem->n = count;

        cma_add_to_cma_mem_list(cma, mem);

        return 0;
}

static int cma_alloc_write(void *data, u64 val)
{
        int pages = val;
        struct cma *cma = data;

        return cma_alloc_mem(cma, pages);
}
DEFINE_DEBUGFS_ATTRIBUTE(cma_alloc_fops, NULL, cma_alloc_write, "%llu\n");

static void cma_debugfs_add_one(struct cma *cma, struct dentry *root_dentry)
{
        struct dentry *tmp, *dir, *rangedir;
        int r;
        char rdirname[12];
        struct cma_memrange *cmr;

        tmp = debugfs_create_dir(cma->name, root_dentry);

        debugfs_create_file("alloc", 0200, tmp, cma, &cma_alloc_fops);
        debugfs_create_file("free", 0200, tmp, cma, &cma_free_fops);
        debugfs_create_file("count", 0444, tmp, &cma->count, &cma_debugfs_fops);
        debugfs_create_file("order_per_bit", 0444, tmp,
                            &cma->order_per_bit, &cma_debugfs_fops);
        debugfs_create_file("used", 0444, tmp, cma, &cma_used_fops);
        debugfs_create_file("maxchunk", 0444, tmp, cma, &cma_maxchunk_fops);

        rangedir = debugfs_create_dir("ranges", tmp);
        for (r = 0; r < cma->nranges; r++) {
                cmr = &cma->ranges[r];

                /* One subdirectory per range: ranges/<r>/{base_pfn,bitmap}. */
                snprintf(rdirname, sizeof(rdirname), "%d", r);
                dir = debugfs_create_dir(rdirname, rangedir);
                debugfs_create_file("base_pfn", 0444, dir,
                                    &cmr->base_pfn, &cma_debugfs_fops);
                cmr->dfs_bitmap.array = (u32 *)cmr->bitmap;
                cmr->dfs_bitmap.n_elements =
                        DIV_ROUND_UP(cma_bitmap_maxno(cma, cmr),
                                     BITS_PER_BYTE * sizeof(u32));
                debugfs_create_u32_array("bitmap", 0444, dir,
                                         &cmr->dfs_bitmap);
        }

        /*
         * Backward compatible symlinks to range 0 for base_pfn and bitmap.
         */
        debugfs_create_symlink("base_pfn", tmp, "ranges/0/base_pfn");
        debugfs_create_symlink("bitmap", tmp, "ranges/0/bitmap");
}

static int __init cma_debugfs_init(void)
{
        struct dentry *cma_debugfs_root;
        int i;

        cma_debugfs_root = debugfs_create_dir("cma", NULL);

        for (i = 0; i < cma_area_count; i++)
                cma_debugfs_add_one(&cma_areas[i], cma_debugfs_root);

        return 0;
}
late_initcall(cma_debugfs_init);
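
For completeness, a minimal user-space sketch of exercising this
debugfs interface (run as root), assuming debugfs is mounted at
/sys/kernel/debug and a CMA area named "example" exists; the area name
and page count are placeholders.

#include <stdio.h>

int main(void)
{
        char buf[64];
        FILE *f;

        /* Ask the debug allocator for 16 pages via the "alloc" file. */
        f = fopen("/sys/kernel/debug/cma/example/alloc", "w");
        if (!f)
                return 1;
        fprintf(f, "16\n");
        fclose(f);

        /* Largest free contiguous chunk, reported in pages. */
        f = fopen("/sys/kernel/debug/cma/example/maxchunk", "r");
        if (f) {
                if (fgets(buf, sizeof(buf), f))
                        printf("maxchunk: %s", buf);
                fclose(f);
        }

        return 0;
}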