* Move vm_table members out of kernel/sysctl.c
 
   All vm_table array members have moved to their respective subsystems leading
   to the removal of vm_table from kernel/sysctl.c. This increases modularity by
   placing the ctl_tables closer to where they are actually used and at the same
   time reducing the chances of merge conflicts in kernel/sysctl.c.
 
 * ctl_table range fixes
 
   Replace the proc_handler function that checks variable ranges in
   coredump_sysctls and vdso_table with the one that actually uses the extra{1,2}
   pointers as min/max values. This tightens the range of the values that users
   can pass into the kernel effectively preventing {under,over}flows.
 
 * Misc fixes
 
   Correct grammar errors and typos in test messages. Update sysctl files in
   MAINTAINERS. Constified and removed array size in declaration for
   alignment_tbl
 
 * Testing
 
   - These have all been in linux-next for at least 1 month
   - They have gone through 0-day
   - Ran all these through sysctl selftests in x86_64
 -----BEGIN PGP SIGNATURE-----
 
 iQGzBAABCgAdFiEErkcJVyXmMSXOyyeQupfNUreWQU8FAmfhV8EACgkQupfNUreW
 QU/udAv/VCXGkndQsJ5biXpXYFnokX0gIEaYzzHiqrFycZqr8ys0/wWzc+ar1LjF
 Jvanl2uKB0mUviLKt7Gk0+Hri+PJlYIrbx+5K5eo2wsKUUxFykqLLm59y/orPODl
 gyPQjKNpHJb7COsnEc3Lrq/fvol4NPHlcBPXG8NwehccTeBHZ1ninfo+pSnxh3o8
 kI3GSLLxD4K9AgBl5QuVWH4gU7o//u7lUkKzy03NW+2jmuRv3dRcYF7IdgMINNee
 AeXnygdSBxLzECBvmkfNdyg+AmL8hdsmzbsIh7UuJDvxLlQOInVLZa+sXBotCOIc
 TImCrr1Ws1OuGrD0kpH+21tJvc8pNFWt61QlulObQdrLndWHdZEGyGOusLpXTwbn
 jIWZmMvzk1foSwdgzwPFzUqPEpW3FrBVDo4Z4kenBDrCp56QTX7hGRvkNYJNKvot
 Ue+i8BeHR/Gm/p+UMqgsSTOaNJXTqZhFqwJQVzxU/9LN/vkS0On6fbjgBd5X6Pn+
 a5dlc9gy
 =0bcX
 -----END PGP SIGNATURE-----

Merge tag 'sysctl-6.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/sysctl/sysctl

Pull sysctl updates from Joel Granados:

 - Move vm_table members out of kernel/sysctl.c

   All vm_table array members have moved to their respective subsystems
   leading to the removal of vm_table from kernel/sysctl.c. This
   increases modularity by placing the ctl_tables closer to where they
   are actually used and at the same time reducing the chances of merge
   conflicts in kernel/sysctl.c.

 - ctl_table range fixes

   Replace the proc_handler function that checks variable ranges in
   coredump_sysctls and vdso_table with the one that actually uses the
   extra{1,2} pointers as min/max values. This tightens the range of the
   values that users can pass into the kernel effectively preventing
   {under,over}flows.

 - Misc fixes

   Correct grammar errors and typos in test messages. Update sysctl
   files in MAINTAINERS. Constified and removed array size in
   declaration for alignment_tbl

* tag 'sysctl-6.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/sysctl/sysctl: (22 commits)
  selftests/sysctl: fix wording of help messages
  selftests: fix spelling/grammar errors in sysctl/sysctl.sh
  MAINTAINERS: Update sysctl file list in MAINTAINERS
  sysctl: Fix underflow value setting risk in vm_table
  coredump: Fixes core_pipe_limit sysctl proc_handler
  sysctl: remove unneeded include
  sysctl: remove the vm_table
  sh: vdso: move the sysctl to arch/sh/kernel/vsyscall/vsyscall.c
  x86: vdso: move the sysctl to arch/x86/entry/vdso/vdso32-setup.c
  fs: dcache: move the sysctl to fs/dcache.c
  sunrpc: simplify rpcauth_cache_shrink_count()
  fs: drop_caches: move sysctl to fs/drop_caches.c
  fs: fs-writeback: move sysctl to fs/fs-writeback.c
  mm: nommu: move sysctl to mm/nommu.c
  security: min_addr: move sysctl to security/min_addr.c
  mm: mmap: move sysctl to mm/mmap.c
  mm: util: move sysctls to mm/util.c
  mm: vmscan: move vmscan sysctls to mm/vmscan.c
  mm: swap: move sysctl to mm/swap.c
  mm: filemap: move sysctl to mm/filemap.c
  ...
This commit is contained in:
Linus Torvalds 2025-03-26 21:02:05 -07:00
commit 592329e5e9
27 changed files with 350 additions and 322 deletions

View File

@ -19081,9 +19081,10 @@ S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/sysctl/sysctl.git sysctl-next
F: fs/proc/proc_sysctl.c
F: include/linux/sysctl.h
F: kernel/sysctl-test.c
F: kernel/sysctl.c
F: tools/testing/selftests/sysctl/
F: kernel/sysctl*
F: tools/testing/selftests/sysctl/*
F: lib/test_sysctl.c
F: scripts/check-sysctl-docs
PS3 NETWORK SUPPORT
M: Geoff Levand <geoff@infradead.org>

View File

@ -300,7 +300,7 @@ bad_area:
force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)addr);
}
static struct ctl_table alignment_tbl[5] = {
static const struct ctl_table alignment_tbl[] = {
{
.procname = "kernel_enable",
.data = &align_kern_enable,

View File

@ -14,6 +14,7 @@
#include <linux/module.h>
#include <linux/elf.h>
#include <linux/sched.h>
#include <linux/sysctl.h>
#include <linux/err.h>
/*
@ -30,6 +31,18 @@ static int __init vdso_setup(char *s)
}
__setup("vdso=", vdso_setup);
static const struct ctl_table vdso_table[] = {
{
.procname = "vdso_enabled",
.data = &vdso_enabled,
.maxlen = sizeof(vdso_enabled),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
};
/*
* These symbols are defined by vsyscall.o to mark the bounds
* of the ELF DSO images included therein.
@ -58,6 +71,14 @@ int __init vsyscall_init(void)
return 0;
}
static int __init vm_sysctl_init(void)
{
register_sysctl_init("vm", vdso_table);
return 0;
}
fs_initcall(vm_sysctl_init);
/* Setup a VMA at program startup for the vsyscall page */
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{

View File

@ -51,15 +51,17 @@ __setup("vdso32=", vdso32_setup);
__setup_param("vdso=", vdso_setup, vdso32_setup, 0);
#endif
#ifdef CONFIG_X86_64
#ifdef CONFIG_SYSCTL
/* Register vsyscall32 into the ABI table */
#include <linux/sysctl.h>
static const struct ctl_table abi_table2[] = {
static const struct ctl_table vdso_table[] = {
{
#ifdef CONFIG_X86_64
.procname = "vsyscall32",
#else
.procname = "vdso_enabled",
#endif
.data = &vdso32_enabled,
.maxlen = sizeof(int),
.mode = 0644,
@ -71,10 +73,14 @@ static const struct ctl_table abi_table2[] = {
static __init int ia32_binfmt_init(void)
{
register_sysctl("abi", abi_table2);
#ifdef CONFIG_X86_64
/* Register vsyscall32 into the ABI table */
register_sysctl("abi", vdso_table);
#else
register_sysctl_init("vm", vdso_table);
#endif
return 0;
}
__initcall(ia32_binfmt_init);
#endif /* CONFIG_SYSCTL */
#endif /* CONFIG_X86_64 */

View File

@ -1042,7 +1042,9 @@ static const struct ctl_table coredump_sysctls[] = {
.data = &core_pipe_limit,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_INT_MAX,
},
{
.procname = "core_file_note_size_limit",

View File

@ -73,8 +73,13 @@
* If no ancestor relationship:
* arbitrary, since it's serialized on rename_lock
*/
int sysctl_vfs_cache_pressure __read_mostly = 100;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
static int sysctl_vfs_cache_pressure __read_mostly = 100;
unsigned long vfs_pressure_ratio(unsigned long val)
{
return mult_frac(val, sysctl_vfs_cache_pressure, 100);
}
EXPORT_SYMBOL_GPL(vfs_pressure_ratio);
__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
@ -211,8 +216,20 @@ static const struct ctl_table fs_dcache_sysctls[] = {
},
};
static const struct ctl_table vm_dcache_sysctls[] = {
{
.procname = "vfs_cache_pressure",
.data = &sysctl_vfs_cache_pressure,
.maxlen = sizeof(sysctl_vfs_cache_pressure),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
};
static int __init init_fs_dcache_sysctls(void)
{
register_sysctl_init("vm", vm_dcache_sysctls);
register_sysctl_init("fs", fs_dcache_sysctls);
return 0;
}

View File

@ -14,7 +14,7 @@
#include "internal.h"
/* A global variable is a bit ugly, but it keeps the code simple */
int sysctl_drop_caches;
static int sysctl_drop_caches;
static void drop_pagecache_sb(struct super_block *sb, void *unused)
{
@ -48,7 +48,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
iput(toput_inode);
}
int drop_caches_sysctl_handler(const struct ctl_table *table, int write,
static int drop_caches_sysctl_handler(const struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
int ret;
@ -77,3 +77,22 @@ int drop_caches_sysctl_handler(const struct ctl_table *table, int write,
}
return 0;
}
static const struct ctl_table drop_caches_table[] = {
{
.procname = "drop_caches",
.data = &sysctl_drop_caches,
.maxlen = sizeof(int),
.mode = 0200,
.proc_handler = drop_caches_sysctl_handler,
.extra1 = SYSCTL_ONE,
.extra2 = SYSCTL_FOUR,
},
};
static int __init init_vm_drop_caches_sysctls(void)
{
register_sysctl_init("vm", drop_caches_table);
return 0;
}
fs_initcall(init_vm_drop_caches_sysctls);

View File

@ -65,7 +65,7 @@ struct wb_writeback_work {
* timestamps written to disk after 12 hours, but in the worst case a
* few inodes might not their timestamps updated for 24 hours.
*/
unsigned int dirtytime_expire_interval = 12 * 60 * 60;
static unsigned int dirtytime_expire_interval = 12 * 60 * 60;
static inline struct inode *wb_inode(struct list_head *head)
{
@ -2435,14 +2435,7 @@ static void wakeup_dirtytime_writeback(struct work_struct *w)
schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
}
static int __init start_dirtytime_writeback(void)
{
schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
return 0;
}
__initcall(start_dirtytime_writeback);
int dirtytime_interval_handler(const struct ctl_table *table, int write,
static int dirtytime_interval_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
@ -2453,6 +2446,25 @@ int dirtytime_interval_handler(const struct ctl_table *table, int write,
return ret;
}
static const struct ctl_table vm_fs_writeback_table[] = {
{
.procname = "dirtytime_expire_seconds",
.data = &dirtytime_expire_interval,
.maxlen = sizeof(dirtytime_expire_interval),
.mode = 0644,
.proc_handler = dirtytime_interval_handler,
.extra1 = SYSCTL_ZERO,
},
};
static int __init start_dirtytime_writeback(void)
{
schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
register_sysctl_init("vm", vm_fs_writeback_table);
return 0;
}
__initcall(start_dirtytime_writeback);
/**
* __mark_inode_dirty - internal function to mark an inode dirty
*

View File

@ -519,12 +519,7 @@ static inline int simple_positive(const struct dentry *dentry)
return d_really_is_positive(dentry) && !d_unhashed(dentry);
}
extern int sysctl_vfs_cache_pressure;
static inline unsigned long vfs_pressure_ratio(unsigned long val)
{
return mult_frac(val, sysctl_vfs_cache_pressure, 100);
}
unsigned long vfs_pressure_ratio(unsigned long val);
/**
* d_inode - Get the actual inode of this dentry

View File

@ -40,8 +40,6 @@ struct user_struct;
struct pt_regs;
struct folio_batch;
extern int sysctl_page_lock_unfairness;
void mm_core_init(void);
void init_mm_internals(void);
@ -78,8 +76,6 @@ static inline void totalram_pages_add(long count)
}
extern void * high_memory;
extern int page_cluster;
extern const int page_cluster_max;
#ifdef CONFIG_SYSCTL
extern int sysctl_legacy_va_layout;
@ -209,17 +205,6 @@ extern int sysctl_max_map_count;
extern unsigned long sysctl_user_reserve_kbytes;
extern unsigned long sysctl_admin_reserve_kbytes;
extern int sysctl_overcommit_memory;
extern int sysctl_overcommit_ratio;
extern unsigned long sysctl_overcommit_kbytes;
int overcommit_ratio_handler(const struct ctl_table *, int, void *, size_t *,
loff_t *);
int overcommit_kbytes_handler(const struct ctl_table *, int, void *, size_t *,
loff_t *);
int overcommit_policy_handler(const struct ctl_table *, int, void *, size_t *,
loff_t *);
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
#define folio_page_idx(folio, p) (page_to_pfn(p) - folio_pfn(folio))
@ -3809,12 +3794,6 @@ static inline int in_gate_area(struct mm_struct *mm, unsigned long addr)
extern bool process_shares_mm(struct task_struct *p, struct mm_struct *mm);
#ifdef CONFIG_SYSCTL
extern int sysctl_drop_caches;
int drop_caches_sysctl_handler(const struct ctl_table *, int, void *, size_t *,
loff_t *);
#endif
void drop_slab(void);
#ifndef CONFIG_MMU
@ -4086,8 +4065,6 @@ unsigned long wp_shared_mapping_range(struct address_space *mapping,
pgoff_t first_index, pgoff_t nr);
#endif
extern int sysctl_nr_trim_pages;
#ifdef CONFIG_ANON_VMA_NAME
int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
unsigned long len_in,

View File

@ -59,8 +59,6 @@
| MAP_HUGE_1GB)
extern int sysctl_overcommit_memory;
extern int sysctl_overcommit_ratio;
extern unsigned long sysctl_overcommit_kbytes;
extern struct percpu_counter vm_committed_as;
#ifdef CONFIG_SMP

View File

@ -433,19 +433,10 @@ extern int vm_swappiness;
long remove_mapping(struct address_space *mapping, struct folio *folio);
#ifdef CONFIG_NUMA
extern int node_reclaim_mode;
extern int sysctl_min_unmapped_ratio;
extern int sysctl_min_slab_ratio;
#else
#define node_reclaim_mode 0
#endif
static inline bool node_reclaim_enabled(void)
{
/* Is any node_reclaim_mode bit set? */
return node_reclaim_mode & (RECLAIM_ZONE|RECLAIM_WRITE|RECLAIM_UNMAP);
}
void check_move_unevictable_folios(struct folio_batch *fbatch);
extern void __meminit kswapd_run(int nid);

View File

@ -10,15 +10,8 @@
#include <linux/static_key.h>
#include <linux/mmdebug.h>
extern int sysctl_stat_interval;
#ifdef CONFIG_NUMA
#define ENABLE_NUMA_STAT 1
#define DISABLE_NUMA_STAT 0
extern int sysctl_vm_numa_stat;
DECLARE_STATIC_KEY_TRUE(vm_numa_stat_key);
int sysctl_vm_numa_stat_handler(const struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos);
#endif
struct reclaim_stat {
@ -304,10 +297,6 @@ void quiet_vmstat(void);
void cpu_vm_stats_fold(int cpu);
void refresh_zone_stat_thresholds(void);
struct ctl_table;
int vmstat_refresh(const struct ctl_table *, int write, void *buffer, size_t *lenp,
loff_t *ppos);
void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *);
int calculate_pressure_threshold(struct zone *zone);

View File

@ -327,12 +327,8 @@ extern struct wb_domain global_wb_domain;
/* These are exported to sysctl. */
extern unsigned int dirty_writeback_interval;
extern unsigned int dirty_expire_interval;
extern unsigned int dirtytime_expire_interval;
extern int laptop_mode;
int dirtytime_interval_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos);
void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty);
unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh);
unsigned long cgwb_calc_thresh(struct bdi_writeback *wb);

View File

@ -20,9 +20,6 @@
*/
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/bitmap.h>
#include <linux/signal.h>
@ -31,7 +28,6 @@
#include <linux/proc_fs.h>
#include <linux/security.h>
#include <linux/ctype.h>
#include <linux/kmemleak.h>
#include <linux/filter.h>
#include <linux/fs.h>
#include <linux/init.h>
@ -42,25 +38,20 @@
#include <linux/highuid.h>
#include <linux/writeback.h>
#include <linux/ratelimit.h>
#include <linux/hugetlb.h>
#include <linux/initrd.h>
#include <linux/key.h>
#include <linux/times.h>
#include <linux/limits.h>
#include <linux/dcache.h>
#include <linux/syscalls.h>
#include <linux/vmstat.h>
#include <linux/nfs_fs.h>
#include <linux/acpi.h>
#include <linux/reboot.h>
#include <linux/ftrace.h>
#include <linux/oom.h>
#include <linux/kmod.h>
#include <linux/capability.h>
#include <linux/binfmts.h>
#include <linux/sched/sysctl.h>
#include <linux/mount.h>
#include <linux/userfaultfd_k.h>
#include <linux/pid.h>
#include "../lib/kstrtox.h"
@ -122,12 +113,6 @@ enum sysctl_writes_mode {
static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
#endif /* CONFIG_PROC_SYSCTL */
#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
int sysctl_legacy_va_layout;
#endif
#endif /* CONFIG_SYSCTL */
/*
@ -1901,215 +1886,9 @@ static const struct ctl_table kern_table[] = {
#endif
};
static const struct ctl_table vm_table[] = {
{
.procname = "overcommit_memory",
.data = &sysctl_overcommit_memory,
.maxlen = sizeof(sysctl_overcommit_memory),
.mode = 0644,
.proc_handler = overcommit_policy_handler,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO,
},
{
.procname = "overcommit_ratio",
.data = &sysctl_overcommit_ratio,
.maxlen = sizeof(sysctl_overcommit_ratio),
.mode = 0644,
.proc_handler = overcommit_ratio_handler,
},
{
.procname = "overcommit_kbytes",
.data = &sysctl_overcommit_kbytes,
.maxlen = sizeof(sysctl_overcommit_kbytes),
.mode = 0644,
.proc_handler = overcommit_kbytes_handler,
},
{
.procname = "page-cluster",
.data = &page_cluster,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = (void *)&page_cluster_max,
},
{
.procname = "dirtytime_expire_seconds",
.data = &dirtytime_expire_interval,
.maxlen = sizeof(dirtytime_expire_interval),
.mode = 0644,
.proc_handler = dirtytime_interval_handler,
.extra1 = SYSCTL_ZERO,
},
{
.procname = "swappiness",
.data = &vm_swappiness,
.maxlen = sizeof(vm_swappiness),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO_HUNDRED,
},
#ifdef CONFIG_NUMA
{
.procname = "numa_stat",
.data = &sysctl_vm_numa_stat,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = sysctl_vm_numa_stat_handler,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
#endif
{
.procname = "drop_caches",
.data = &sysctl_drop_caches,
.maxlen = sizeof(int),
.mode = 0200,
.proc_handler = drop_caches_sysctl_handler,
.extra1 = SYSCTL_ONE,
.extra2 = SYSCTL_FOUR,
},
{
.procname = "page_lock_unfairness",
.data = &sysctl_page_lock_unfairness,
.maxlen = sizeof(sysctl_page_lock_unfairness),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
#ifdef CONFIG_MMU
{
.procname = "max_map_count",
.data = &sysctl_max_map_count,
.maxlen = sizeof(sysctl_max_map_count),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
#else
{
.procname = "nr_trim_pages",
.data = &sysctl_nr_trim_pages,
.maxlen = sizeof(sysctl_nr_trim_pages),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
#endif
{
.procname = "vfs_cache_pressure",
.data = &sysctl_vfs_cache_pressure,
.maxlen = sizeof(sysctl_vfs_cache_pressure),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
{
.procname = "legacy_va_layout",
.data = &sysctl_legacy_va_layout,
.maxlen = sizeof(sysctl_legacy_va_layout),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
#endif
#ifdef CONFIG_NUMA
{
.procname = "zone_reclaim_mode",
.data = &node_reclaim_mode,
.maxlen = sizeof(node_reclaim_mode),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
#endif
#ifdef CONFIG_SMP
{
.procname = "stat_interval",
.data = &sysctl_stat_interval,
.maxlen = sizeof(sysctl_stat_interval),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{
.procname = "stat_refresh",
.data = NULL,
.maxlen = 0,
.mode = 0600,
.proc_handler = vmstat_refresh,
},
#endif
#ifdef CONFIG_MMU
{
.procname = "mmap_min_addr",
.data = &dac_mmap_min_addr,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = mmap_min_addr_handler,
},
#endif
#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
(defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
{
.procname = "vdso_enabled",
#ifdef CONFIG_X86_32
.data = &vdso32_enabled,
.maxlen = sizeof(vdso32_enabled),
#else
.data = &vdso_enabled,
.maxlen = sizeof(vdso_enabled),
#endif
.mode = 0644,
.proc_handler = proc_dointvec,
.extra1 = SYSCTL_ZERO,
},
#endif
{
.procname = "user_reserve_kbytes",
.data = &sysctl_user_reserve_kbytes,
.maxlen = sizeof(sysctl_user_reserve_kbytes),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
{
.procname = "admin_reserve_kbytes",
.data = &sysctl_admin_reserve_kbytes,
.maxlen = sizeof(sysctl_admin_reserve_kbytes),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
{
.procname = "mmap_rnd_bits",
.data = &mmap_rnd_bits,
.maxlen = sizeof(mmap_rnd_bits),
.mode = 0600,
.proc_handler = proc_dointvec_minmax,
.extra1 = (void *)&mmap_rnd_bits_min,
.extra2 = (void *)&mmap_rnd_bits_max,
},
#endif
#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
{
.procname = "mmap_rnd_compat_bits",
.data = &mmap_rnd_compat_bits,
.maxlen = sizeof(mmap_rnd_compat_bits),
.mode = 0600,
.proc_handler = proc_dointvec_minmax,
.extra1 = (void *)&mmap_rnd_compat_bits_min,
.extra2 = (void *)&mmap_rnd_compat_bits_max,
},
#endif
};
int __init sysctl_init_bases(void)
{
register_sysctl_init("kernel", kern_table);
register_sysctl_init("vm", vm_table);
return 0;
}

View File

@ -47,6 +47,7 @@
#include <linux/splice.h>
#include <linux/rcupdate_wait.h>
#include <linux/sched/mm.h>
#include <linux/sysctl.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include "internal.h"
@ -1077,6 +1078,19 @@ static wait_queue_head_t *folio_waitqueue(struct folio *folio)
return &folio_wait_table[hash_ptr(folio, PAGE_WAIT_TABLE_BITS)];
}
/* How many times do we accept lock stealing from under a waiter? */
static int sysctl_page_lock_unfairness = 5;
static const struct ctl_table filemap_sysctl_table[] = {
{
.procname = "page_lock_unfairness",
.data = &sysctl_page_lock_unfairness,
.maxlen = sizeof(sysctl_page_lock_unfairness),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
}
};
void __init pagecache_init(void)
{
int i;
@ -1085,6 +1099,7 @@ void __init pagecache_init(void)
init_waitqueue_head(&folio_wait_table[i]);
page_writeback_init();
register_sysctl_init("vm", filemap_sysctl_table);
}
/*
@ -1232,9 +1247,6 @@ static inline bool folio_trylock_flag(struct folio *folio, int bit_nr,
return true;
}
/* How many times do we accept lock stealing from under a waiter? */
int sysctl_page_lock_unfairness = 5;
static inline int folio_wait_bit_common(struct folio *folio, int bit_nr,
int state, enum behavior behavior)
{

View File

@ -1097,9 +1097,13 @@ static inline void mminit_verify_zonelist(void)
#define NODE_RECLAIM_SUCCESS 1
#ifdef CONFIG_NUMA
extern int node_reclaim_mode;
extern int node_reclaim(struct pglist_data *, gfp_t, unsigned int);
extern int find_next_best_node(int node, nodemask_t *used_node_mask);
#else
#define node_reclaim_mode 0
static inline int node_reclaim(struct pglist_data *pgdat, gfp_t mask,
unsigned int order)
{
@ -1111,6 +1115,12 @@ static inline int find_next_best_node(int node, nodemask_t *used_node_mask)
}
#endif
static inline bool node_reclaim_enabled(void)
{
/* Is any node_reclaim_mode bit set? */
return node_reclaim_mode & (RECLAIM_ZONE|RECLAIM_WRITE|RECLAIM_UNMAP);
}
/*
* mm/memory-failure.c
*/

View File

@ -1543,6 +1543,57 @@ struct vm_area_struct *_install_special_mapping(
&special_mapping_vmops);
}
#ifdef CONFIG_SYSCTL
#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
int sysctl_legacy_va_layout;
#endif
static const struct ctl_table mmap_table[] = {
{
.procname = "max_map_count",
.data = &sysctl_max_map_count,
.maxlen = sizeof(sysctl_max_map_count),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
{
.procname = "legacy_va_layout",
.data = &sysctl_legacy_va_layout,
.maxlen = sizeof(sysctl_legacy_va_layout),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
#endif
#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
{
.procname = "mmap_rnd_bits",
.data = &mmap_rnd_bits,
.maxlen = sizeof(mmap_rnd_bits),
.mode = 0600,
.proc_handler = proc_dointvec_minmax,
.extra1 = (void *)&mmap_rnd_bits_min,
.extra2 = (void *)&mmap_rnd_bits_max,
},
#endif
#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
{
.procname = "mmap_rnd_compat_bits",
.data = &mmap_rnd_compat_bits,
.maxlen = sizeof(mmap_rnd_compat_bits),
.mode = 0600,
.proc_handler = proc_dointvec_minmax,
.extra1 = (void *)&mmap_rnd_compat_bits_min,
.extra2 = (void *)&mmap_rnd_compat_bits_max,
},
#endif
};
#endif /* CONFIG_SYSCTL */
/*
* initialise the percpu counter for VM
*/
@ -1552,6 +1603,9 @@ void __init mmap_init(void)
ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
VM_BUG_ON(ret);
#ifdef CONFIG_SYSCTL
register_sysctl_init("vm", mmap_table);
#endif
}
/*

View File

@ -48,7 +48,6 @@ struct page *mem_map;
unsigned long max_mapnr;
EXPORT_SYMBOL(max_mapnr);
unsigned long highest_memmap_pfn;
int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS;
int heap_stack_gap = 0;
atomic_long_t mmap_pages_allocated;
@ -392,6 +391,19 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
return mm->brk = brk;
}
static int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS;
static const struct ctl_table nommu_table[] = {
{
.procname = "nr_trim_pages",
.data = &sysctl_nr_trim_pages,
.maxlen = sizeof(sysctl_nr_trim_pages),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
};
/*
* initialise the percpu counter for VM and region record slabs
*/
@ -402,6 +414,7 @@ void __init mmap_init(void)
ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
VM_BUG_ON(ret);
vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC|SLAB_ACCOUNT);
register_sysctl_init("vm", nommu_table);
}
/*

View File

@ -45,7 +45,7 @@
/* How many pages do we try to swap or page in/out together? As a power of 2 */
int page_cluster;
const int page_cluster_max = 31;
static const int page_cluster_max = 31;
struct cpu_fbatches {
/*
@ -1076,6 +1076,18 @@ void folio_batch_remove_exceptionals(struct folio_batch *fbatch)
fbatch->nr = j;
}
static const struct ctl_table swap_sysctl_table[] = {
{
.procname = "page-cluster",
.data = &page_cluster,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = (void *)&page_cluster_max,
}
};
/*
* Perform any setup for the swap system
*/
@ -1092,4 +1104,6 @@ void __init swap_setup(void)
* Right now other parts of the system means that we
* _really_ don't want to cluster much more
*/
register_sysctl_init("vm", swap_sysctl_table);
}

View File

@ -3,6 +3,7 @@
#define _MM_SWAP_H
struct mempolicy;
extern int page_cluster;
#ifdef CONFIG_SWAP
#include <linux/swapops.h> /* for swp_offset */

View File

@ -12,6 +12,7 @@
#include <linux/security.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/sysctl.h>
#include <linux/mman.h>
#include <linux/hugetlb.h>
#include <linux/vmalloc.h>
@ -747,14 +748,16 @@ int folio_mc_copy(struct folio *dst, struct folio *src)
EXPORT_SYMBOL(folio_mc_copy);
int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;
int sysctl_overcommit_ratio __read_mostly = 50;
unsigned long sysctl_overcommit_kbytes __read_mostly;
static int sysctl_overcommit_ratio __read_mostly = 50;
static unsigned long sysctl_overcommit_kbytes __read_mostly;
int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */
unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */
int overcommit_ratio_handler(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos)
#ifdef CONFIG_SYSCTL
static int overcommit_ratio_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
@ -769,8 +772,8 @@ static void sync_overcommit_as(struct work_struct *dummy)
percpu_counter_sync(&vm_committed_as);
}
int overcommit_policy_handler(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos)
static int overcommit_policy_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table t;
int new_policy = -1;
@ -805,8 +808,8 @@ int overcommit_policy_handler(const struct ctl_table *table, int write, void *bu
return ret;
}
int overcommit_kbytes_handler(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos)
static int overcommit_kbytes_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
@ -816,6 +819,54 @@ int overcommit_kbytes_handler(const struct ctl_table *table, int write, void *bu
return ret;
}
static const struct ctl_table util_sysctl_table[] = {
{
.procname = "overcommit_memory",
.data = &sysctl_overcommit_memory,
.maxlen = sizeof(sysctl_overcommit_memory),
.mode = 0644,
.proc_handler = overcommit_policy_handler,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO,
},
{
.procname = "overcommit_ratio",
.data = &sysctl_overcommit_ratio,
.maxlen = sizeof(sysctl_overcommit_ratio),
.mode = 0644,
.proc_handler = overcommit_ratio_handler,
},
{
.procname = "overcommit_kbytes",
.data = &sysctl_overcommit_kbytes,
.maxlen = sizeof(sysctl_overcommit_kbytes),
.mode = 0644,
.proc_handler = overcommit_kbytes_handler,
},
{
.procname = "user_reserve_kbytes",
.data = &sysctl_user_reserve_kbytes,
.maxlen = sizeof(sysctl_user_reserve_kbytes),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
{
.procname = "admin_reserve_kbytes",
.data = &sysctl_admin_reserve_kbytes,
.maxlen = sizeof(sysctl_admin_reserve_kbytes),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
};
static int __init init_vm_util_sysctls(void)
{
register_sysctl_init("vm", util_sysctl_table);
return 0;
}
subsys_initcall(init_vm_util_sysctls);
#endif /* CONFIG_SYSCTL */
/*
* Committed memory limit enforced when OVERCOMMIT_NEVER policy is used
*/

View File

@ -7404,6 +7404,28 @@ void __meminit kswapd_stop(int nid)
pgdat_kswapd_unlock(pgdat);
}
static const struct ctl_table vmscan_sysctl_table[] = {
{
.procname = "swappiness",
.data = &vm_swappiness,
.maxlen = sizeof(vm_swappiness),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO_HUNDRED,
},
#ifdef CONFIG_NUMA
{
.procname = "zone_reclaim_mode",
.data = &node_reclaim_mode,
.maxlen = sizeof(node_reclaim_mode),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
}
#endif
};
static int __init kswapd_init(void)
{
int nid;
@ -7411,6 +7433,7 @@ static int __init kswapd_init(void)
swap_setup();
for_each_node_state(nid, N_MEMORY)
kswapd_run(nid);
register_sysctl_init("vm", vmscan_sysctl_table);
return 0;
}

View File

@ -31,8 +31,10 @@
#include "internal.h"
#ifdef CONFIG_PROC_FS
#ifdef CONFIG_NUMA
int sysctl_vm_numa_stat = ENABLE_NUMA_STAT;
#define ENABLE_NUMA_STAT 1
static int sysctl_vm_numa_stat = ENABLE_NUMA_STAT;
/* zero numa counters within a zone */
static void zero_zone_numa_counters(struct zone *zone)
@ -74,7 +76,7 @@ static void invalid_numa_statistics(void)
static DEFINE_MUTEX(vm_numa_stat_lock);
int sysctl_vm_numa_stat_handler(const struct ctl_table *table, int write,
static int sysctl_vm_numa_stat_handler(const struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
int ret, oldval;
@ -102,6 +104,7 @@ out:
return ret;
}
#endif
#endif /* CONFIG_PROC_FS */
#ifdef CONFIG_VM_EVENT_COUNTERS
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
@ -1940,7 +1943,7 @@ static const struct seq_operations vmstat_op = {
#ifdef CONFIG_SMP
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
int sysctl_stat_interval __read_mostly = HZ;
static int sysctl_stat_interval __read_mostly = HZ;
static int vmstat_late_init_done;
#ifdef CONFIG_PROC_FS
@ -1949,7 +1952,7 @@ static void refresh_vm_stats(struct work_struct *work)
refresh_cpu_vm_stats(true);
}
int vmstat_refresh(const struct ctl_table *table, int write,
static int vmstat_refresh(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
long val;
@ -2198,6 +2201,38 @@ static int __init vmstat_late_init(void)
late_initcall(vmstat_late_init);
#endif
#ifdef CONFIG_PROC_FS
static const struct ctl_table vmstat_table[] = {
#ifdef CONFIG_SMP
{
.procname = "stat_interval",
.data = &sysctl_stat_interval,
.maxlen = sizeof(sysctl_stat_interval),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{
.procname = "stat_refresh",
.data = NULL,
.maxlen = 0,
.mode = 0600,
.proc_handler = vmstat_refresh,
},
#endif
#ifdef CONFIG_NUMA
{
.procname = "numa_stat",
.data = &sysctl_vm_numa_stat,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = sysctl_vm_numa_stat_handler,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
#endif
};
#endif
struct workqueue_struct *mm_percpu_wq;
void __init init_mm_internals(void)
@ -2229,6 +2264,7 @@ void __init init_mm_internals(void)
proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op);
proc_create_seq("vmstat", 0444, NULL, &vmstat_op);
proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op);
register_sysctl_init("vm", vmstat_table);
#endif
}

View File

@ -489,7 +489,7 @@ static unsigned long
rpcauth_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{
return number_cred_unused * sysctl_vfs_cache_pressure / 100;
return number_cred_unused;
}
static void

View File

@ -44,8 +44,19 @@ int mmap_min_addr_handler(const struct ctl_table *table, int write,
return ret;
}
static const struct ctl_table min_addr_sysctl_table[] = {
{
.procname = "mmap_min_addr",
.data = &dac_mmap_min_addr,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = mmap_min_addr_handler,
},
};
static int __init init_mmap_min_addr(void)
{
register_sysctl_init("vm", min_addr_sysctl_table);
update_mmap_min_addr();
return 0;

View File

@ -21,7 +21,7 @@ TEST_FILE=$(mktemp)
# ENABLED: 1 if enabled, 0 otherwise
# TARGET: test target file required on the test_sysctl module
# SKIP_NO_TARGET: 1 skip if TARGET not there
# 0 run eventhough TARGET not there
# 0 run even though TARGET not there
#
# Once these are enabled please leave them as-is. Write your own test,
# we have tons of space.
@ -764,7 +764,7 @@ sysctl_test_0007()
fi
if [ ! -f /proc/cmdline ]; then
echo -e "SKIPPING\nThere is no /proc/cmdline to check for paramter"
echo -e "SKIPPING\nThere is no /proc/cmdline to check for parameter"
return $ksft_skip
fi
@ -857,7 +857,7 @@ list_tests()
echo
echo "TEST_ID x NUM_TEST"
echo "TEST_ID: Test ID"
echo "NUM_TESTS: Number of recommended times to run the test"
echo "NUM_TESTS: Recommended number of times to run the test"
echo
echo "0001 x $(get_test_count 0001) - tests proc_dointvec_minmax()"
echo "0002 x $(get_test_count 0002) - tests proc_dostring()"
@ -884,7 +884,7 @@ usage()
echo "Valid tests: 0001-$MAX_TEST"
echo ""
echo " all Runs all tests (default)"
echo " -t Run test ID the number amount of times is recommended"
echo " -t Run test ID the recommended number of times"
echo " -w Watch test ID run until it runs into an error"
echo " -c Run test ID once"
echo " -s Run test ID x test-count number of times"
@ -898,7 +898,7 @@ usage()
echo Example uses:
echo
echo "$TEST_NAME.sh -- executes all tests"
echo "$TEST_NAME.sh -t 0002 -- Executes test ID 0002 number of times is recomended"
echo "$TEST_NAME.sh -t 0002 -- Executes test ID 0002 the recommended number of times"
echo "$TEST_NAME.sh -w 0002 -- Watch test ID 0002 run until an error occurs"
echo "$TEST_NAME.sh -s 0002 -- Run test ID 0002 once"
echo "$TEST_NAME.sh -c 0002 3 -- Run test ID 0002 three times"