vfs-6.15-rc1.misc

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCZ90p4AAKCRCRxhvAZXjc
 ojMIAP9atkG3u7+490+NGWLdulQlaHnD51Owa9MiW87UfKpsTQEArwi/NrJqXJNT
 PFQ2xIa5TxG+9haChR89w3kjZ6b/hgs=
 =iDkx
 -----END PGP SIGNATURE-----

Merge tag 'vfs-6.15-rc1.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull misc vfs updates from Christian Brauner:
 "Features:

   - Add CONFIG_DEBUG_VFS infrastructure:
      - Catch invalid modes in open
      - Use the new debug macros in inode_set_cached_link()
      - Use debug-only asserts around fd allocation and install

   - Place f_ref to 3rd cache line in struct file to resolve false
     sharing

  Cleanups:

   - Start using anon_inode_getfile_fmode() helper in various places

   - Don't take f_lock during SEEK_CUR if exclusion is guaranteed by
     f_pos_lock

   - Add unlikely() to kcmp()

   - Remove legacy ->remount_fs method from ecryptfs after port to the
     new mount api

   - Remove invalidate_inodes() in favour of evict_inodes()

   - Simplify ep_busy_loop() by removing an unused argument

   - Avoid mmap sem relocks when coredumping with many missing pages

   - Inline getname()

   - Inline new_inode_pseudo() and de-staticize alloc_inode()

   - Dodge an atomic in putname if ref == 1

   - Consistently deref the files table with rcu_dereference_raw()

   - Dedup handling of struct filename init and refcounts bumps

   - Use wq_has_sleeper() in end_dir_add()

   - Drop the lock trip around I_NEW wake up in evict()

   - Load the ->i_sb pointer once in inode_sb_list_{add,del}

   - Predict not reaching the limit in alloc_empty_file()

   - Tidy up do_sys_openat2() with likely/unlikely

   - Call inode_sb_list_add() outside of inode hash lock

   - Sort out fd allocation vs dup2 race commentary

   - Turn page_offset() into a wrapper around folio_pos()

   - Remove locking in exportfs around ->get_parent() call

   - try_lookup_one_len() does not need any locks in autofs

   - Fix return type of several functions from long to int in open

   - Fix return type of several functions from long to int in ioctls

  Fixes:

   - Fix watch queue accounting mismatch"

* tag 'vfs-6.15-rc1.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (30 commits)
  fs: sort out fd allocation vs dup2 race commentary, take 2
  fs: call inode_sb_list_add() outside of inode hash lock
  fs: tidy up do_sys_openat2() with likely/unlikely
  fs: predict not reaching the limit in alloc_empty_file()
  fs: load the ->i_sb pointer once in inode_sb_list_{add,del}
  fs: drop the lock trip around I_NEW wake up in evict()
  fs: use wq_has_sleeper() in end_dir_add()
  VFS/autofs: try_lookup_one_len() does not need any locks
  fs: dedup handling of struct filename init and refcounts bumps
  fs: consistently deref the files table with rcu_dereference_raw()
  exportfs: remove locking around ->get_parent() call.
  fs: use debug-only asserts around fd allocation and install
  fs: dodge an atomic in putname if ref == 1
  vfs: Remove invalidate_inodes()
  ecryptfs: remove NULL remount_fs from super_operations
  watch_queue: fix pipe accounting mismatch
  fs: place f_ref to 3rd cache line in struct file to resolve false sharing
  epoll: simplify ep_busy_loop by removing always 0 argument
  fs: Turn page_offset() into a wrapper around folio_pos()
  kcmp: improve performance adding an unlikely hint to task comparisons
  ...
Merged by Linus Torvalds on 2025-03-24 09:13:50 -07:00 in commit 99c21beaab (36 changed files with 339 additions and 258 deletions).

@ -1157,3 +1157,8 @@ in normal case it points into the pathname being looked up.
NOTE: if you need something like full path from the root of filesystem,
you are still on your own - this assists with simple cases, but it's not
magic.
---
** mandatory **
invalidate_inodes() is gone; use evict_inodes() instead.
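
For an in-tree user the conversion is a one-line swap; a minimal sketch, mirroring the fs/super.c hunk further below:

	/* before */
	invalidate_inodes(sb);
	/* after */
	evict_inodes(sb);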


@ -27,9 +27,10 @@ static int mte_dump_tag_range(struct coredump_params *cprm,
int ret = 1;
unsigned long addr;
void *tags = NULL;
int locked = 0;
for (addr = start; addr < start + len; addr += PAGE_SIZE) {
struct page *page = get_dump_page(addr);
struct page *page = get_dump_page(addr, &locked);
/*
* get_dump_page() returns NULL when encountering an empty


@ -482,14 +482,13 @@ static long papr_vpd_create_handle(struct papr_location_code __user *ulc)
goto free_blob;
}
file = anon_inode_getfile("[papr-vpd]", &papr_vpd_handle_ops,
(void *)blob, O_RDONLY);
file = anon_inode_getfile_fmode("[papr-vpd]", &papr_vpd_handle_ops,
(void *)blob, O_RDONLY,
FMODE_LSEEK | FMODE_PREAD);
if (IS_ERR(file)) {
err = PTR_ERR(file);
goto put_fd;
}
file->f_mode |= FMODE_LSEEK | FMODE_PREAD;
fd_install(fd, file);
return fd;
put_fd:
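
This is the first of several conversions to the new helper (vfio, cachefiles, eventfd, signalfd, timerfd and KVM follow below). A minimal sketch of the pattern, where example_fops and priv are placeholder names:

	/* before: allocate the file, then poke f_mode by hand */
	file = anon_inode_getfile("[example]", &example_fops, priv, O_RDONLY);
	if (IS_ERR(file))
		return PTR_ERR(file);
	file->f_mode |= FMODE_LSEEK | FMODE_PREAD;

	/* after: the extra FMODE_* bits are passed as the final argument */
	file = anon_inode_getfile_fmode("[example]", &example_fops, priv,
					O_RDONLY, FMODE_LSEEK | FMODE_PREAD);
	if (IS_ERR(file))
		return PTR_ERR(file);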


@ -266,24 +266,12 @@ static struct file *vfio_device_open_file(struct vfio_device *device)
if (ret)
goto err_free;
/*
* We can't use anon_inode_getfd() because we need to modify
* the f_mode flags directly to allow more than just ioctls
*/
filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
df, O_RDWR);
filep = anon_inode_getfile_fmode("[vfio-device]", &vfio_device_fops,
df, O_RDWR, FMODE_PREAD | FMODE_PWRITE);
if (IS_ERR(filep)) {
ret = PTR_ERR(filep);
goto err_close_device;
}
/*
* TODO: add an anon_inode interface to do this.
* Appears to be missing by lack of need rather than
* explicitly prevented. Now there's need.
*/
filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);
/*
* Use the pseudo fs inode on the device to link all mmaps
* to the same address space, allowing us to unmap all vmas


@ -442,7 +442,6 @@ static int autofs_dev_ioctl_timeout(struct file *fp,
sbi->exp_timeout = timeout * HZ;
} else {
struct dentry *base = fp->f_path.dentry;
struct inode *inode = base->d_inode;
int path_len = param->size - AUTOFS_DEV_IOCTL_SIZE - 1;
struct dentry *dentry;
struct autofs_info *ino;
@ -460,9 +459,7 @@ static int autofs_dev_ioctl_timeout(struct file *fp,
"the parent autofs mount timeout which could "
"prevent shutdown\n");
inode_lock_shared(inode);
dentry = try_lookup_one_len(param->path, base, path_len);
inode_unlock_shared(inode);
if (IS_ERR_OR_NULL(dentry))
return dentry ? PTR_ERR(dentry) : -ENOENT;
ino = autofs_dentry_ino(dentry);
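
With the relaxed rule documented in the fs/namei.c hunk further down (no locks needed, only a counted reference to the parent), a caller now reduces to roughly the following sketch; error handling is abbreviated and the names are illustrative:

	/* no inode_lock_shared(base->d_inode) around the lookup anymore */
	dentry = try_lookup_one_len(name, base, strlen(name));
	if (IS_ERR_OR_NULL(dentry))
		return dentry ? PTR_ERR(dentry) : -ENOENT;

	/* ... use dentry ... */
	dput(dentry);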


@ -317,8 +317,9 @@ static int cachefiles_ondemand_get_fd(struct cachefiles_req *req,
goto err_free_id;
}
anon_file->file = anon_inode_getfile("[cachefiles]",
&cachefiles_ondemand_fd_fops, object, O_WRONLY);
anon_file->file = anon_inode_getfile_fmode("[cachefiles]",
&cachefiles_ondemand_fd_fops, object,
O_WRONLY, FMODE_PWRITE | FMODE_LSEEK);
if (IS_ERR(anon_file->file)) {
ret = PTR_ERR(anon_file->file);
goto err_put_fd;
@ -333,8 +334,6 @@ static int cachefiles_ondemand_get_fd(struct cachefiles_req *req,
goto err_put_file;
}
anon_file->file->f_mode |= FMODE_PWRITE | FMODE_LSEEK;
load = (void *)req->msg.data;
load->fd = anon_file->fd;
object->ondemand->ondemand_id = object_id;


@ -926,14 +926,23 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start,
{
unsigned long addr;
struct page *dump_page;
int locked, ret;
dump_page = dump_page_alloc();
if (!dump_page)
return 0;
ret = 0;
locked = 0;
for (addr = start; addr < start + len; addr += PAGE_SIZE) {
struct page *page;
if (!locked) {
if (mmap_read_lock_killable(current->mm))
goto out;
locked = 1;
}
/*
* To avoid having to allocate page tables for virtual address
* ranges that have never been used yet, and also to make it
@ -941,21 +950,38 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start,
* NULL when encountering an empty page table entry that would
* otherwise have been filled with the zero page.
*/
page = get_dump_page(addr);
page = get_dump_page(addr, &locked);
if (page) {
if (locked) {
mmap_read_unlock(current->mm);
locked = 0;
}
int stop = !dump_emit_page(cprm, dump_page_copy(page, dump_page));
put_page(page);
if (stop) {
dump_page_free(dump_page);
return 0;
}
if (stop)
goto out;
} else {
dump_skip(cprm, PAGE_SIZE);
}
if (dump_interrupted())
goto out;
if (!need_resched())
continue;
if (locked) {
mmap_read_unlock(current->mm);
locked = 0;
}
cond_resched();
}
ret = 1;
out:
if (locked)
mmap_read_unlock(current->mm);
dump_page_free(dump_page);
return 1;
return ret;
}
#endif


@ -2480,7 +2480,8 @@ static inline void end_dir_add(struct inode *dir, unsigned int n,
{
smp_store_release(&dir->i_dir_seq, n + 2);
preempt_enable_nested();
wake_up_all(d_wait);
if (wq_has_sleeper(d_wait))
wake_up_all(d_wait);
}
static void d_wait_lookup(struct dentry *dentry)
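
The shape of this optimisation, as a sketch (publish_state() and wq stand in for whatever state and waitqueue the caller uses): wq_has_sleeper() is waitqueue_active() preceded by a full barrier, so the update cannot be reordered past the waiter check, and the wake_up_all() call with its waitqueue lock is only paid for when someone is actually sleeping.

	publish_state();		/* e.g. the smp_store_release() above */
	if (wq_has_sleeper(wq))		/* barrier + lockless waiter check */
		wake_up_all(wq);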


@ -172,7 +172,6 @@ const struct super_operations ecryptfs_sops = {
.destroy_inode = ecryptfs_destroy_inode,
.free_inode = ecryptfs_free_inode,
.statfs = ecryptfs_statfs,
.remount_fs = NULL,
.evict_inode = ecryptfs_evict_inode,
.show_options = ecryptfs_show_options
};


@ -406,14 +406,13 @@ static int do_eventfd(unsigned int count, int flags)
if (fd < 0)
goto err;
file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, flags);
file = anon_inode_getfile_fmode("[eventfd]", &eventfd_fops,
ctx, flags, FMODE_NOWAIT);
if (IS_ERR(file)) {
put_unused_fd(fd);
fd = PTR_ERR(file);
goto err;
}
file->f_mode |= FMODE_NOWAIT;
fd_install(fd, file);
return fd;
err:


@ -438,7 +438,7 @@ static bool ep_busy_loop_end(void *p, unsigned long start_time)
*
* we must do our busy polling with irqs enabled
*/
static bool ep_busy_loop(struct eventpoll *ep, int nonblock)
static bool ep_busy_loop(struct eventpoll *ep)
{
unsigned int napi_id = READ_ONCE(ep->napi_id);
u16 budget = READ_ONCE(ep->busy_poll_budget);
@ -448,7 +448,7 @@ static bool ep_busy_loop(struct eventpoll *ep, int nonblock)
budget = BUSY_POLL_BUDGET;
if (napi_id >= MIN_NAPI_ID && ep_busy_loop_on(ep)) {
napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end,
napi_busy_loop(napi_id, ep_busy_loop_end,
ep, prefer_busy_poll, budget);
if (ep_events_available(ep))
return true;
@ -560,7 +560,7 @@ static void ep_resume_napi_irqs(struct eventpoll *ep)
#else
static inline bool ep_busy_loop(struct eventpoll *ep, int nonblock)
static inline bool ep_busy_loop(struct eventpoll *ep)
{
return false;
}
@ -2047,7 +2047,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
if (timed_out)
return 0;
eavail = ep_busy_loop(ep, timed_out);
eavail = ep_busy_loop(ep);
if (eavail)
continue;


@ -126,10 +126,8 @@ static struct dentry *reconnect_one(struct vfsmount *mnt,
int err;
parent = ERR_PTR(-EACCES);
inode_lock(dentry->d_inode);
if (mnt->mnt_sb->s_export_op->get_parent)
parent = mnt->mnt_sb->s_export_op->get_parent(dentry);
inode_unlock(dentry->d_inode);
if (IS_ERR(parent)) {
dprintk("get_parent of %lu failed, err %ld\n",


@ -418,17 +418,25 @@ struct files_struct *dup_fd(struct files_struct *oldf, struct fd_range *punch_ho
old_fds = old_fdt->fd;
new_fds = new_fdt->fd;
/*
* We may be racing against fd allocation from other threads using this
* files_struct, despite holding ->file_lock.
*
* alloc_fd() might have already claimed a slot, while fd_install()
* did not populate it yet. Note the latter operates locklessly, so
* the file can show up as we are walking the array below.
*
* At the same time we know no files will disappear as all other
* operations take the lock.
*
* Instead of trying to placate userspace racing with itself, we
* ref the file if we see it and mark the fd slot as unused otherwise.
*/
for (i = open_files; i != 0; i--) {
struct file *f = *old_fds++;
struct file *f = rcu_dereference_raw(*old_fds++);
if (f) {
get_file(f);
} else {
/*
* The fd may be claimed in the fd bitmap but not yet
* instantiated in the files array if a sibling thread
* is partway through open(). So make sure that this
* fd is available to the new process.
*/
__clear_open_fd(open_files - i, new_fdt);
}
rcu_assign_pointer(*new_fds++, f);
@ -577,6 +585,7 @@ repeat:
__set_open_fd(fd, fdt, flags & O_CLOEXEC);
error = fd;
VFS_BUG_ON(rcu_access_pointer(fdt->fd[fd]) != NULL);
out:
spin_unlock(&files->file_lock);
@ -612,22 +621,14 @@ void put_unused_fd(unsigned int fd)
EXPORT_SYMBOL(put_unused_fd);
/*
* Install a file pointer in the fd array.
*
* The VFS is full of places where we drop the files lock between
* setting the open_fds bitmap and installing the file in the file
* array. At any such point, we are vulnerable to a dup2() race
* installing a file in the array before us. We need to detect this and
* fput() the struct file we are about to overwrite in this case.
*
* It should never happen - if we allow dup2() do it, _really_ bad things
* will follow.
/**
* fd_install - install a file pointer in the fd array
* @fd: file descriptor to install the file in
* @file: the file to install
*
* This consumes the "file" refcount, so callers should treat it
* as if they had called fput(file).
*/
void fd_install(unsigned int fd, struct file *file)
{
struct files_struct *files = current->files;
@ -642,7 +643,7 @@ void fd_install(unsigned int fd, struct file *file)
rcu_read_unlock_sched();
spin_lock(&files->file_lock);
fdt = files_fdtable(files);
WARN_ON(fdt->fd[fd] != NULL);
VFS_BUG_ON(rcu_access_pointer(fdt->fd[fd]) != NULL);
rcu_assign_pointer(fdt->fd[fd], file);
spin_unlock(&files->file_lock);
return;
@ -650,7 +651,7 @@ void fd_install(unsigned int fd, struct file *file)
/* coupled with smp_wmb() in expand_fdtable() */
smp_rmb();
fdt = rcu_dereference_sched(files->fdt);
BUG_ON(fdt->fd[fd] != NULL);
VFS_BUG_ON(rcu_access_pointer(fdt->fd[fd]) != NULL);
rcu_assign_pointer(fdt->fd[fd], file);
rcu_read_unlock_sched();
}
@ -679,7 +680,7 @@ struct file *file_close_fd_locked(struct files_struct *files, unsigned fd)
return NULL;
fd = array_index_nospec(fd, fdt->max_fds);
file = fdt->fd[fd];
file = rcu_dereference_raw(fdt->fd[fd]);
if (file) {
rcu_assign_pointer(fdt->fd[fd], NULL);
__put_unused_fd(files, fd);
@ -1182,6 +1183,16 @@ static inline bool file_needs_f_pos_lock(struct file *file)
(file_count(file) > 1 || file->f_op->iterate_shared);
}
bool file_seek_cur_needs_f_lock(struct file *file)
{
if (!(file->f_mode & FMODE_ATOMIC_POS) && !file->f_op->iterate_shared)
return false;
VFS_WARN_ON_ONCE((file_count(file) > 1) &&
!mutex_is_locked(&file->f_pos_lock));
return true;
}
struct fd fdget_pos(unsigned int fd)
{
struct fd f = fdget(fd);
@ -1230,14 +1241,34 @@ __releases(&files->file_lock)
struct fdtable *fdt;
/*
* We need to detect attempts to do dup2() over allocated but still
* not finished descriptor.
* dup2() is expected to close the file installed in the target fd slot
* (if any). However, userspace hand-picking a fd may be racing against
* its own threads which happened to allocate it in open() et al but did
* not populate it yet.
*
* Broadly speaking we may be racing against the following:
* fd = get_unused_fd_flags(); // fd slot reserved, ->fd[fd] == NULL
* file = hard_work_goes_here();
* fd_install(fd, file); // only now ->fd[fd] == file
*
* It is an invariant that a successfully allocated fd has a NULL entry
* in the array until the matching fd_install().
*
* If we fit the window, we have the fd to populate, yet no target file
* to close. Trying to ignore it and install our new file would violate
* the invariant and make fd_install() overwrite our file.
*
* Things can be done(tm) to handle this. However, the issue does not
* concern legitimate programs and we only need to make sure the kernel
* does not trip over it.
*
* The simplest way out is to return an error if we find ourselves here.
*
* POSIX is silent on the issue, we return -EBUSY.
*/
fdt = files_fdtable(files);
fd = array_index_nospec(fd, fdt->max_fds);
tofree = fdt->fd[fd];
tofree = rcu_dereference_raw(fdt->fd[fd]);
if (!tofree && fd_is_open(fd, fdt))
goto Ebusy;
get_file(file);
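
The -EBUSY case described in the comment is visible from userspace when one thread dup2()s onto a descriptor number that a sibling has reserved in open() but not yet installed; dup2(2) documents EBUSY for exactly this race. A rough demonstration follows; it is a sketch, the window is tiny, and hitting it may take many iterations (or never happen on a given run):

#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

/* Thread A: open()/close() in a loop. Each open() reserves an fd slot and
 * only later installs the file, which is the window dup2() can race with. */
static void *opener(void *arg)
{
	(void)arg;
	for (;;) {
		int fd = open("/dev/null", O_RDONLY);

		if (fd >= 0)
			close(fd);
	}
	return NULL;
}

int main(void)
{
	pthread_t t;
	long i;

	pthread_create(&t, NULL, opener, NULL);

	/* Thread B: dup2() onto the slot the opener keeps reusing (fd 3 is
	 * the lowest free descriptor in this minimal program). */
	for (i = 0; i < 10000000L; i++) {
		int fd = dup2(0, 3);

		if (fd < 0 && errno == EBUSY) {
			printf("hit the open()/dup2() window after %ld tries\n", i);
			return 0;
		}
		if (fd >= 0)
			close(fd);
	}
	printf("window not hit; the race is timing dependent\n");
	return 0;
}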


@ -221,7 +221,8 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
/*
* Privileged users can go above max_files
*/
if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) {
if (unlikely(get_nr_files() >= files_stat.max_files) &&
!capable(CAP_SYS_ADMIN)) {
/*
* percpu_counters are inaccurate. Do an expensive check before
* we go and fail.


@ -327,7 +327,17 @@ static void i_callback(struct rcu_head *head)
free_inode_nonrcu(inode);
}
static struct inode *alloc_inode(struct super_block *sb)
/**
* alloc_inode - obtain an inode
* @sb: superblock
*
* Allocates a new inode for given superblock.
* Inode wont be chained in superblock s_inodes list
* This means :
* - fs can't be unmount
* - quotas, fsnotify, writeback can't work
*/
struct inode *alloc_inode(struct super_block *sb)
{
const struct super_operations *ops = sb->s_op;
struct inode *inode;
@ -613,18 +623,22 @@ static void inode_wait_for_lru_isolating(struct inode *inode)
*/
void inode_sb_list_add(struct inode *inode)
{
spin_lock(&inode->i_sb->s_inode_list_lock);
list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
spin_unlock(&inode->i_sb->s_inode_list_lock);
struct super_block *sb = inode->i_sb;
spin_lock(&sb->s_inode_list_lock);
list_add(&inode->i_sb_list, &sb->s_inodes);
spin_unlock(&sb->s_inode_list_lock);
}
EXPORT_SYMBOL_GPL(inode_sb_list_add);
static inline void inode_sb_list_del(struct inode *inode)
{
struct super_block *sb = inode->i_sb;
if (!list_empty(&inode->i_sb_list)) {
spin_lock(&inode->i_sb->s_inode_list_lock);
spin_lock(&sb->s_inode_list_lock);
list_del_init(&inode->i_sb_list);
spin_unlock(&inode->i_sb->s_inode_list_lock);
spin_unlock(&sb->s_inode_list_lock);
}
}
@ -806,23 +820,16 @@ static void evict(struct inode *inode)
/*
* Wake up waiters in __wait_on_freeing_inode().
*
* Lockless hash lookup may end up finding the inode before we removed
* it above, but only lock it *after* we are done with the wakeup below.
* In this case the potential waiter cannot safely block.
* It is an invariant that any thread we need to wake up is already
* accounted for before remove_inode_hash() acquires ->i_lock -- both
* sides take the lock and sleep is aborted if the inode is found
* unhashed. Thus either the sleeper wins and goes off CPU, or removal
* wins and the sleeper aborts after testing with the lock.
*
* The inode being unhashed after the call to remove_inode_hash() is
* used as an indicator whether blocking on it is safe.
* This also means we don't need any fences for the call below.
*/
spin_lock(&inode->i_lock);
/*
* Pairs with the barrier in prepare_to_wait_event() to make sure
* ___wait_var_event() either sees the bit cleared or
* waitqueue_active() check in wake_up_var() sees the waiter.
*/
smp_mb__after_spinlock();
inode_wake_up_bit(inode, __I_NEW);
BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
spin_unlock(&inode->i_lock);
destroy_inode(inode);
}
@ -900,46 +907,6 @@ again:
}
EXPORT_SYMBOL_GPL(evict_inodes);
/**
* invalidate_inodes - attempt to free all inodes on a superblock
* @sb: superblock to operate on
*
* Attempts to free all inodes (including dirty inodes) for a given superblock.
*/
void invalidate_inodes(struct super_block *sb)
{
struct inode *inode, *next;
LIST_HEAD(dispose);
again:
spin_lock(&sb->s_inode_list_lock);
list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
spin_lock(&inode->i_lock);
if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
spin_unlock(&inode->i_lock);
continue;
}
if (atomic_read(&inode->i_count)) {
spin_unlock(&inode->i_lock);
continue;
}
inode->i_state |= I_FREEING;
inode_lru_list_del(inode);
spin_unlock(&inode->i_lock);
list_add(&inode->i_lru, &dispose);
if (need_resched()) {
spin_unlock(&sb->s_inode_list_lock);
cond_resched();
dispose_list(&dispose);
goto again;
}
}
spin_unlock(&sb->s_inode_list_lock);
dispose_list(&dispose);
}
/*
* Isolate the inode from the LRU in preparation for freeing it.
*
@ -1159,21 +1126,6 @@ unsigned int get_next_ino(void)
}
EXPORT_SYMBOL(get_next_ino);
/**
* new_inode_pseudo - obtain an inode
* @sb: superblock
*
* Allocates a new inode for given superblock.
* Inode wont be chained in superblock s_inodes list
* This means :
* - fs can't be unmount
* - quotas, fsnotify, writeback can't work
*/
struct inode *new_inode_pseudo(struct super_block *sb)
{
return alloc_inode(sb);
}
/**
* new_inode - obtain an inode
* @sb: superblock
@ -1190,7 +1142,7 @@ struct inode *new_inode(struct super_block *sb)
{
struct inode *inode;
inode = new_inode_pseudo(sb);
inode = alloc_inode(sb);
if (inode)
inode_sb_list_add(inode);
return inode;
@ -1348,8 +1300,8 @@ again:
}
if (set && unlikely(set(inode, data))) {
inode = NULL;
goto unlock;
spin_unlock(&inode_hash_lock);
return NULL;
}
/*
@ -1361,14 +1313,14 @@ again:
hlist_add_head_rcu(&inode->i_hash, head);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_hash_lock);
/*
* Add inode to the sb list if it's not already. It has I_NEW at this
* point, so it should be safe to test i_sb_list locklessly.
*/
if (list_empty(&inode->i_sb_list))
inode_sb_list_add(inode);
unlock:
spin_unlock(&inode_hash_lock);
return inode;
}
@ -1497,8 +1449,8 @@ again:
inode->i_state = I_NEW;
hlist_add_head_rcu(&inode->i_hash, head);
spin_unlock(&inode->i_lock);
inode_sb_list_add(inode);
spin_unlock(&inode_hash_lock);
inode_sb_list_add(inode);
/* Return the locked inode with I_NEW set, the
* caller is responsible for filling in the contents
@ -2953,3 +2905,18 @@ umode_t mode_strip_sgid(struct mnt_idmap *idmap,
return mode & ~S_ISGID;
}
EXPORT_SYMBOL(mode_strip_sgid);
#ifdef CONFIG_DEBUG_VFS
/*
* Dump an inode.
*
* TODO: add a proper inode dumping routine, this is a stub to get debug off the
* ground.
*/
void dump_inode(struct inode *inode, const char *reason)
{
pr_warn("%s encountered for inode %px", reason, inode);
}
EXPORT_SYMBOL(dump_inode);
#endif


@ -187,8 +187,8 @@ extern struct open_how build_open_how(int flags, umode_t mode);
extern int build_open_flags(const struct open_how *how, struct open_flags *op);
struct file *file_close_fd_locked(struct files_struct *files, unsigned fd);
long do_ftruncate(struct file *file, loff_t length, int small);
long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
int do_ftruncate(struct file *file, loff_t length, int small);
int do_sys_ftruncate(unsigned int fd, loff_t length, int small);
int chmod_common(const struct path *path, umode_t mode);
int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
int flag);
@ -207,7 +207,6 @@ bool in_group_or_capable(struct mnt_idmap *idmap,
* fs-writeback.c
*/
extern long get_nr_dirty_inodes(void);
void invalidate_inodes(struct super_block *sb);
/*
* dcache.c
@ -338,3 +337,4 @@ static inline bool path_mounted(const struct path *path)
return path->mnt->mnt_root == path->dentry;
}
void file_f_owner_release(struct file *file);
bool file_seek_cur_needs_f_lock(struct file *file);


@ -41,7 +41,7 @@
*
* Returns 0 on success, -errno on error.
*/
long vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
int vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
int error = -ENOTTY;
@ -228,8 +228,8 @@ static int ioctl_fiemap(struct file *filp, struct fiemap __user *ufiemap)
return error;
}
static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
u64 off, u64 olen, u64 destoff)
static int ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
u64 off, u64 olen, u64 destoff)
{
CLASS(fd, src_file)(srcfd);
loff_t cloned;
@ -248,8 +248,8 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
return ret;
}
static long ioctl_file_clone_range(struct file *file,
struct file_clone_range __user *argp)
static int ioctl_file_clone_range(struct file *file,
struct file_clone_range __user *argp)
{
struct file_clone_range args;


@ -125,6 +125,13 @@
#define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname))
static inline void initname(struct filename *name)
{
name->uptr = NULL;
name->aname = NULL;
atomic_set(&name->refcnt, 1);
}
struct filename *
getname_flags(const char __user *filename, int flags)
{
@ -203,10 +210,7 @@ getname_flags(const char __user *filename, int flags)
return ERR_PTR(-ENAMETOOLONG);
}
}
atomic_set(&result->refcnt, 1);
result->uptr = filename;
result->aname = NULL;
initname(result);
audit_getname(result);
return result;
}
@ -218,11 +222,6 @@ struct filename *getname_uflags(const char __user *filename, int uflags)
return getname_flags(filename, flags);
}
struct filename *getname(const char __user * filename)
{
return getname_flags(filename, 0);
}
struct filename *__getname_maybe_null(const char __user *pathname)
{
struct filename *name;
@ -269,25 +268,27 @@ struct filename *getname_kernel(const char * filename)
return ERR_PTR(-ENAMETOOLONG);
}
memcpy((char *)result->name, filename, len);
result->uptr = NULL;
result->aname = NULL;
atomic_set(&result->refcnt, 1);
initname(result);
audit_getname(result);
return result;
}
EXPORT_SYMBOL(getname_kernel);
void putname(struct filename *name)
{
int refcnt;
if (IS_ERR_OR_NULL(name))
return;
if (WARN_ON_ONCE(!atomic_read(&name->refcnt)))
return;
refcnt = atomic_read(&name->refcnt);
if (refcnt != 1) {
if (WARN_ON_ONCE(!refcnt))
return;
if (!atomic_dec_and_test(&name->refcnt))
return;
if (!atomic_dec_and_test(&name->refcnt))
return;
}
if (name->name != name->iname) {
__putname(name->name);
@ -2863,15 +2864,14 @@ static int lookup_one_common(struct mnt_idmap *idmap,
* Note that this routine is purely a helper for filesystem usage and should
* not be called by generic code.
*
* The caller must hold base->i_mutex.
* No locks need be held - only a counted reference to @base is needed.
*
*/
struct dentry *try_lookup_one_len(const char *name, struct dentry *base, int len)
{
struct qstr this;
int err;
WARN_ON_ONCE(!inode_is_locked(base->d_inode));
err = lookup_one_common(&nop_mnt_idmap, name, base, len, &this);
if (err)
return ERR_PTR(err);
@ -3415,6 +3415,8 @@ static int may_open(struct mnt_idmap *idmap, const struct path *path,
if ((acc_mode & MAY_EXEC) && path_noexec(path))
return -EACCES;
break;
default:
VFS_BUG_ON_INODE(1, inode);
}
error = inode_permission(idmap, inode, MAY_OPEN | acc_mode);
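
For readability, the putname() fast path that results from the interleaved hunk above, as a reconstruction rather than a quote of the final file: when the count is already 1 the caller holds the only reference, so the atomic decrement is skipped entirely and the function falls through to freeing the name.

	refcnt = atomic_read(&name->refcnt);
	if (refcnt != 1) {
		if (WARN_ON_ONCE(!refcnt))
			return;

		if (!atomic_dec_and_test(&name->refcnt))
			return;
	}
	/* refcnt was 1, or we just dropped the last reference: free below */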


@ -67,11 +67,11 @@ int do_truncate(struct mnt_idmap *idmap, struct dentry *dentry,
return ret;
}
long vfs_truncate(const struct path *path, loff_t length)
int vfs_truncate(const struct path *path, loff_t length)
{
struct mnt_idmap *idmap;
struct inode *inode;
long error;
int error;
inode = path->dentry->d_inode;
@ -123,7 +123,7 @@ mnt_drop_write_and_out:
}
EXPORT_SYMBOL_GPL(vfs_truncate);
long do_sys_truncate(const char __user *pathname, loff_t length)
int do_sys_truncate(const char __user *pathname, loff_t length)
{
unsigned int lookup_flags = LOOKUP_FOLLOW;
struct path path;
@ -157,7 +157,7 @@ COMPAT_SYSCALL_DEFINE2(truncate, const char __user *, path, compat_off_t, length
}
#endif
long do_ftruncate(struct file *file, loff_t length, int small)
int do_ftruncate(struct file *file, loff_t length, int small)
{
struct inode *inode;
struct dentry *dentry;
@ -196,7 +196,7 @@ long do_ftruncate(struct file *file, loff_t length, int small)
return error;
}
long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
int do_sys_ftruncate(unsigned int fd, loff_t length, int small)
{
if (length < 0)
return -EINVAL;
@ -251,7 +251,7 @@ COMPAT_SYSCALL_DEFINE3(ftruncate64, unsigned int, fd,
int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
struct inode *inode = file_inode(file);
long ret;
int ret;
loff_t sum;
if (offset < 0 || len <= 0)
@ -460,7 +460,7 @@ static const struct cred *access_override_creds(void)
return override_creds(override_cred);
}
static long do_faccessat(int dfd, const char __user *filename, int mode, int flags)
static int do_faccessat(int dfd, const char __user *filename, int mode, int flags)
{
struct path path;
struct inode *inode;
@ -1409,22 +1409,23 @@ struct file *file_open_root(const struct path *root,
}
EXPORT_SYMBOL(file_open_root);
static long do_sys_openat2(int dfd, const char __user *filename,
struct open_how *how)
static int do_sys_openat2(int dfd, const char __user *filename,
struct open_how *how)
{
struct open_flags op;
int fd = build_open_flags(how, &op);
struct filename *tmp;
int err, fd;
if (fd)
return fd;
err = build_open_flags(how, &op);
if (unlikely(err))
return err;
tmp = getname(filename);
if (IS_ERR(tmp))
return PTR_ERR(tmp);
fd = get_unused_fd_flags(how->flags);
if (fd >= 0) {
if (likely(fd >= 0)) {
struct file *f = do_filp_open(dfd, tmp, &op);
if (IS_ERR(f)) {
put_unused_fd(fd);
@ -1437,7 +1438,7 @@ static long do_sys_openat2(int dfd, const char __user *filename,
return fd;
}
long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
int do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
{
struct open_how how = build_open_how(flags, mode);
return do_sys_openat2(dfd, filename, &how);


@ -169,11 +169,16 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence,
if (whence == SEEK_CUR) {
/*
* f_lock protects against read/modify/write race with
* other SEEK_CURs. Note that parallel writes and reads
* behave like SEEK_SET.
* If the file requires locking via f_pos_lock we know
* that mutual exclusion for SEEK_CUR on the same file
* is guaranteed. If the file isn't locked, we take
* f_lock to protect against f_pos races with other
* SEEK_CURs.
*/
guard(spinlock)(&file->f_lock);
if (file_seek_cur_needs_f_lock(file)) {
guard(spinlock)(&file->f_lock);
return vfs_setpos(file, file->f_pos + offset, maxsize);
}
return vfs_setpos(file, file->f_pos + offset, maxsize);
}


@ -277,15 +277,14 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags)
return ufd;
}
file = anon_inode_getfile("[signalfd]", &signalfd_fops, ctx,
O_RDWR | (flags & O_NONBLOCK));
file = anon_inode_getfile_fmode("[signalfd]", &signalfd_fops,
ctx, O_RDWR | (flags & O_NONBLOCK),
FMODE_NOWAIT);
if (IS_ERR(file)) {
put_unused_fd(ufd);
kfree(ctx);
return PTR_ERR(file);
}
file->f_mode |= FMODE_NOWAIT;
fd_install(ufd, file);
} else {
CLASS(fd, f)(ufd);


@ -388,7 +388,7 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
spin_unlock(&tcon->tc_lock);
/*
* BB Add call to invalidate_inodes(sb) for all superblocks mounted
* BB Add call to evict_inodes(sb) for all superblocks mounted
* to this tcon.
*/
}


@ -1417,7 +1417,7 @@ static void fs_bdev_mark_dead(struct block_device *bdev, bool surprise)
if (!surprise)
sync_filesystem(sb);
shrink_dcache_sb(sb);
invalidate_inodes(sb);
evict_inodes(sb);
if (sb->s_op->shutdown)
sb->s_op->shutdown(sb);


@ -439,15 +439,15 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
return ufd;
}
file = anon_inode_getfile("[timerfd]", &timerfd_fops, ctx,
O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
file = anon_inode_getfile_fmode("[timerfd]", &timerfd_fops, ctx,
O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS),
FMODE_NOWAIT);
if (IS_ERR(file)) {
put_unused_fd(ufd);
kfree(ctx);
return PTR_ERR(file);
}
file->f_mode |= FMODE_NOWAIT;
fd_install(ufd, file);
return ufd;
}


@ -2,6 +2,7 @@
#ifndef _LINUX_FS_H
#define _LINUX_FS_H
#include <linux/vfsdebug.h>
#include <linux/linkage.h>
#include <linux/wait_bit.h>
#include <linux/kdev_t.h>
@ -790,19 +791,8 @@ struct inode {
static inline void inode_set_cached_link(struct inode *inode, char *link, int linklen)
{
int testlen;
/*
* TODO: patch it into a debug-only check if relevant macros show up.
* In the meantime, since we are suffering strlen even on production kernels
* to find the right length, do a fixup if the wrong value got passed.
*/
testlen = strlen(link);
if (testlen != linklen) {
WARN_ONCE(1, "bad length passed for symlink [%s] (got %d, expected %d)",
link, linklen, testlen);
linklen = testlen;
}
VFS_WARN_ON_INODE(strlen(link) != linklen, inode);
VFS_WARN_ON_INODE(inode->i_opflags & IOP_CACHED_LINK, inode);
inode->i_link = link;
inode->i_linklen = linklen;
inode->i_opflags |= IOP_CACHED_LINK;
@ -1067,7 +1057,6 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
/**
* struct file - Represents a file
* @f_ref: reference count
* @f_lock: Protects f_ep, f_flags. Must not be taken from IRQ context.
* @f_mode: FMODE_* flags often used in hotpaths
* @f_op: file operations
@ -1077,12 +1066,12 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
* @f_flags: file flags
* @f_iocb_flags: iocb flags
* @f_cred: stashed credentials of creator/opener
* @f_owner: file owner
* @f_path: path of the file
* @f_pos_lock: lock protecting file position
* @f_pipe: specific to pipes
* @f_pos: file position
* @f_security: LSM security context of this file
* @f_owner: file owner
* @f_wb_err: writeback error
* @f_sb_err: per sb writeback errors
* @f_ep: link of all epoll hooks for this file
@ -1090,9 +1079,9 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
* @f_llist: work queue entrypoint
* @f_ra: file's readahead state
* @f_freeptr: Pointer used by SLAB_TYPESAFE_BY_RCU file cache (don't touch.)
* @f_ref: reference count
*/
struct file {
file_ref_t f_ref;
spinlock_t f_lock;
fmode_t f_mode;
const struct file_operations *f_op;
@ -1102,6 +1091,7 @@ struct file {
unsigned int f_flags;
unsigned int f_iocb_flags;
const struct cred *f_cred;
struct fown_struct *f_owner;
/* --- cacheline 1 boundary (64 bytes) --- */
struct path f_path;
union {
@ -1115,7 +1105,6 @@ struct file {
void *f_security;
#endif
/* --- cacheline 2 boundary (128 bytes) --- */
struct fown_struct *f_owner;
errseq_t f_wb_err;
errseq_t f_sb_err;
#ifdef CONFIG_EPOLL
@ -1127,6 +1116,7 @@ struct file {
struct file_ra_state f_ra;
freeptr_t f_freeptr;
};
file_ref_t f_ref;
/* --- cacheline 3 boundary (192 bytes) --- */
} __randomize_layout
__attribute__((aligned(4))); /* lest something weird decides that 2 is OK */
@ -2039,7 +2029,7 @@ int vfs_fchown(struct file *file, uid_t user, gid_t group);
int vfs_fchmod(struct file *file, umode_t mode);
int vfs_utimes(const struct path *path, struct timespec64 *times);
extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
int vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
#ifdef CONFIG_COMPAT
extern long compat_ptr_ioctl(struct file *file, unsigned int cmd,
@ -2791,13 +2781,13 @@ static inline bool is_idmapped_mnt(const struct vfsmount *mnt)
return mnt_idmap(mnt) != &nop_mnt_idmap;
}
extern long vfs_truncate(const struct path *, loff_t);
int vfs_truncate(const struct path *, loff_t);
int do_truncate(struct mnt_idmap *, struct dentry *, loff_t start,
unsigned int time_attrs, struct file *filp);
extern int vfs_fallocate(struct file *file, int mode, loff_t offset,
loff_t len);
extern long do_sys_open(int dfd, const char __user *filename, int flags,
umode_t mode);
int do_sys_open(int dfd, const char __user *filename, int flags,
umode_t mode);
extern struct file *file_open_name(struct filename *, int, umode_t);
extern struct file *filp_open(const char *, int, umode_t);
extern struct file *file_open_root(const struct path *,
@ -2848,7 +2838,10 @@ extern int filp_close(struct file *, fl_owner_t id);
extern struct filename *getname_flags(const char __user *, int);
extern struct filename *getname_uflags(const char __user *, int);
extern struct filename *getname(const char __user *);
static inline struct filename *getname(const char __user *name)
{
return getname_flags(name, 0);
}
extern struct filename *getname_kernel(const char *);
extern struct filename *__getname_maybe_null(const char __user *);
static inline struct filename *getname_maybe_null(const char __user *name, int flags)
@ -2862,6 +2855,12 @@ static inline struct filename *getname_maybe_null(const char __user *name, int f
}
extern void putname(struct filename *name);
static inline struct filename *refname(struct filename *name)
{
atomic_inc(&name->refcnt);
return name;
}
extern int finish_open(struct file *file, struct dentry *dentry,
int (*open)(struct inode *, struct file *));
extern int finish_no_open(struct file *file, struct dentry *dentry);
@ -3294,7 +3293,11 @@ static inline void __iget(struct inode *inode)
extern void iget_failed(struct inode *);
extern void clear_inode(struct inode *);
extern void __destroy_inode(struct inode *);
extern struct inode *new_inode_pseudo(struct super_block *sb);
struct inode *alloc_inode(struct super_block *sb);
static inline struct inode *new_inode_pseudo(struct super_block *sb)
{
return alloc_inode(sb);
}
extern struct inode *new_inode(struct super_block *sb);
extern void free_inode_nonrcu(struct inode *inode);
extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *);
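
With the strlen() fixup removed from inode_set_cached_link(), callers are expected to pass an exact length; a hypothetical filesystem-side sketch (names and allocation strategy are illustrative only):

	/* The cached string must remain valid while the inode caches it, and
	 * linklen must equal strlen(link); under CONFIG_DEBUG_VFS a mismatch
	 * now trips VFS_WARN_ON_INODE() instead of being silently fixed up. */
	char *target = kstrdup("subdir/target", GFP_KERNEL);

	if (!target)
		return -ENOMEM;
	inode_set_cached_link(inode, target, strlen(target));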


@ -2555,7 +2555,7 @@ int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc,
struct task_struct *task, bool bypass_rlim);
struct kvec;
struct page *get_dump_page(unsigned long addr);
struct page *get_dump_page(unsigned long addr, int *locked);
bool folio_mark_dirty(struct folio *folio);
bool folio_mark_dirty_lock(struct folio *folio);


@ -1044,21 +1044,23 @@ static inline pgoff_t page_pgoff(const struct folio *folio,
return folio->index + folio_page_idx(folio, page);
}
/**
* folio_pos - Returns the byte position of this folio in its file.
* @folio: The folio.
*/
static inline loff_t folio_pos(const struct folio *folio)
{
return ((loff_t)folio->index) * PAGE_SIZE;
}
/*
* Return byte-offset into filesystem object for page.
*/
static inline loff_t page_offset(struct page *page)
{
return ((loff_t)page->index) << PAGE_SHIFT;
}
struct folio *folio = page_folio(page);
/**
* folio_pos - Returns the byte position of this folio in its file.
* @folio: The folio.
*/
static inline loff_t folio_pos(struct folio *folio)
{
return page_offset(&folio->page);
return folio_pos(folio) + folio_page_idx(folio, page) * PAGE_SIZE;
}
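
Since the removed and added lines are interleaved above, here is the resulting pair of helpers as best it can be read from that hunk (a reconstruction): folio_pos() no longer calls page_offset(); instead page_offset() becomes the wrapper, built from folio_pos() plus the page's index within its folio.

static inline loff_t folio_pos(const struct folio *folio)
{
	return ((loff_t)folio->index) * PAGE_SIZE;
}

static inline loff_t page_offset(struct page *page)
{
	struct folio *folio = page_folio(page);

	return folio_pos(folio) + folio_page_idx(folio, page) * PAGE_SIZE;
}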


@ -1266,14 +1266,14 @@ static inline long ksys_lchown(const char __user *filename, uid_t user,
AT_SYMLINK_NOFOLLOW);
}
extern long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
int do_sys_ftruncate(unsigned int fd, loff_t length, int small);
static inline long ksys_ftruncate(unsigned int fd, loff_t length)
{
return do_sys_ftruncate(fd, length, 1);
}
extern long do_sys_truncate(const char __user *pathname, loff_t length);
int do_sys_truncate(const char __user *pathname, loff_t length);
static inline long ksys_truncate(const char __user *pathname, loff_t length)
{

include/linux/vfsdebug.h (new file, 45 lines)

@ -0,0 +1,45 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef LINUX_VFS_DEBUG_H
#define LINUX_VFS_DEBUG_H 1
#include <linux/bug.h>
struct inode;
#ifdef CONFIG_DEBUG_VFS
void dump_inode(struct inode *inode, const char *reason);
#define VFS_BUG_ON(cond) BUG_ON(cond)
#define VFS_WARN_ON(cond) (void)WARN_ON(cond)
#define VFS_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond)
#define VFS_WARN_ONCE(cond, format...) (void)WARN_ONCE(cond, format)
#define VFS_WARN(cond, format...) (void)WARN(cond, format)
#define VFS_BUG_ON_INODE(cond, inode) ({ \
if (unlikely(!!(cond))) { \
dump_inode(inode, "VFS_BUG_ON_INODE(" #cond")");\
BUG_ON(1); \
} \
})
#define VFS_WARN_ON_INODE(cond, inode) ({ \
int __ret_warn = !!(cond); \
\
if (unlikely(__ret_warn)) { \
dump_inode(inode, "VFS_WARN_ON_INODE(" #cond")");\
WARN_ON(1); \
} \
unlikely(__ret_warn); \
})
#else
#define VFS_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond)
#define VFS_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
#define VFS_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
#define VFS_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
#define VFS_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
#define VFS_BUG_ON_INODE(cond, inode) VFS_BUG_ON(cond)
#define VFS_WARN_ON_INODE(cond, inode) BUILD_BUG_ON_INVALID(cond)
#endif /* CONFIG_DEBUG_VFS */
#endif
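
A quick sketch of how these macros are meant to be used, reusing the two call sites added elsewhere in this series: with CONFIG_DEBUG_VFS=y a failing check dumps the offending inode and then warns or BUGs; with it disabled they compile down to BUILD_BUG_ON_INVALID(), so the condition is still type-checked but generates no code.

	/* from inode_set_cached_link(): a mismatched length is reported but tolerated */
	VFS_WARN_ON_INODE(strlen(link) != linklen, inode);

	/* from may_open(): reaching the default case is treated as a hard bug */
	VFS_BUG_ON_INODE(1, inode);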


@ -2207,10 +2207,8 @@ __audit_reusename(const __user char *uptr)
list_for_each_entry(n, &context->names_list, list) {
if (!n->name)
continue;
if (n->name->uptr == uptr) {
atomic_inc(&n->name->refcnt);
return n->name;
}
if (n->name->uptr == uptr)
return refname(n->name);
}
return NULL;
}
@ -2237,7 +2235,7 @@ void __audit_getname(struct filename *name)
n->name = name;
n->name_len = AUDIT_NAME_FULL;
name->aname = n;
atomic_inc(&name->refcnt);
refname(name);
}
static inline int audit_copy_fcaps(struct audit_names *name,
@ -2369,7 +2367,7 @@ out_alloc:
return;
if (name) {
n->name = name;
atomic_inc(&name->refcnt);
refname(name);
}
out:
@ -2496,7 +2494,7 @@ void __audit_inode_child(struct inode *parent,
if (found_parent) {
found_child->name = found_parent->name;
found_child->name_len = AUDIT_NAME_FULL;
atomic_inc(&found_child->name->refcnt);
refname(found_child->name);
}
}


@ -145,7 +145,7 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
*/
task1 = find_task_by_vpid(pid1);
task2 = find_task_by_vpid(pid2);
if (!task1 || !task2)
if (unlikely(!task1 || !task2))
goto err_no_task;
get_task_struct(task1);


@ -269,6 +269,15 @@ long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes)
if (ret < 0)
goto error;
/*
* pipe_resize_ring() does not update nr_accounted for watch_queue
* pipes, because the above vastly overprovisions. Set nr_accounted and
* max_usage on this pipe to the number that was actually charged to
* the user above via account_pipe_buffers.
*/
pipe->max_usage = nr_pages;
pipe->nr_accounted = nr_pages;
ret = -ENOMEM;
pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
if (!pages)


@ -808,6 +808,15 @@ config ARCH_HAS_DEBUG_VM_PGTABLE
An architecture should select this when it can successfully
build and run DEBUG_VM_PGTABLE.
config DEBUG_VFS
bool "Debug VFS"
depends on DEBUG_KERNEL
help
Enable this to turn on extended checks in the VFS layer that may impact
performance.
If unsure, say N.
config DEBUG_VM_IRQSOFF
def_bool DEBUG_VM && !PREEMPT_RT


@ -2254,6 +2254,7 @@ EXPORT_SYMBOL(fault_in_readable);
/**
* get_dump_page() - pin user page in memory while writing it to core dump
* @addr: user address
* @locked: a pointer to an int denoting whether the mmap sem is held
*
* Returns struct page pointer of user page pinned for dump,
* to be freed afterwards by put_page().
@ -2266,13 +2267,12 @@ EXPORT_SYMBOL(fault_in_readable);
* Called without mmap_lock (takes and releases the mmap_lock by itself).
*/
#ifdef CONFIG_ELF_CORE
struct page *get_dump_page(unsigned long addr)
struct page *get_dump_page(unsigned long addr, int *locked)
{
struct page *page;
int locked = 0;
int ret;
ret = __get_user_pages_locked(current->mm, addr, 1, &page, &locked,
ret = __get_user_pages_locked(current->mm, addr, 1, &page, locked,
FOLL_FORCE | FOLL_DUMP | FOLL_GET);
return (ret == 1) ? page : NULL;
}


@ -1216,7 +1216,7 @@ static void hook_inode_free_security_rcu(void *inode_security)
/*
* Release the inodes used in a security policy.
*
* Cf. fsnotify_unmount_inodes() and invalidate_inodes()
* Cf. fsnotify_unmount_inodes() and evict_inodes()
*/
static void hook_sb_delete(struct super_block *const sb)
{


@ -4224,15 +4224,14 @@ static int kvm_vcpu_ioctl_get_stats_fd(struct kvm_vcpu *vcpu)
if (fd < 0)
return fd;
file = anon_inode_getfile(name, &kvm_vcpu_stats_fops, vcpu, O_RDONLY);
file = anon_inode_getfile_fmode(name, &kvm_vcpu_stats_fops, vcpu,
O_RDONLY, FMODE_PREAD);
if (IS_ERR(file)) {
put_unused_fd(fd);
return PTR_ERR(file);
}
kvm_get_kvm(vcpu->kvm);
file->f_mode |= FMODE_PREAD;
fd_install(fd, file);
return fd;
@ -5020,16 +5019,14 @@ static int kvm_vm_ioctl_get_stats_fd(struct kvm *kvm)
if (fd < 0)
return fd;
file = anon_inode_getfile("kvm-vm-stats",
&kvm_vm_stats_fops, kvm, O_RDONLY);
file = anon_inode_getfile_fmode("kvm-vm-stats",
&kvm_vm_stats_fops, kvm, O_RDONLY, FMODE_PREAD);
if (IS_ERR(file)) {
put_unused_fd(fd);
return PTR_ERR(file);
}
kvm_get_kvm(kvm);
file->f_mode |= FMODE_PREAD;
fd_install(fd, file);
return fd;