mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/
synced 2025-04-19 20:58:31 +09:00
vfs-6.15-rc1.misc
-----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCZ90p4AAKCRCRxhvAZXjc ojMIAP9atkG3u7+490+NGWLdulQlaHnD51Owa9MiW87UfKpsTQEArwi/NrJqXJNT PFQ2xIa5TxG+9haChR89w3kjZ6b/hgs= =iDkx -----END PGP SIGNATURE----- Merge tag 'vfs-6.15-rc1.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs Pull misc vfs updates from Christian Brauner: "Features: - Add CONFIG_DEBUG_VFS infrastructure: - Catch invalid modes in open - Use the new debug macros in inode_set_cached_link() - Use debug-only asserts around fd allocation and install - Place f_ref to 3rd cache line in struct file to resolve false sharing Cleanups: - Start using anon_inode_getfile_fmode() helper in various places - Don't take f_lock during SEEK_CUR if exclusion is guaranteed by f_pos_lock - Add unlikely() to kcmp() - Remove legacy ->remount_fs method from ecryptfs after port to the new mount api - Remove invalidate_inodes() in favour of evict_inodes() - Simplify ep_busy_loop by removing unused argument - Avoid mmap sem relocks when coredumping with many missing pages - Inline getname() - Inline new_inode_pseudo() and de-staticize alloc_inode() - Dodge an atomic in putname if ref == 1 - Consistently deref the files table with rcu_dereference_raw() - Dedup handling of struct filename init and refcounts bumps - Use wq_has_sleeper() in end_dir_add() - Drop the lock trip around I_NEW wake up in evict() - Load the ->i_sb pointer once in inode_sb_list_{add,del} - Predict not reaching the limit in alloc_empty_file() - Tidy up do_sys_openat2() with likely/unlikely - Call inode_sb_list_add() outside of inode hash lock - Sort out fd allocation vs dup2 race commentary - Turn page_offset() into a wrapper around folio_pos() - Remove locking in exportfs around ->get_parent() call - try_lookup_one_len() does not need any locks in autofs - Fix return type of several functions from long to int in open - Fix return type of several functions from long to int in ioctls Fixes: - Fix watch queue accounting 
mismatch" * tag 'vfs-6.15-rc1.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (30 commits) fs: sort out fd allocation vs dup2 race commentary, take 2 fs: call inode_sb_list_add() outside of inode hash lock fs: tidy up do_sys_openat2() with likely/unlikely fs: predict not reaching the limit in alloc_empty_file() fs: load the ->i_sb pointer once in inode_sb_list_{add,del} fs: drop the lock trip around I_NEW wake up in evict() fs: use wq_has_sleeper() in end_dir_add() VFS/autofs: try_lookup_one_len() does not need any locks fs: dedup handling of struct filename init and refcounts bumps fs: consistently deref the files table with rcu_dereference_raw() exportfs: remove locking around ->get_parent() call. fs: use debug-only asserts around fd allocation and install fs: dodge an atomic in putname if ref == 1 vfs: Remove invalidate_inodes() ecryptfs: remove NULL remount_fs from super_operations watch_queue: fix pipe accounting mismatch fs: place f_ref to 3rd cache line in struct file to resolve false sharing epoll: simplify ep_busy_loop by removing always 0 argument fs: Turn page_offset() into a wrapper around folio_pos() kcmp: improve performance adding an unlikely hint to task comparisons ...
This commit is contained in:
commit
99c21beaab
@ -1157,3 +1157,8 @@ in normal case it points into the pathname being looked up.
|
||||
NOTE: if you need something like full path from the root of filesystem,
|
||||
you are still on your own - this assists with simple cases, but it's not
|
||||
magic.
|
||||
|
||||
---
|
||||
|
||||
** mandatory **
|
||||
invalidate_inodes() is gone; use evict_inodes() instead.
|
||||
|
@ -27,9 +27,10 @@ static int mte_dump_tag_range(struct coredump_params *cprm,
|
||||
int ret = 1;
|
||||
unsigned long addr;
|
||||
void *tags = NULL;
|
||||
int locked = 0;
|
||||
|
||||
for (addr = start; addr < start + len; addr += PAGE_SIZE) {
|
||||
struct page *page = get_dump_page(addr);
|
||||
struct page *page = get_dump_page(addr, &locked);
|
||||
|
||||
/*
|
||||
* get_dump_page() returns NULL when encountering an empty
|
||||
|
@ -482,14 +482,13 @@ static long papr_vpd_create_handle(struct papr_location_code __user *ulc)
|
||||
goto free_blob;
|
||||
}
|
||||
|
||||
file = anon_inode_getfile("[papr-vpd]", &papr_vpd_handle_ops,
|
||||
(void *)blob, O_RDONLY);
|
||||
file = anon_inode_getfile_fmode("[papr-vpd]", &papr_vpd_handle_ops,
|
||||
(void *)blob, O_RDONLY,
|
||||
FMODE_LSEEK | FMODE_PREAD);
|
||||
if (IS_ERR(file)) {
|
||||
err = PTR_ERR(file);
|
||||
goto put_fd;
|
||||
}
|
||||
|
||||
file->f_mode |= FMODE_LSEEK | FMODE_PREAD;
|
||||
fd_install(fd, file);
|
||||
return fd;
|
||||
put_fd:
|
||||
|
@ -266,24 +266,12 @@ static struct file *vfio_device_open_file(struct vfio_device *device)
|
||||
if (ret)
|
||||
goto err_free;
|
||||
|
||||
/*
|
||||
* We can't use anon_inode_getfd() because we need to modify
|
||||
* the f_mode flags directly to allow more than just ioctls
|
||||
*/
|
||||
filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
|
||||
df, O_RDWR);
|
||||
filep = anon_inode_getfile_fmode("[vfio-device]", &vfio_device_fops,
|
||||
df, O_RDWR, FMODE_PREAD | FMODE_PWRITE);
|
||||
if (IS_ERR(filep)) {
|
||||
ret = PTR_ERR(filep);
|
||||
goto err_close_device;
|
||||
}
|
||||
|
||||
/*
|
||||
* TODO: add an anon_inode interface to do this.
|
||||
* Appears to be missing by lack of need rather than
|
||||
* explicitly prevented. Now there's need.
|
||||
*/
|
||||
filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);
|
||||
|
||||
/*
|
||||
* Use the pseudo fs inode on the device to link all mmaps
|
||||
* to the same address space, allowing us to unmap all vmas
|
||||
|
@ -442,7 +442,6 @@ static int autofs_dev_ioctl_timeout(struct file *fp,
|
||||
sbi->exp_timeout = timeout * HZ;
|
||||
} else {
|
||||
struct dentry *base = fp->f_path.dentry;
|
||||
struct inode *inode = base->d_inode;
|
||||
int path_len = param->size - AUTOFS_DEV_IOCTL_SIZE - 1;
|
||||
struct dentry *dentry;
|
||||
struct autofs_info *ino;
|
||||
@ -460,9 +459,7 @@ static int autofs_dev_ioctl_timeout(struct file *fp,
|
||||
"the parent autofs mount timeout which could "
|
||||
"prevent shutdown\n");
|
||||
|
||||
inode_lock_shared(inode);
|
||||
dentry = try_lookup_one_len(param->path, base, path_len);
|
||||
inode_unlock_shared(inode);
|
||||
if (IS_ERR_OR_NULL(dentry))
|
||||
return dentry ? PTR_ERR(dentry) : -ENOENT;
|
||||
ino = autofs_dentry_ino(dentry);
|
||||
|
@ -317,8 +317,9 @@ static int cachefiles_ondemand_get_fd(struct cachefiles_req *req,
|
||||
goto err_free_id;
|
||||
}
|
||||
|
||||
anon_file->file = anon_inode_getfile("[cachefiles]",
|
||||
&cachefiles_ondemand_fd_fops, object, O_WRONLY);
|
||||
anon_file->file = anon_inode_getfile_fmode("[cachefiles]",
|
||||
&cachefiles_ondemand_fd_fops, object,
|
||||
O_WRONLY, FMODE_PWRITE | FMODE_LSEEK);
|
||||
if (IS_ERR(anon_file->file)) {
|
||||
ret = PTR_ERR(anon_file->file);
|
||||
goto err_put_fd;
|
||||
@ -333,8 +334,6 @@ static int cachefiles_ondemand_get_fd(struct cachefiles_req *req,
|
||||
goto err_put_file;
|
||||
}
|
||||
|
||||
anon_file->file->f_mode |= FMODE_PWRITE | FMODE_LSEEK;
|
||||
|
||||
load = (void *)req->msg.data;
|
||||
load->fd = anon_file->fd;
|
||||
object->ondemand->ondemand_id = object_id;
|
||||
|
@ -926,14 +926,23 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start,
|
||||
{
|
||||
unsigned long addr;
|
||||
struct page *dump_page;
|
||||
int locked, ret;
|
||||
|
||||
dump_page = dump_page_alloc();
|
||||
if (!dump_page)
|
||||
return 0;
|
||||
|
||||
ret = 0;
|
||||
locked = 0;
|
||||
for (addr = start; addr < start + len; addr += PAGE_SIZE) {
|
||||
struct page *page;
|
||||
|
||||
if (!locked) {
|
||||
if (mmap_read_lock_killable(current->mm))
|
||||
goto out;
|
||||
locked = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* To avoid having to allocate page tables for virtual address
|
||||
* ranges that have never been used yet, and also to make it
|
||||
@ -941,21 +950,38 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start,
|
||||
* NULL when encountering an empty page table entry that would
|
||||
* otherwise have been filled with the zero page.
|
||||
*/
|
||||
page = get_dump_page(addr);
|
||||
page = get_dump_page(addr, &locked);
|
||||
if (page) {
|
||||
if (locked) {
|
||||
mmap_read_unlock(current->mm);
|
||||
locked = 0;
|
||||
}
|
||||
int stop = !dump_emit_page(cprm, dump_page_copy(page, dump_page));
|
||||
put_page(page);
|
||||
if (stop) {
|
||||
dump_page_free(dump_page);
|
||||
return 0;
|
||||
}
|
||||
if (stop)
|
||||
goto out;
|
||||
} else {
|
||||
dump_skip(cprm, PAGE_SIZE);
|
||||
}
|
||||
|
||||
if (dump_interrupted())
|
||||
goto out;
|
||||
|
||||
if (!need_resched())
|
||||
continue;
|
||||
if (locked) {
|
||||
mmap_read_unlock(current->mm);
|
||||
locked = 0;
|
||||
}
|
||||
cond_resched();
|
||||
}
|
||||
ret = 1;
|
||||
out:
|
||||
if (locked)
|
||||
mmap_read_unlock(current->mm);
|
||||
|
||||
dump_page_free(dump_page);
|
||||
return 1;
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -2480,7 +2480,8 @@ static inline void end_dir_add(struct inode *dir, unsigned int n,
|
||||
{
|
||||
smp_store_release(&dir->i_dir_seq, n + 2);
|
||||
preempt_enable_nested();
|
||||
wake_up_all(d_wait);
|
||||
if (wq_has_sleeper(d_wait))
|
||||
wake_up_all(d_wait);
|
||||
}
|
||||
|
||||
static void d_wait_lookup(struct dentry *dentry)
|
||||
|
@ -172,7 +172,6 @@ const struct super_operations ecryptfs_sops = {
|
||||
.destroy_inode = ecryptfs_destroy_inode,
|
||||
.free_inode = ecryptfs_free_inode,
|
||||
.statfs = ecryptfs_statfs,
|
||||
.remount_fs = NULL,
|
||||
.evict_inode = ecryptfs_evict_inode,
|
||||
.show_options = ecryptfs_show_options
|
||||
};
|
||||
|
@ -406,14 +406,13 @@ static int do_eventfd(unsigned int count, int flags)
|
||||
if (fd < 0)
|
||||
goto err;
|
||||
|
||||
file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, flags);
|
||||
file = anon_inode_getfile_fmode("[eventfd]", &eventfd_fops,
|
||||
ctx, flags, FMODE_NOWAIT);
|
||||
if (IS_ERR(file)) {
|
||||
put_unused_fd(fd);
|
||||
fd = PTR_ERR(file);
|
||||
goto err;
|
||||
}
|
||||
|
||||
file->f_mode |= FMODE_NOWAIT;
|
||||
fd_install(fd, file);
|
||||
return fd;
|
||||
err:
|
||||
|
@ -438,7 +438,7 @@ static bool ep_busy_loop_end(void *p, unsigned long start_time)
|
||||
*
|
||||
* we must do our busy polling with irqs enabled
|
||||
*/
|
||||
static bool ep_busy_loop(struct eventpoll *ep, int nonblock)
|
||||
static bool ep_busy_loop(struct eventpoll *ep)
|
||||
{
|
||||
unsigned int napi_id = READ_ONCE(ep->napi_id);
|
||||
u16 budget = READ_ONCE(ep->busy_poll_budget);
|
||||
@ -448,7 +448,7 @@ static bool ep_busy_loop(struct eventpoll *ep, int nonblock)
|
||||
budget = BUSY_POLL_BUDGET;
|
||||
|
||||
if (napi_id >= MIN_NAPI_ID && ep_busy_loop_on(ep)) {
|
||||
napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end,
|
||||
napi_busy_loop(napi_id, ep_busy_loop_end,
|
||||
ep, prefer_busy_poll, budget);
|
||||
if (ep_events_available(ep))
|
||||
return true;
|
||||
@ -560,7 +560,7 @@ static void ep_resume_napi_irqs(struct eventpoll *ep)
|
||||
|
||||
#else
|
||||
|
||||
static inline bool ep_busy_loop(struct eventpoll *ep, int nonblock)
|
||||
static inline bool ep_busy_loop(struct eventpoll *ep)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
@ -2047,7 +2047,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
|
||||
if (timed_out)
|
||||
return 0;
|
||||
|
||||
eavail = ep_busy_loop(ep, timed_out);
|
||||
eavail = ep_busy_loop(ep);
|
||||
if (eavail)
|
||||
continue;
|
||||
|
||||
|
@ -126,10 +126,8 @@ static struct dentry *reconnect_one(struct vfsmount *mnt,
|
||||
int err;
|
||||
|
||||
parent = ERR_PTR(-EACCES);
|
||||
inode_lock(dentry->d_inode);
|
||||
if (mnt->mnt_sb->s_export_op->get_parent)
|
||||
parent = mnt->mnt_sb->s_export_op->get_parent(dentry);
|
||||
inode_unlock(dentry->d_inode);
|
||||
|
||||
if (IS_ERR(parent)) {
|
||||
dprintk("get_parent of %lu failed, err %ld\n",
|
||||
|
81
fs/file.c
81
fs/file.c
@ -418,17 +418,25 @@ struct files_struct *dup_fd(struct files_struct *oldf, struct fd_range *punch_ho
|
||||
old_fds = old_fdt->fd;
|
||||
new_fds = new_fdt->fd;
|
||||
|
||||
/*
|
||||
* We may be racing against fd allocation from other threads using this
|
||||
* files_struct, despite holding ->file_lock.
|
||||
*
|
||||
* alloc_fd() might have already claimed a slot, while fd_install()
|
||||
* did not populate it yet. Note the latter operates locklessly, so
|
||||
* the file can show up as we are walking the array below.
|
||||
*
|
||||
* At the same time we know no files will disappear as all other
|
||||
* operations take the lock.
|
||||
*
|
||||
* Instead of trying to placate userspace racing with itself, we
|
||||
* ref the file if we see it and mark the fd slot as unused otherwise.
|
||||
*/
|
||||
for (i = open_files; i != 0; i--) {
|
||||
struct file *f = *old_fds++;
|
||||
struct file *f = rcu_dereference_raw(*old_fds++);
|
||||
if (f) {
|
||||
get_file(f);
|
||||
} else {
|
||||
/*
|
||||
* The fd may be claimed in the fd bitmap but not yet
|
||||
* instantiated in the files array if a sibling thread
|
||||
* is partway through open(). So make sure that this
|
||||
* fd is available to the new process.
|
||||
*/
|
||||
__clear_open_fd(open_files - i, new_fdt);
|
||||
}
|
||||
rcu_assign_pointer(*new_fds++, f);
|
||||
@ -577,6 +585,7 @@ repeat:
|
||||
|
||||
__set_open_fd(fd, fdt, flags & O_CLOEXEC);
|
||||
error = fd;
|
||||
VFS_BUG_ON(rcu_access_pointer(fdt->fd[fd]) != NULL);
|
||||
|
||||
out:
|
||||
spin_unlock(&files->file_lock);
|
||||
@ -612,22 +621,14 @@ void put_unused_fd(unsigned int fd)
|
||||
|
||||
EXPORT_SYMBOL(put_unused_fd);
|
||||
|
||||
/*
|
||||
* Install a file pointer in the fd array.
|
||||
*
|
||||
* The VFS is full of places where we drop the files lock between
|
||||
* setting the open_fds bitmap and installing the file in the file
|
||||
* array. At any such point, we are vulnerable to a dup2() race
|
||||
* installing a file in the array before us. We need to detect this and
|
||||
* fput() the struct file we are about to overwrite in this case.
|
||||
*
|
||||
* It should never happen - if we allow dup2() do it, _really_ bad things
|
||||
* will follow.
|
||||
/**
|
||||
* fd_install - install a file pointer in the fd array
|
||||
* @fd: file descriptor to install the file in
|
||||
* @file: the file to install
|
||||
*
|
||||
* This consumes the "file" refcount, so callers should treat it
|
||||
* as if they had called fput(file).
|
||||
*/
|
||||
|
||||
void fd_install(unsigned int fd, struct file *file)
|
||||
{
|
||||
struct files_struct *files = current->files;
|
||||
@ -642,7 +643,7 @@ void fd_install(unsigned int fd, struct file *file)
|
||||
rcu_read_unlock_sched();
|
||||
spin_lock(&files->file_lock);
|
||||
fdt = files_fdtable(files);
|
||||
WARN_ON(fdt->fd[fd] != NULL);
|
||||
VFS_BUG_ON(rcu_access_pointer(fdt->fd[fd]) != NULL);
|
||||
rcu_assign_pointer(fdt->fd[fd], file);
|
||||
spin_unlock(&files->file_lock);
|
||||
return;
|
||||
@ -650,7 +651,7 @@ void fd_install(unsigned int fd, struct file *file)
|
||||
/* coupled with smp_wmb() in expand_fdtable() */
|
||||
smp_rmb();
|
||||
fdt = rcu_dereference_sched(files->fdt);
|
||||
BUG_ON(fdt->fd[fd] != NULL);
|
||||
VFS_BUG_ON(rcu_access_pointer(fdt->fd[fd]) != NULL);
|
||||
rcu_assign_pointer(fdt->fd[fd], file);
|
||||
rcu_read_unlock_sched();
|
||||
}
|
||||
@ -679,7 +680,7 @@ struct file *file_close_fd_locked(struct files_struct *files, unsigned fd)
|
||||
return NULL;
|
||||
|
||||
fd = array_index_nospec(fd, fdt->max_fds);
|
||||
file = fdt->fd[fd];
|
||||
file = rcu_dereference_raw(fdt->fd[fd]);
|
||||
if (file) {
|
||||
rcu_assign_pointer(fdt->fd[fd], NULL);
|
||||
__put_unused_fd(files, fd);
|
||||
@ -1182,6 +1183,16 @@ static inline bool file_needs_f_pos_lock(struct file *file)
|
||||
(file_count(file) > 1 || file->f_op->iterate_shared);
|
||||
}
|
||||
|
||||
bool file_seek_cur_needs_f_lock(struct file *file)
|
||||
{
|
||||
if (!(file->f_mode & FMODE_ATOMIC_POS) && !file->f_op->iterate_shared)
|
||||
return false;
|
||||
|
||||
VFS_WARN_ON_ONCE((file_count(file) > 1) &&
|
||||
!mutex_is_locked(&file->f_pos_lock));
|
||||
return true;
|
||||
}
|
||||
|
||||
struct fd fdget_pos(unsigned int fd)
|
||||
{
|
||||
struct fd f = fdget(fd);
|
||||
@ -1230,14 +1241,34 @@ __releases(&files->file_lock)
|
||||
struct fdtable *fdt;
|
||||
|
||||
/*
|
||||
* We need to detect attempts to do dup2() over allocated but still
|
||||
* not finished descriptor.
|
||||
* dup2() is expected to close the file installed in the target fd slot
|
||||
* (if any). However, userspace hand-picking a fd may be racing against
|
||||
* its own threads which happened to allocate it in open() et al but did
|
||||
* not populate it yet.
|
||||
*
|
||||
* Broadly speaking we may be racing against the following:
|
||||
* fd = get_unused_fd_flags(); // fd slot reserved, ->fd[fd] == NULL
|
||||
* file = hard_work_goes_here();
|
||||
* fd_install(fd, file); // only now ->fd[fd] == file
|
||||
*
|
||||
* It is an invariant that a successfully allocated fd has a NULL entry
|
||||
* in the array until the matching fd_install().
|
||||
*
|
||||
* If we fit the window, we have the fd to populate, yet no target file
|
||||
* to close. Trying to ignore it and install our new file would violate
|
||||
* the invariant and make fd_install() overwrite our file.
|
||||
*
|
||||
* Things can be done(tm) to handle this. However, the issue does not
|
||||
* concern legitimate programs and we only need to make sure the kernel
|
||||
* does not trip over it.
|
||||
*
|
||||
* The simplest way out is to return an error if we find ourselves here.
|
||||
*
|
||||
* POSIX is silent on the issue, we return -EBUSY.
|
||||
*/
|
||||
fdt = files_fdtable(files);
|
||||
fd = array_index_nospec(fd, fdt->max_fds);
|
||||
tofree = fdt->fd[fd];
|
||||
tofree = rcu_dereference_raw(fdt->fd[fd]);
|
||||
if (!tofree && fd_is_open(fd, fdt))
|
||||
goto Ebusy;
|
||||
get_file(file);
|
||||
|
@ -221,7 +221,8 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
|
||||
/*
|
||||
* Privileged users can go above max_files
|
||||
*/
|
||||
if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) {
|
||||
if (unlikely(get_nr_files() >= files_stat.max_files) &&
|
||||
!capable(CAP_SYS_ADMIN)) {
|
||||
/*
|
||||
* percpu_counters are inaccurate. Do an expensive check before
|
||||
* we go and fail.
|
||||
|
127
fs/inode.c
127
fs/inode.c
@ -327,7 +327,17 @@ static void i_callback(struct rcu_head *head)
|
||||
free_inode_nonrcu(inode);
|
||||
}
|
||||
|
||||
static struct inode *alloc_inode(struct super_block *sb)
|
||||
/**
|
||||
* alloc_inode - obtain an inode
|
||||
* @sb: superblock
|
||||
*
|
||||
* Allocates a new inode for given superblock.
|
||||
* Inode won't be chained in superblock s_inodes list
|
||||
* This means :
|
||||
* - fs can't be unmounted
|
||||
* - quotas, fsnotify, writeback can't work
|
||||
*/
|
||||
struct inode *alloc_inode(struct super_block *sb)
|
||||
{
|
||||
const struct super_operations *ops = sb->s_op;
|
||||
struct inode *inode;
|
||||
@ -613,18 +623,22 @@ static void inode_wait_for_lru_isolating(struct inode *inode)
|
||||
*/
|
||||
void inode_sb_list_add(struct inode *inode)
|
||||
{
|
||||
spin_lock(&inode->i_sb->s_inode_list_lock);
|
||||
list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
|
||||
spin_unlock(&inode->i_sb->s_inode_list_lock);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
|
||||
spin_lock(&sb->s_inode_list_lock);
|
||||
list_add(&inode->i_sb_list, &sb->s_inodes);
|
||||
spin_unlock(&sb->s_inode_list_lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(inode_sb_list_add);
|
||||
|
||||
static inline void inode_sb_list_del(struct inode *inode)
|
||||
{
|
||||
struct super_block *sb = inode->i_sb;
|
||||
|
||||
if (!list_empty(&inode->i_sb_list)) {
|
||||
spin_lock(&inode->i_sb->s_inode_list_lock);
|
||||
spin_lock(&sb->s_inode_list_lock);
|
||||
list_del_init(&inode->i_sb_list);
|
||||
spin_unlock(&inode->i_sb->s_inode_list_lock);
|
||||
spin_unlock(&sb->s_inode_list_lock);
|
||||
}
|
||||
}
|
||||
|
||||
@ -806,23 +820,16 @@ static void evict(struct inode *inode)
|
||||
/*
|
||||
* Wake up waiters in __wait_on_freeing_inode().
|
||||
*
|
||||
* Lockless hash lookup may end up finding the inode before we removed
|
||||
* it above, but only lock it *after* we are done with the wakeup below.
|
||||
* In this case the potential waiter cannot safely block.
|
||||
* It is an invariant that any thread we need to wake up is already
|
||||
* accounted for before remove_inode_hash() acquires ->i_lock -- both
|
||||
* sides take the lock and sleep is aborted if the inode is found
|
||||
* unhashed. Thus either the sleeper wins and goes off CPU, or removal
|
||||
* wins and the sleeper aborts after testing with the lock.
|
||||
*
|
||||
* The inode being unhashed after the call to remove_inode_hash() is
|
||||
* used as an indicator whether blocking on it is safe.
|
||||
* This also means we don't need any fences for the call below.
|
||||
*/
|
||||
spin_lock(&inode->i_lock);
|
||||
/*
|
||||
* Pairs with the barrier in prepare_to_wait_event() to make sure
|
||||
* ___wait_var_event() either sees the bit cleared or
|
||||
* waitqueue_active() check in wake_up_var() sees the waiter.
|
||||
*/
|
||||
smp_mb__after_spinlock();
|
||||
inode_wake_up_bit(inode, __I_NEW);
|
||||
BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
|
||||
spin_unlock(&inode->i_lock);
|
||||
|
||||
destroy_inode(inode);
|
||||
}
|
||||
@ -900,46 +907,6 @@ again:
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(evict_inodes);
|
||||
|
||||
/**
|
||||
* invalidate_inodes - attempt to free all inodes on a superblock
|
||||
* @sb: superblock to operate on
|
||||
*
|
||||
* Attempts to free all inodes (including dirty inodes) for a given superblock.
|
||||
*/
|
||||
void invalidate_inodes(struct super_block *sb)
|
||||
{
|
||||
struct inode *inode, *next;
|
||||
LIST_HEAD(dispose);
|
||||
|
||||
again:
|
||||
spin_lock(&sb->s_inode_list_lock);
|
||||
list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
|
||||
spin_lock(&inode->i_lock);
|
||||
if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
|
||||
spin_unlock(&inode->i_lock);
|
||||
continue;
|
||||
}
|
||||
if (atomic_read(&inode->i_count)) {
|
||||
spin_unlock(&inode->i_lock);
|
||||
continue;
|
||||
}
|
||||
|
||||
inode->i_state |= I_FREEING;
|
||||
inode_lru_list_del(inode);
|
||||
spin_unlock(&inode->i_lock);
|
||||
list_add(&inode->i_lru, &dispose);
|
||||
if (need_resched()) {
|
||||
spin_unlock(&sb->s_inode_list_lock);
|
||||
cond_resched();
|
||||
dispose_list(&dispose);
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
spin_unlock(&sb->s_inode_list_lock);
|
||||
|
||||
dispose_list(&dispose);
|
||||
}
|
||||
|
||||
/*
|
||||
* Isolate the inode from the LRU in preparation for freeing it.
|
||||
*
|
||||
@ -1159,21 +1126,6 @@ unsigned int get_next_ino(void)
|
||||
}
|
||||
EXPORT_SYMBOL(get_next_ino);
|
||||
|
||||
/**
|
||||
* new_inode_pseudo - obtain an inode
|
||||
* @sb: superblock
|
||||
*
|
||||
* Allocates a new inode for given superblock.
|
||||
* Inode wont be chained in superblock s_inodes list
|
||||
* This means :
|
||||
* - fs can't be unmount
|
||||
* - quotas, fsnotify, writeback can't work
|
||||
*/
|
||||
struct inode *new_inode_pseudo(struct super_block *sb)
|
||||
{
|
||||
return alloc_inode(sb);
|
||||
}
|
||||
|
||||
/**
|
||||
* new_inode - obtain an inode
|
||||
* @sb: superblock
|
||||
@ -1190,7 +1142,7 @@ struct inode *new_inode(struct super_block *sb)
|
||||
{
|
||||
struct inode *inode;
|
||||
|
||||
inode = new_inode_pseudo(sb);
|
||||
inode = alloc_inode(sb);
|
||||
if (inode)
|
||||
inode_sb_list_add(inode);
|
||||
return inode;
|
||||
@ -1348,8 +1300,8 @@ again:
|
||||
}
|
||||
|
||||
if (set && unlikely(set(inode, data))) {
|
||||
inode = NULL;
|
||||
goto unlock;
|
||||
spin_unlock(&inode_hash_lock);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1361,14 +1313,14 @@ again:
|
||||
hlist_add_head_rcu(&inode->i_hash, head);
|
||||
spin_unlock(&inode->i_lock);
|
||||
|
||||
spin_unlock(&inode_hash_lock);
|
||||
|
||||
/*
|
||||
* Add inode to the sb list if it's not already. It has I_NEW at this
|
||||
* point, so it should be safe to test i_sb_list locklessly.
|
||||
*/
|
||||
if (list_empty(&inode->i_sb_list))
|
||||
inode_sb_list_add(inode);
|
||||
unlock:
|
||||
spin_unlock(&inode_hash_lock);
|
||||
|
||||
return inode;
|
||||
}
|
||||
@ -1497,8 +1449,8 @@ again:
|
||||
inode->i_state = I_NEW;
|
||||
hlist_add_head_rcu(&inode->i_hash, head);
|
||||
spin_unlock(&inode->i_lock);
|
||||
inode_sb_list_add(inode);
|
||||
spin_unlock(&inode_hash_lock);
|
||||
inode_sb_list_add(inode);
|
||||
|
||||
/* Return the locked inode with I_NEW set, the
|
||||
* caller is responsible for filling in the contents
|
||||
@ -2953,3 +2905,18 @@ umode_t mode_strip_sgid(struct mnt_idmap *idmap,
|
||||
return mode & ~S_ISGID;
|
||||
}
|
||||
EXPORT_SYMBOL(mode_strip_sgid);
|
||||
|
||||
#ifdef CONFIG_DEBUG_VFS
|
||||
/*
 * Dump an inode.
 *
 * Logs one warning made of @reason followed by the address of @inode
 * (printed with %px).
 *
 * TODO: add a proper inode dumping routine, this is a stub to get debug off the
 * ground.
 */
void dump_inode(struct inode *inode, const char *reason)
{
	/* Newline-terminate the message so it is a complete log line. */
	pr_warn("%s encountered for inode %px\n", reason, inode);
}
|
||||
|
||||
EXPORT_SYMBOL(dump_inode);
|
||||
#endif
|
||||
|
@ -187,8 +187,8 @@ extern struct open_how build_open_how(int flags, umode_t mode);
|
||||
extern int build_open_flags(const struct open_how *how, struct open_flags *op);
|
||||
struct file *file_close_fd_locked(struct files_struct *files, unsigned fd);
|
||||
|
||||
long do_ftruncate(struct file *file, loff_t length, int small);
|
||||
long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
|
||||
int do_ftruncate(struct file *file, loff_t length, int small);
|
||||
int do_sys_ftruncate(unsigned int fd, loff_t length, int small);
|
||||
int chmod_common(const struct path *path, umode_t mode);
|
||||
int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
|
||||
int flag);
|
||||
@ -207,7 +207,6 @@ bool in_group_or_capable(struct mnt_idmap *idmap,
|
||||
* fs-writeback.c
|
||||
*/
|
||||
extern long get_nr_dirty_inodes(void);
|
||||
void invalidate_inodes(struct super_block *sb);
|
||||
|
||||
/*
|
||||
* dcache.c
|
||||
@ -338,3 +337,4 @@ static inline bool path_mounted(const struct path *path)
|
||||
return path->mnt->mnt_root == path->dentry;
|
||||
}
|
||||
void file_f_owner_release(struct file *file);
|
||||
bool file_seek_cur_needs_f_lock(struct file *file);
|
||||
|
10
fs/ioctl.c
10
fs/ioctl.c
@ -41,7 +41,7 @@
|
||||
*
|
||||
* Returns 0 on success, -errno on error.
|
||||
*/
|
||||
long vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
int vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
int error = -ENOTTY;
|
||||
|
||||
@ -228,8 +228,8 @@ static int ioctl_fiemap(struct file *filp, struct fiemap __user *ufiemap)
|
||||
return error;
|
||||
}
|
||||
|
||||
static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
|
||||
u64 off, u64 olen, u64 destoff)
|
||||
static int ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
|
||||
u64 off, u64 olen, u64 destoff)
|
||||
{
|
||||
CLASS(fd, src_file)(srcfd);
|
||||
loff_t cloned;
|
||||
@ -248,8 +248,8 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static long ioctl_file_clone_range(struct file *file,
|
||||
struct file_clone_range __user *argp)
|
||||
static int ioctl_file_clone_range(struct file *file,
|
||||
struct file_clone_range __user *argp)
|
||||
{
|
||||
struct file_clone_range args;
|
||||
|
||||
|
42
fs/namei.c
42
fs/namei.c
@ -125,6 +125,13 @@
|
||||
|
||||
#define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname))
|
||||
|
||||
static inline void initname(struct filename *name)
|
||||
{
|
||||
name->uptr = NULL;
|
||||
name->aname = NULL;
|
||||
atomic_set(&name->refcnt, 1);
|
||||
}
|
||||
|
||||
struct filename *
|
||||
getname_flags(const char __user *filename, int flags)
|
||||
{
|
||||
@ -203,10 +210,7 @@ getname_flags(const char __user *filename, int flags)
|
||||
return ERR_PTR(-ENAMETOOLONG);
|
||||
}
|
||||
}
|
||||
|
||||
atomic_set(&result->refcnt, 1);
|
||||
result->uptr = filename;
|
||||
result->aname = NULL;
|
||||
initname(result);
|
||||
audit_getname(result);
|
||||
return result;
|
||||
}
|
||||
@ -218,11 +222,6 @@ struct filename *getname_uflags(const char __user *filename, int uflags)
|
||||
return getname_flags(filename, flags);
|
||||
}
|
||||
|
||||
struct filename *getname(const char __user * filename)
|
||||
{
|
||||
return getname_flags(filename, 0);
|
||||
}
|
||||
|
||||
struct filename *__getname_maybe_null(const char __user *pathname)
|
||||
{
|
||||
struct filename *name;
|
||||
@ -269,25 +268,27 @@ struct filename *getname_kernel(const char * filename)
|
||||
return ERR_PTR(-ENAMETOOLONG);
|
||||
}
|
||||
memcpy((char *)result->name, filename, len);
|
||||
result->uptr = NULL;
|
||||
result->aname = NULL;
|
||||
atomic_set(&result->refcnt, 1);
|
||||
initname(result);
|
||||
audit_getname(result);
|
||||
|
||||
return result;
|
||||
}
|
||||
EXPORT_SYMBOL(getname_kernel);
|
||||
|
||||
void putname(struct filename *name)
|
||||
{
|
||||
int refcnt;
|
||||
|
||||
if (IS_ERR_OR_NULL(name))
|
||||
return;
|
||||
|
||||
if (WARN_ON_ONCE(!atomic_read(&name->refcnt)))
|
||||
return;
|
||||
refcnt = atomic_read(&name->refcnt);
|
||||
if (refcnt != 1) {
|
||||
if (WARN_ON_ONCE(!refcnt))
|
||||
return;
|
||||
|
||||
if (!atomic_dec_and_test(&name->refcnt))
|
||||
return;
|
||||
if (!atomic_dec_and_test(&name->refcnt))
|
||||
return;
|
||||
}
|
||||
|
||||
if (name->name != name->iname) {
|
||||
__putname(name->name);
|
||||
@ -2863,15 +2864,14 @@ static int lookup_one_common(struct mnt_idmap *idmap,
|
||||
* Note that this routine is purely a helper for filesystem usage and should
|
||||
* not be called by generic code.
|
||||
*
|
||||
* The caller must hold base->i_mutex.
|
||||
* No locks need be held - only a counted reference to @base is needed.
|
||||
*
|
||||
*/
|
||||
struct dentry *try_lookup_one_len(const char *name, struct dentry *base, int len)
|
||||
{
|
||||
struct qstr this;
|
||||
int err;
|
||||
|
||||
WARN_ON_ONCE(!inode_is_locked(base->d_inode));
|
||||
|
||||
err = lookup_one_common(&nop_mnt_idmap, name, base, len, &this);
|
||||
if (err)
|
||||
return ERR_PTR(err);
|
||||
@ -3415,6 +3415,8 @@ static int may_open(struct mnt_idmap *idmap, const struct path *path,
|
||||
if ((acc_mode & MAY_EXEC) && path_noexec(path))
|
||||
return -EACCES;
|
||||
break;
|
||||
default:
|
||||
VFS_BUG_ON_INODE(1, inode);
|
||||
}
|
||||
|
||||
error = inode_permission(idmap, inode, MAY_OPEN | acc_mode);
|
||||
|
29
fs/open.c
29
fs/open.c
@ -67,11 +67,11 @@ int do_truncate(struct mnt_idmap *idmap, struct dentry *dentry,
|
||||
return ret;
|
||||
}
|
||||
|
||||
long vfs_truncate(const struct path *path, loff_t length)
|
||||
int vfs_truncate(const struct path *path, loff_t length)
|
||||
{
|
||||
struct mnt_idmap *idmap;
|
||||
struct inode *inode;
|
||||
long error;
|
||||
int error;
|
||||
|
||||
inode = path->dentry->d_inode;
|
||||
|
||||
@ -123,7 +123,7 @@ mnt_drop_write_and_out:
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vfs_truncate);
|
||||
|
||||
long do_sys_truncate(const char __user *pathname, loff_t length)
|
||||
int do_sys_truncate(const char __user *pathname, loff_t length)
|
||||
{
|
||||
unsigned int lookup_flags = LOOKUP_FOLLOW;
|
||||
struct path path;
|
||||
@ -157,7 +157,7 @@ COMPAT_SYSCALL_DEFINE2(truncate, const char __user *, path, compat_off_t, length
|
||||
}
|
||||
#endif
|
||||
|
||||
long do_ftruncate(struct file *file, loff_t length, int small)
|
||||
int do_ftruncate(struct file *file, loff_t length, int small)
|
||||
{
|
||||
struct inode *inode;
|
||||
struct dentry *dentry;
|
||||
@ -196,7 +196,7 @@ long do_ftruncate(struct file *file, loff_t length, int small)
|
||||
return error;
|
||||
}
|
||||
|
||||
long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
|
||||
int do_sys_ftruncate(unsigned int fd, loff_t length, int small)
|
||||
{
|
||||
if (length < 0)
|
||||
return -EINVAL;
|
||||
@ -251,7 +251,7 @@ COMPAT_SYSCALL_DEFINE3(ftruncate64, unsigned int, fd,
|
||||
int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
|
||||
{
|
||||
struct inode *inode = file_inode(file);
|
||||
long ret;
|
||||
int ret;
|
||||
loff_t sum;
|
||||
|
||||
if (offset < 0 || len <= 0)
|
||||
@ -460,7 +460,7 @@ static const struct cred *access_override_creds(void)
|
||||
return override_creds(override_cred);
|
||||
}
|
||||
|
||||
static long do_faccessat(int dfd, const char __user *filename, int mode, int flags)
|
||||
static int do_faccessat(int dfd, const char __user *filename, int mode, int flags)
|
||||
{
|
||||
struct path path;
|
||||
struct inode *inode;
|
||||
@ -1409,22 +1409,23 @@ struct file *file_open_root(const struct path *root,
|
||||
}
|
||||
EXPORT_SYMBOL(file_open_root);
|
||||
|
||||
static long do_sys_openat2(int dfd, const char __user *filename,
|
||||
struct open_how *how)
|
||||
static int do_sys_openat2(int dfd, const char __user *filename,
|
||||
struct open_how *how)
|
||||
{
|
||||
struct open_flags op;
|
||||
int fd = build_open_flags(how, &op);
|
||||
struct filename *tmp;
|
||||
int err, fd;
|
||||
|
||||
if (fd)
|
||||
return fd;
|
||||
err = build_open_flags(how, &op);
|
||||
if (unlikely(err))
|
||||
return err;
|
||||
|
||||
tmp = getname(filename);
|
||||
if (IS_ERR(tmp))
|
||||
return PTR_ERR(tmp);
|
||||
|
||||
fd = get_unused_fd_flags(how->flags);
|
||||
if (fd >= 0) {
|
||||
if (likely(fd >= 0)) {
|
||||
struct file *f = do_filp_open(dfd, tmp, &op);
|
||||
if (IS_ERR(f)) {
|
||||
put_unused_fd(fd);
|
||||
@ -1437,7 +1438,7 @@ static long do_sys_openat2(int dfd, const char __user *filename,
|
||||
return fd;
|
||||
}
|
||||
|
||||
long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
|
||||
int do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
|
||||
{
|
||||
struct open_how how = build_open_how(flags, mode);
|
||||
return do_sys_openat2(dfd, filename, &how);
|
||||
|
@ -169,11 +169,16 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence,
|
||||
|
||||
if (whence == SEEK_CUR) {
|
||||
/*
|
||||
* f_lock protects against read/modify/write race with
|
||||
* other SEEK_CURs. Note that parallel writes and reads
|
||||
* behave like SEEK_SET.
|
||||
* If the file requires locking via f_pos_lock we know
|
||||
* that mutual exclusion for SEEK_CUR on the same file
|
||||
* is guaranteed. If the file isn't locked, we take
|
||||
* f_lock to protect against f_pos races with other
|
||||
* SEEK_CURs.
|
||||
*/
|
||||
guard(spinlock)(&file->f_lock);
|
||||
if (file_seek_cur_needs_f_lock(file)) {
|
||||
guard(spinlock)(&file->f_lock);
|
||||
return vfs_setpos(file, file->f_pos + offset, maxsize);
|
||||
}
|
||||
return vfs_setpos(file, file->f_pos + offset, maxsize);
|
||||
}
|
||||
|
||||
|
@ -277,15 +277,14 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags)
|
||||
return ufd;
|
||||
}
|
||||
|
||||
file = anon_inode_getfile("[signalfd]", &signalfd_fops, ctx,
|
||||
O_RDWR | (flags & O_NONBLOCK));
|
||||
file = anon_inode_getfile_fmode("[signalfd]", &signalfd_fops,
|
||||
ctx, O_RDWR | (flags & O_NONBLOCK),
|
||||
FMODE_NOWAIT);
|
||||
if (IS_ERR(file)) {
|
||||
put_unused_fd(ufd);
|
||||
kfree(ctx);
|
||||
return PTR_ERR(file);
|
||||
}
|
||||
file->f_mode |= FMODE_NOWAIT;
|
||||
|
||||
fd_install(ufd, file);
|
||||
} else {
|
||||
CLASS(fd, f)(ufd);
|
||||
|
@ -388,7 +388,7 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
|
||||
spin_unlock(&tcon->tc_lock);
|
||||
|
||||
/*
|
||||
* BB Add call to invalidate_inodes(sb) for all superblocks mounted
|
||||
* BB Add call to evict_inodes(sb) for all superblocks mounted
|
||||
* to this tcon.
|
||||
*/
|
||||
}
|
||||
|
@ -1417,7 +1417,7 @@ static void fs_bdev_mark_dead(struct block_device *bdev, bool surprise)
|
||||
if (!surprise)
|
||||
sync_filesystem(sb);
|
||||
shrink_dcache_sb(sb);
|
||||
invalidate_inodes(sb);
|
||||
evict_inodes(sb);
|
||||
if (sb->s_op->shutdown)
|
||||
sb->s_op->shutdown(sb);
|
||||
|
||||
|
@ -439,15 +439,15 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
|
||||
return ufd;
|
||||
}
|
||||
|
||||
file = anon_inode_getfile("[timerfd]", &timerfd_fops, ctx,
|
||||
O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
|
||||
file = anon_inode_getfile_fmode("[timerfd]", &timerfd_fops, ctx,
|
||||
O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS),
|
||||
FMODE_NOWAIT);
|
||||
if (IS_ERR(file)) {
|
||||
put_unused_fd(ufd);
|
||||
kfree(ctx);
|
||||
return PTR_ERR(file);
|
||||
}
|
||||
|
||||
file->f_mode |= FMODE_NOWAIT;
|
||||
fd_install(ufd, file);
|
||||
return ufd;
|
||||
}
|
||||
|
@ -2,6 +2,7 @@
|
||||
#ifndef _LINUX_FS_H
|
||||
#define _LINUX_FS_H
|
||||
|
||||
#include <linux/vfsdebug.h>
|
||||
#include <linux/linkage.h>
|
||||
#include <linux/wait_bit.h>
|
||||
#include <linux/kdev_t.h>
|
||||
@ -790,19 +791,8 @@ struct inode {
|
||||
|
||||
static inline void inode_set_cached_link(struct inode *inode, char *link, int linklen)
|
||||
{
|
||||
int testlen;
|
||||
|
||||
/*
|
||||
* TODO: patch it into a debug-only check if relevant macros show up.
|
||||
* In the meantime, since we are suffering strlen even on production kernels
|
||||
* to find the right length, do a fixup if the wrong value got passed.
|
||||
*/
|
||||
testlen = strlen(link);
|
||||
if (testlen != linklen) {
|
||||
WARN_ONCE(1, "bad length passed for symlink [%s] (got %d, expected %d)",
|
||||
link, linklen, testlen);
|
||||
linklen = testlen;
|
||||
}
|
||||
VFS_WARN_ON_INODE(strlen(link) != linklen, inode);
|
||||
VFS_WARN_ON_INODE(inode->i_opflags & IOP_CACHED_LINK, inode);
|
||||
inode->i_link = link;
|
||||
inode->i_linklen = linklen;
|
||||
inode->i_opflags |= IOP_CACHED_LINK;
|
||||
@ -1067,7 +1057,6 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
|
||||
|
||||
/**
|
||||
* struct file - Represents a file
|
||||
* @f_ref: reference count
|
||||
* @f_lock: Protects f_ep, f_flags. Must not be taken from IRQ context.
|
||||
* @f_mode: FMODE_* flags often used in hotpaths
|
||||
* @f_op: file operations
|
||||
@ -1077,12 +1066,12 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
|
||||
* @f_flags: file flags
|
||||
* @f_iocb_flags: iocb flags
|
||||
* @f_cred: stashed credentials of creator/opener
|
||||
* @f_owner: file owner
|
||||
* @f_path: path of the file
|
||||
* @f_pos_lock: lock protecting file position
|
||||
* @f_pipe: specific to pipes
|
||||
* @f_pos: file position
|
||||
* @f_security: LSM security context of this file
|
||||
* @f_owner: file owner
|
||||
* @f_wb_err: writeback error
|
||||
* @f_sb_err: per sb writeback errors
|
||||
* @f_ep: link of all epoll hooks for this file
|
||||
@ -1090,9 +1079,9 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
|
||||
* @f_llist: work queue entrypoint
|
||||
* @f_ra: file's readahead state
|
||||
* @f_freeptr: Pointer used by SLAB_TYPESAFE_BY_RCU file cache (don't touch.)
|
||||
* @f_ref: reference count
|
||||
*/
|
||||
struct file {
|
||||
file_ref_t f_ref;
|
||||
spinlock_t f_lock;
|
||||
fmode_t f_mode;
|
||||
const struct file_operations *f_op;
|
||||
@ -1102,6 +1091,7 @@ struct file {
|
||||
unsigned int f_flags;
|
||||
unsigned int f_iocb_flags;
|
||||
const struct cred *f_cred;
|
||||
struct fown_struct *f_owner;
|
||||
/* --- cacheline 1 boundary (64 bytes) --- */
|
||||
struct path f_path;
|
||||
union {
|
||||
@ -1115,7 +1105,6 @@ struct file {
|
||||
void *f_security;
|
||||
#endif
|
||||
/* --- cacheline 2 boundary (128 bytes) --- */
|
||||
struct fown_struct *f_owner;
|
||||
errseq_t f_wb_err;
|
||||
errseq_t f_sb_err;
|
||||
#ifdef CONFIG_EPOLL
|
||||
@ -1127,6 +1116,7 @@ struct file {
|
||||
struct file_ra_state f_ra;
|
||||
freeptr_t f_freeptr;
|
||||
};
|
||||
file_ref_t f_ref;
|
||||
/* --- cacheline 3 boundary (192 bytes) --- */
|
||||
} __randomize_layout
|
||||
__attribute__((aligned(4))); /* lest something weird decides that 2 is OK */
|
||||
@ -2039,7 +2029,7 @@ int vfs_fchown(struct file *file, uid_t user, gid_t group);
|
||||
int vfs_fchmod(struct file *file, umode_t mode);
|
||||
int vfs_utimes(const struct path *path, struct timespec64 *times);
|
||||
|
||||
extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
|
||||
int vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
extern long compat_ptr_ioctl(struct file *file, unsigned int cmd,
|
||||
@ -2791,13 +2781,13 @@ static inline bool is_idmapped_mnt(const struct vfsmount *mnt)
|
||||
return mnt_idmap(mnt) != &nop_mnt_idmap;
|
||||
}
|
||||
|
||||
extern long vfs_truncate(const struct path *, loff_t);
|
||||
int vfs_truncate(const struct path *, loff_t);
|
||||
int do_truncate(struct mnt_idmap *, struct dentry *, loff_t start,
|
||||
unsigned int time_attrs, struct file *filp);
|
||||
extern int vfs_fallocate(struct file *file, int mode, loff_t offset,
|
||||
loff_t len);
|
||||
extern long do_sys_open(int dfd, const char __user *filename, int flags,
|
||||
umode_t mode);
|
||||
int do_sys_open(int dfd, const char __user *filename, int flags,
|
||||
umode_t mode);
|
||||
extern struct file *file_open_name(struct filename *, int, umode_t);
|
||||
extern struct file *filp_open(const char *, int, umode_t);
|
||||
extern struct file *file_open_root(const struct path *,
|
||||
@ -2848,7 +2838,10 @@ extern int filp_close(struct file *, fl_owner_t id);
|
||||
|
||||
extern struct filename *getname_flags(const char __user *, int);
|
||||
extern struct filename *getname_uflags(const char __user *, int);
|
||||
extern struct filename *getname(const char __user *);
|
||||
static inline struct filename *getname(const char __user *name)
|
||||
{
|
||||
return getname_flags(name, 0);
|
||||
}
|
||||
extern struct filename *getname_kernel(const char *);
|
||||
extern struct filename *__getname_maybe_null(const char __user *);
|
||||
static inline struct filename *getname_maybe_null(const char __user *name, int flags)
|
||||
@ -2862,6 +2855,12 @@ static inline struct filename *getname_maybe_null(const char __user *name, int f
|
||||
}
|
||||
extern void putname(struct filename *name);
|
||||
|
||||
static inline struct filename *refname(struct filename *name)
|
||||
{
|
||||
atomic_inc(&name->refcnt);
|
||||
return name;
|
||||
}
|
||||
|
||||
extern int finish_open(struct file *file, struct dentry *dentry,
|
||||
int (*open)(struct inode *, struct file *));
|
||||
extern int finish_no_open(struct file *file, struct dentry *dentry);
|
||||
@ -3294,7 +3293,11 @@ static inline void __iget(struct inode *inode)
|
||||
extern void iget_failed(struct inode *);
|
||||
extern void clear_inode(struct inode *);
|
||||
extern void __destroy_inode(struct inode *);
|
||||
extern struct inode *new_inode_pseudo(struct super_block *sb);
|
||||
struct inode *alloc_inode(struct super_block *sb);
|
||||
static inline struct inode *new_inode_pseudo(struct super_block *sb)
|
||||
{
|
||||
return alloc_inode(sb);
|
||||
}
|
||||
extern struct inode *new_inode(struct super_block *sb);
|
||||
extern void free_inode_nonrcu(struct inode *inode);
|
||||
extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *);
|
||||
|
@ -2555,7 +2555,7 @@ int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc,
|
||||
struct task_struct *task, bool bypass_rlim);
|
||||
|
||||
struct kvec;
|
||||
struct page *get_dump_page(unsigned long addr);
|
||||
struct page *get_dump_page(unsigned long addr, int *locked);
|
||||
|
||||
bool folio_mark_dirty(struct folio *folio);
|
||||
bool folio_mark_dirty_lock(struct folio *folio);
|
||||
|
@ -1044,21 +1044,23 @@ static inline pgoff_t page_pgoff(const struct folio *folio,
|
||||
return folio->index + folio_page_idx(folio, page);
|
||||
}
|
||||
|
||||
/**
|
||||
* folio_pos - Returns the byte position of this folio in its file.
|
||||
* @folio: The folio.
|
||||
*/
|
||||
static inline loff_t folio_pos(const struct folio *folio)
|
||||
{
|
||||
return ((loff_t)folio->index) * PAGE_SIZE;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return byte-offset into filesystem object for page.
|
||||
*/
|
||||
static inline loff_t page_offset(struct page *page)
|
||||
{
|
||||
return ((loff_t)page->index) << PAGE_SHIFT;
|
||||
}
|
||||
struct folio *folio = page_folio(page);
|
||||
|
||||
/**
|
||||
* folio_pos - Returns the byte position of this folio in its file.
|
||||
* @folio: The folio.
|
||||
*/
|
||||
static inline loff_t folio_pos(struct folio *folio)
|
||||
{
|
||||
return page_offset(&folio->page);
|
||||
return folio_pos(folio) + folio_page_idx(folio, page) * PAGE_SIZE;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1266,14 +1266,14 @@ static inline long ksys_lchown(const char __user *filename, uid_t user,
|
||||
AT_SYMLINK_NOFOLLOW);
|
||||
}
|
||||
|
||||
extern long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
|
||||
int do_sys_ftruncate(unsigned int fd, loff_t length, int small);
|
||||
|
||||
static inline long ksys_ftruncate(unsigned int fd, loff_t length)
|
||||
{
|
||||
return do_sys_ftruncate(fd, length, 1);
|
||||
}
|
||||
|
||||
extern long do_sys_truncate(const char __user *pathname, loff_t length);
|
||||
int do_sys_truncate(const char __user *pathname, loff_t length);
|
||||
|
||||
static inline long ksys_truncate(const char __user *pathname, loff_t length)
|
||||
{
|
||||
|
45
include/linux/vfsdebug.h
Normal file
45
include/linux/vfsdebug.h
Normal file
@ -0,0 +1,45 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef LINUX_VFS_DEBUG_H
|
||||
#define LINUX_VFS_DEBUG_H 1
|
||||
|
||||
#include <linux/bug.h>
|
||||
|
||||
struct inode;
|
||||
|
||||
#ifdef CONFIG_DEBUG_VFS
|
||||
void dump_inode(struct inode *inode, const char *reason);
|
||||
|
||||
#define VFS_BUG_ON(cond) BUG_ON(cond)
|
||||
#define VFS_WARN_ON(cond) (void)WARN_ON(cond)
|
||||
#define VFS_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond)
|
||||
#define VFS_WARN_ONCE(cond, format...) (void)WARN_ONCE(cond, format)
|
||||
#define VFS_WARN(cond, format...) (void)WARN(cond, format)
|
||||
|
||||
#define VFS_BUG_ON_INODE(cond, inode) ({ \
|
||||
if (unlikely(!!(cond))) { \
|
||||
dump_inode(inode, "VFS_BUG_ON_INODE(" #cond")");\
|
||||
BUG_ON(1); \
|
||||
} \
|
||||
})
|
||||
|
||||
#define VFS_WARN_ON_INODE(cond, inode) ({ \
|
||||
int __ret_warn = !!(cond); \
|
||||
\
|
||||
if (unlikely(__ret_warn)) { \
|
||||
dump_inode(inode, "VFS_WARN_ON_INODE(" #cond")");\
|
||||
WARN_ON(1); \
|
||||
} \
|
||||
unlikely(__ret_warn); \
|
||||
})
|
||||
#else
|
||||
#define VFS_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond)
|
||||
#define VFS_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
|
||||
#define VFS_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
|
||||
#define VFS_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
|
||||
#define VFS_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
|
||||
|
||||
#define VFS_BUG_ON_INODE(cond, inode) VFS_BUG_ON(cond)
|
||||
#define VFS_WARN_ON_INODE(cond, inode) BUILD_BUG_ON_INVALID(cond)
|
||||
#endif /* CONFIG_DEBUG_VFS */
|
||||
|
||||
#endif
|
@ -2207,10 +2207,8 @@ __audit_reusename(const __user char *uptr)
|
||||
list_for_each_entry(n, &context->names_list, list) {
|
||||
if (!n->name)
|
||||
continue;
|
||||
if (n->name->uptr == uptr) {
|
||||
atomic_inc(&n->name->refcnt);
|
||||
return n->name;
|
||||
}
|
||||
if (n->name->uptr == uptr)
|
||||
return refname(n->name);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
@ -2237,7 +2235,7 @@ void __audit_getname(struct filename *name)
|
||||
n->name = name;
|
||||
n->name_len = AUDIT_NAME_FULL;
|
||||
name->aname = n;
|
||||
atomic_inc(&name->refcnt);
|
||||
refname(name);
|
||||
}
|
||||
|
||||
static inline int audit_copy_fcaps(struct audit_names *name,
|
||||
@ -2369,7 +2367,7 @@ out_alloc:
|
||||
return;
|
||||
if (name) {
|
||||
n->name = name;
|
||||
atomic_inc(&name->refcnt);
|
||||
refname(name);
|
||||
}
|
||||
|
||||
out:
|
||||
@ -2496,7 +2494,7 @@ void __audit_inode_child(struct inode *parent,
|
||||
if (found_parent) {
|
||||
found_child->name = found_parent->name;
|
||||
found_child->name_len = AUDIT_NAME_FULL;
|
||||
atomic_inc(&found_child->name->refcnt);
|
||||
refname(found_child->name);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -145,7 +145,7 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
|
||||
*/
|
||||
task1 = find_task_by_vpid(pid1);
|
||||
task2 = find_task_by_vpid(pid2);
|
||||
if (!task1 || !task2)
|
||||
if (unlikely(!task1 || !task2))
|
||||
goto err_no_task;
|
||||
|
||||
get_task_struct(task1);
|
||||
|
@ -269,6 +269,15 @@ long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes)
|
||||
if (ret < 0)
|
||||
goto error;
|
||||
|
||||
/*
|
||||
* pipe_resize_ring() does not update nr_accounted for watch_queue
|
||||
* pipes, because the above vastly overprovisions. Set nr_accounted
|
||||
* and max_usage on this pipe to the number that was actually charged to
|
||||
* the user above via account_pipe_buffers.
|
||||
*/
|
||||
pipe->max_usage = nr_pages;
|
||||
pipe->nr_accounted = nr_pages;
|
||||
|
||||
ret = -ENOMEM;
|
||||
pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
|
||||
if (!pages)
|
||||
|
@ -808,6 +808,15 @@ config ARCH_HAS_DEBUG_VM_PGTABLE
|
||||
An architecture should select this when it can successfully
|
||||
build and run DEBUG_VM_PGTABLE.
|
||||
|
||||
config DEBUG_VFS
|
||||
bool "Debug VFS"
|
||||
depends on DEBUG_KERNEL
|
||||
help
|
||||
Enable this to turn on extended checks in the VFS layer that may impact
|
||||
performance.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config DEBUG_VM_IRQSOFF
|
||||
def_bool DEBUG_VM && !PREEMPT_RT
|
||||
|
||||
|
6
mm/gup.c
6
mm/gup.c
@ -2254,6 +2254,7 @@ EXPORT_SYMBOL(fault_in_readable);
|
||||
/**
|
||||
* get_dump_page() - pin user page in memory while writing it to core dump
|
||||
* @addr: user address
|
||||
* @locked: a pointer to an int denoting whether the mmap sem is held
|
||||
*
|
||||
* Returns struct page pointer of user page pinned for dump,
|
||||
* to be freed afterwards by put_page().
|
||||
@ -2266,13 +2267,12 @@ EXPORT_SYMBOL(fault_in_readable);
|
||||
* Called without mmap_lock (takes and releases the mmap_lock by itself).
|
||||
*/
|
||||
#ifdef CONFIG_ELF_CORE
|
||||
struct page *get_dump_page(unsigned long addr)
|
||||
struct page *get_dump_page(unsigned long addr, int *locked)
|
||||
{
|
||||
struct page *page;
|
||||
int locked = 0;
|
||||
int ret;
|
||||
|
||||
ret = __get_user_pages_locked(current->mm, addr, 1, &page, &locked,
|
||||
ret = __get_user_pages_locked(current->mm, addr, 1, &page, locked,
|
||||
FOLL_FORCE | FOLL_DUMP | FOLL_GET);
|
||||
return (ret == 1) ? page : NULL;
|
||||
}
|
||||
|
@ -1216,7 +1216,7 @@ static void hook_inode_free_security_rcu(void *inode_security)
|
||||
/*
|
||||
* Release the inodes used in a security policy.
|
||||
*
|
||||
* Cf. fsnotify_unmount_inodes() and invalidate_inodes()
|
||||
* Cf. fsnotify_unmount_inodes() and evict_inodes()
|
||||
*/
|
||||
static void hook_sb_delete(struct super_block *const sb)
|
||||
{
|
||||
|
@ -4224,15 +4224,14 @@ static int kvm_vcpu_ioctl_get_stats_fd(struct kvm_vcpu *vcpu)
|
||||
if (fd < 0)
|
||||
return fd;
|
||||
|
||||
file = anon_inode_getfile(name, &kvm_vcpu_stats_fops, vcpu, O_RDONLY);
|
||||
file = anon_inode_getfile_fmode(name, &kvm_vcpu_stats_fops, vcpu,
|
||||
O_RDONLY, FMODE_PREAD);
|
||||
if (IS_ERR(file)) {
|
||||
put_unused_fd(fd);
|
||||
return PTR_ERR(file);
|
||||
}
|
||||
|
||||
kvm_get_kvm(vcpu->kvm);
|
||||
|
||||
file->f_mode |= FMODE_PREAD;
|
||||
fd_install(fd, file);
|
||||
|
||||
return fd;
|
||||
@ -5020,16 +5019,14 @@ static int kvm_vm_ioctl_get_stats_fd(struct kvm *kvm)
|
||||
if (fd < 0)
|
||||
return fd;
|
||||
|
||||
file = anon_inode_getfile("kvm-vm-stats",
|
||||
&kvm_vm_stats_fops, kvm, O_RDONLY);
|
||||
file = anon_inode_getfile_fmode("kvm-vm-stats",
|
||||
&kvm_vm_stats_fops, kvm, O_RDONLY, FMODE_PREAD);
|
||||
if (IS_ERR(file)) {
|
||||
put_unused_fd(fd);
|
||||
return PTR_ERR(file);
|
||||
}
|
||||
|
||||
kvm_get_kvm(kvm);
|
||||
|
||||
file->f_mode |= FMODE_PREAD;
|
||||
fd_install(fd, file);
|
||||
|
||||
return fd;
|
||||
|
Loading…
x
Reference in New Issue
Block a user