VFS: Change vfs_mkdir() to return the dentry.

vfs_mkdir() does not guarantee to leave the child dentry hashed or make
it positive on success, and in many such cases the filesystem had to use
a different dentry which it can now return.

This patch changes vfs_mkdir() to return the dentry provided by the
filesystems which is hashed and positive when provided.  This reduces
the number of cases where the resulting dentry is not positive to a
handful which don't deserve extra efforts.

The only callers of vfs_mkdir() which are interested in the resulting
inode are in-kernel filesystem clients: cachefiles, nfsd, smb/server.
The only filesystems that don't reliably provide the inode are:
- kernfs, tracefs which these clients are unlikely to be interested in
- cifs in some configurations would need to do a lookup to find the
  created inode, but doesn't.  cifs cannot be exported via NFS, is
  unlikely to be used by cachefiles, and smb/server only has a soft
  requirement for the inode, so this is unlikely to be a problem in
  practice.
- hostfs, nfs, cifs may need to do a lookup (rarely for NFS) and it is
  possible for a race to make that lookup fail.  Actual failure
  is unlikely and, provided callers handle negative dentries gracefully,
  they will fail safe.

So this patch removes the lookup code in nfsd and smb/server and adjusts
them to fail safe if a negative dentry is provided:
- cache-files already fails safe by restarting the task from the
  top - it still does with this change, though it no longer calls
  cachefiles_put_directory() as that will crash if the dentry is
  negative.
- nfsd reports "Server-fault" which is what it used to do if the lookup
  failed. This will never happen on any file-systems that it can actually
  export, so this is of no consequence.  I removed the fh_update()
  call as that is not needed and out-of-place.  A subsequent
  nfsd_create_setattr() call will call fh_update() when needed.
- smb/server only wants the inode to call ksmbd_smb_inherit_owner()
  which updates ->i_uid (without calling notify_change() or similar)
  which can be safely skipped on cifs (I hope).

If a different dentry is returned, the first one is put.  If necessary
the fact that it is new can be determined by comparing pointers.  A new
dentry will certainly have a new pointer (as the old is put after the
new is obtained).
Similarly if an error is returned (via ERR_PTR()) the original dentry is
put.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: NeilBrown <neilb@suse.de>
Link: https://lore.kernel.org/r/20250227013949.536172-7-neilb@suse.de
Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
NeilBrown 2025-02-27 12:32:58 +11:00 committed by Christian Brauner
parent 8376583b84
commit c54b386969
No known key found for this signature in database
GPG Key ID: 91C61BC06578DCA2
13 changed files with 104 additions and 127 deletions

View File

@ -160,18 +160,17 @@ static int dev_mkdir(const char *name, umode_t mode)
{
struct dentry *dentry;
struct path path;
int err;
dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_DIRECTORY);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
err = vfs_mkdir(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode);
if (!err)
dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode);
if (!IS_ERR(dentry))
/* mark as kernel-created inode */
d_inode(dentry)->i_private = &thread;
done_path_create(&path, dentry);
return err;
return PTR_ERR_OR_ZERO(dentry);
}
static int create_path(const char *nodepath)

View File

@ -128,18 +128,19 @@ retry:
ret = security_path_mkdir(&path, subdir, 0700);
if (ret < 0)
goto mkdir_error;
ret = cachefiles_inject_write_error();
if (ret == 0)
ret = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700);
if (ret < 0) {
subdir = ERR_PTR(cachefiles_inject_write_error());
if (!IS_ERR(subdir))
subdir = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700);
ret = PTR_ERR(subdir);
if (IS_ERR(subdir)) {
trace_cachefiles_vfs_error(NULL, d_inode(dir), ret,
cachefiles_trace_mkdir_error);
goto mkdir_error;
}
trace_cachefiles_mkdir(dir, subdir);
if (unlikely(d_unhashed(subdir))) {
cachefiles_put_directory(subdir);
if (unlikely(d_unhashed(subdir) || d_is_negative(subdir))) {
dput(subdir);
goto retry;
}
ASSERT(d_backing_inode(subdir));
@ -195,7 +196,8 @@ mark_error:
mkdir_error:
inode_unlock(d_inode(dir));
dput(subdir);
if (!IS_ERR(subdir))
dput(subdir);
pr_err("mkdir %s failed with error %d\n", dirname, ret);
return ERR_PTR(ret);

View File

@ -511,10 +511,16 @@ static struct dentry *ecryptfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct inode *lower_dir;
rc = lock_parent(dentry, &lower_dentry, &lower_dir);
if (!rc)
rc = vfs_mkdir(&nop_mnt_idmap, lower_dir,
lower_dentry, mode);
if (rc || d_really_is_negative(lower_dentry))
if (rc)
goto out;
lower_dentry = vfs_mkdir(&nop_mnt_idmap, lower_dir,
lower_dentry, mode);
rc = PTR_ERR(lower_dentry);
if (IS_ERR(lower_dentry))
goto out;
rc = 0;
if (d_unhashed(lower_dentry))
goto out;
rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb);
if (rc)

View File

@ -230,9 +230,12 @@ int __init init_mkdir(const char *pathname, umode_t mode)
return PTR_ERR(dentry);
mode = mode_strip_umask(d_inode(path.dentry), mode);
error = security_path_mkdir(&path, dentry, mode);
if (!error)
error = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode,
if (!error) {
dentry = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode,
dentry, mode);
if (IS_ERR(dentry))
error = PTR_ERR(dentry);
}
done_path_create(&path, dentry);
return error;
}

View File

@ -4128,7 +4128,8 @@ EXPORT_SYMBOL(kern_path_create);
void done_path_create(struct path *path, struct dentry *dentry)
{
dput(dentry);
if (!IS_ERR(dentry))
dput(dentry);
inode_unlock(path->dentry->d_inode);
mnt_drop_write(path->mnt);
path_put(path);
@ -4274,7 +4275,7 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d
}
/**
* vfs_mkdir - create directory
* vfs_mkdir - create directory returning correct dentry if possible
* @idmap: idmap of the mount the inode was found from
* @dir: inode of the parent directory
* @dentry: dentry of the child directory
@ -4287,9 +4288,15 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d
* care to map the inode according to @idmap before checking permissions.
* On non-idmapped mounts or if permission checking is to be performed on the
* raw inode simply pass @nop_mnt_idmap.
*
* In the event that the filesystem does not use the *@dentry but leaves it
* negative or unhashes it and possibly splices a different one returning it,
* the original dentry is dput() and the alternate is returned.
*
* In case of an error the dentry is dput() and an ERR_PTR() is returned.
*/
int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
struct dentry *vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
int error;
unsigned max_links = dir->i_sb->s_max_links;
@ -4297,31 +4304,35 @@ int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
error = may_create(idmap, dir, dentry);
if (error)
return error;
goto err;
error = -EPERM;
if (!dir->i_op->mkdir)
return -EPERM;
goto err;
mode = vfs_prepare_mode(idmap, dir, mode, S_IRWXUGO | S_ISVTX, 0);
error = security_inode_mkdir(dir, dentry, mode);
if (error)
return error;
goto err;
error = -EMLINK;
if (max_links && dir->i_nlink >= max_links)
return -EMLINK;
goto err;
de = dir->i_op->mkdir(idmap, dir, dentry, mode);
error = PTR_ERR(de);
if (IS_ERR(de))
return PTR_ERR(de);
goto err;
if (de) {
fsnotify_mkdir(dir, de);
/* Cannot return de yet */
dput(de);
} else {
fsnotify_mkdir(dir, dentry);
dput(dentry);
dentry = de;
}
fsnotify_mkdir(dir, dentry);
return dentry;
return 0;
err:
dput(dentry);
return ERR_PTR(error);
}
EXPORT_SYMBOL(vfs_mkdir);
@ -4341,8 +4352,10 @@ retry:
error = security_path_mkdir(&path, dentry,
mode_strip_umask(path.dentry->d_inode, mode));
if (!error) {
error = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode,
dentry = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode,
dentry, mode);
if (IS_ERR(dentry))
error = PTR_ERR(dentry);
}
done_path_create(&path, dentry);
if (retry_estale(error, lookup_flags)) {

View File

@ -233,9 +233,12 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
* as well be forgiving and just succeed silently.
*/
goto out_put;
status = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU);
dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU);
if (IS_ERR(dentry))
status = PTR_ERR(dentry);
out_put:
dput(dentry);
if (!status)
dput(dentry);
out_unlock:
inode_unlock(d_inode(dir));
if (status == 0) {

View File

@ -1461,7 +1461,7 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct inode *dirp;
struct iattr *iap = attrs->na_iattr;
__be32 err;
int host_err;
int host_err = 0;
dentry = fhp->fh_dentry;
dirp = d_inode(dentry);
@ -1488,25 +1488,15 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
nfsd_check_ignore_resizing(iap);
break;
case S_IFDIR:
host_err = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode);
if (!host_err && unlikely(d_unhashed(dchild))) {
struct dentry *d;
d = lookup_one_len(dchild->d_name.name,
dchild->d_parent,
dchild->d_name.len);
if (IS_ERR(d)) {
host_err = PTR_ERR(d);
break;
}
if (unlikely(d_is_negative(d))) {
dput(d);
err = nfserr_serverfault;
goto out;
}
dchild = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode);
if (IS_ERR(dchild)) {
host_err = PTR_ERR(dchild);
} else if (d_is_negative(dchild)) {
err = nfserr_serverfault;
goto out;
} else if (unlikely(dchild != resfhp->fh_dentry)) {
dput(resfhp->fh_dentry);
resfhp->fh_dentry = dget(d);
dput(dchild);
dchild = d;
resfhp->fh_dentry = dget(dchild);
}
break;
case S_IFCHR:
@ -1527,7 +1517,8 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
err = nfsd_create_setattr(rqstp, fhp, resfhp, attrs);
out:
dput(dchild);
if (!IS_ERR(dchild))
dput(dchild);
return err;
out_nfserr:

View File

@ -138,37 +138,6 @@ kill_whiteout:
goto out;
}
int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir,
struct dentry **newdentry, umode_t mode)
{
int err;
struct dentry *d, *dentry = *newdentry;
err = ovl_do_mkdir(ofs, dir, dentry, mode);
if (err)
return err;
if (likely(!d_unhashed(dentry)))
return 0;
/*
* vfs_mkdir() may succeed and leave the dentry passed
* to it unhashed and negative. If that happens, try to
* lookup a new hashed and positive dentry.
*/
d = ovl_lookup_upper(ofs, dentry->d_name.name, dentry->d_parent,
dentry->d_name.len);
if (IS_ERR(d)) {
pr_warn("failed lookup after mkdir (%pd2, err=%i).\n",
dentry, err);
return PTR_ERR(d);
}
dput(dentry);
*newdentry = d;
return 0;
}
struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir,
struct dentry *newdentry, struct ovl_cattr *attr)
{
@ -191,7 +160,8 @@ struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir,
case S_IFDIR:
/* mkdir is special... */
err = ovl_mkdir_real(ofs, dir, &newdentry, attr->mode);
newdentry = ovl_do_mkdir(ofs, dir, newdentry, attr->mode);
err = PTR_ERR_OR_ZERO(newdentry);
break;
case S_IFCHR:
@ -219,7 +189,8 @@ struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir,
}
out:
if (err) {
dput(newdentry);
if (!IS_ERR(newdentry))
dput(newdentry);
return ERR_PTR(err);
}
return newdentry;

View File

@ -241,13 +241,14 @@ static inline int ovl_do_create(struct ovl_fs *ofs,
return err;
}
static inline int ovl_do_mkdir(struct ovl_fs *ofs,
struct inode *dir, struct dentry *dentry,
umode_t mode)
static inline struct dentry *ovl_do_mkdir(struct ovl_fs *ofs,
struct inode *dir,
struct dentry *dentry,
umode_t mode)
{
int err = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode);
pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err);
return err;
dentry = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode);
pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, PTR_ERR_OR_ZERO(dentry));
return dentry;
}
static inline int ovl_do_mknod(struct ovl_fs *ofs,
@ -838,8 +839,6 @@ struct ovl_cattr {
#define OVL_CATTR(m) (&(struct ovl_cattr) { .mode = (m) })
int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir,
struct dentry **newdentry, umode_t mode);
struct dentry *ovl_create_real(struct ovl_fs *ofs,
struct inode *dir, struct dentry *newdentry,
struct ovl_cattr *attr);

View File

@ -327,9 +327,10 @@ retry:
goto retry;
}
err = ovl_mkdir_real(ofs, dir, &work, attr.ia_mode);
if (err)
goto out_dput;
work = ovl_do_mkdir(ofs, dir, work, attr.ia_mode);
err = PTR_ERR(work);
if (IS_ERR(work))
goto out_err;
/* Weird filesystem returning with hashed negative (kernfs)? */
err = -EINVAL;

View File

@ -206,8 +206,8 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode)
{
struct mnt_idmap *idmap;
struct path path;
struct dentry *dentry;
int err;
struct dentry *dentry, *d;
int err = 0;
dentry = ksmbd_vfs_kern_path_create(work, name,
LOOKUP_NO_SYMLINKS | LOOKUP_DIRECTORY,
@ -222,27 +222,15 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode)
idmap = mnt_idmap(path.mnt);
mode |= S_IFDIR;
err = vfs_mkdir(idmap, d_inode(path.dentry), dentry, mode);
if (!err && d_unhashed(dentry)) {
struct dentry *d;
d = dentry;
dentry = vfs_mkdir(idmap, d_inode(path.dentry), dentry, mode);
if (IS_ERR(dentry))
err = PTR_ERR(dentry);
else if (d_is_negative(dentry))
err = -ENOENT;
if (!err && dentry != d)
ksmbd_vfs_inherit_owner(work, d_inode(path.dentry), d_inode(dentry));
d = lookup_one(idmap, dentry->d_name.name, dentry->d_parent,
dentry->d_name.len);
if (IS_ERR(d)) {
err = PTR_ERR(d);
goto out_err;
}
if (unlikely(d_is_negative(d))) {
dput(d);
err = -ENOENT;
goto out_err;
}
ksmbd_vfs_inherit_owner(work, d_inode(path.dentry), d_inode(d));
dput(d);
}
out_err:
done_path_create(&path, dentry);
if (err)
pr_err("mkdir(%s): creation failed (err:%d)\n", name, err);

View File

@ -167,10 +167,11 @@ xrep_orphanage_create(
* directory to control access to a file we put in here.
*/
if (d_really_is_negative(orphanage_dentry)) {
error = vfs_mkdir(&nop_mnt_idmap, root_inode, orphanage_dentry,
0750);
if (error)
goto out_dput_orphanage;
orphanage_dentry = vfs_mkdir(&nop_mnt_idmap, root_inode,
orphanage_dentry, 0750);
error = PTR_ERR(orphanage_dentry);
if (IS_ERR(orphanage_dentry))
goto out_unlock_root;
}
/* Not a directory? Bail out. */

View File

@ -1981,8 +1981,8 @@ bool inode_owner_or_capable(struct mnt_idmap *idmap,
*/
int vfs_create(struct mnt_idmap *, struct inode *,
struct dentry *, umode_t, bool);
int vfs_mkdir(struct mnt_idmap *, struct inode *,
struct dentry *, umode_t);
struct dentry *vfs_mkdir(struct mnt_idmap *, struct inode *,
struct dentry *, umode_t);
int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
umode_t, dev_t);
int vfs_symlink(struct mnt_idmap *, struct inode *,