bcachefs: Recovery no longer holds state_lock

state_lock guards against devices coming or going, devices changing
state, and the filesystem switching between ro <-> rw.

But it's not necessary for running recovery passes, and holding it
blocks asynchronous events that would cause us to go RO or kick out
devices.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Kent Overstreet 2025-03-26 11:41:07 -04:00
parent c6c6a39109
commit 2dd202dbaf
8 changed files with 31 additions and 36 deletions
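The diffs below all follow from the change described above: instead of recovery holding state_lock (an rw_semaphore) for its whole run, each pass that actually needs the device list pinned takes a read lock around just its own work, so writers such as an emergency read-only transition or a device removal are only blocked briefly. A minimal userspace sketch of that shape, with a pthread rwlock standing in for the rw_semaphore and made-up pass names:

/*
 * Userspace sketch only, not bcachefs code: a pthread rwlock stands in for
 * the rw_semaphore c->state_lock, and the pass names are made up.  Before,
 * the pass runner held the read lock across the entire loop; now only the
 * passes that walk per-device state take it, and only for their own duration.
 */
#include <pthread.h>

static pthread_rwlock_t state_lock = PTHREAD_RWLOCK_INITIALIZER;

/* A pass that needs the device list pinned takes the lock itself now. */
static int alloc_read_pass(void)
{
	pthread_rwlock_rdlock(&state_lock);
	/* ... walk devices, read allocation info ... */
	pthread_rwlock_unlock(&state_lock);
	return 0;
}

/* A pass that doesn't touch per-device state runs with no lock at all. */
static int check_snapshots_pass(void)
{
	return 0;
}

static int run_passes(void)
{
	int (*passes[])(void) = { alloc_read_pass, check_snapshots_pass };

	/* no read lock taken here any more */
	for (unsigned i = 0; i < sizeof(passes) / sizeof(passes[0]); i++) {
		int ret = passes[i]();
		if (ret)
			return ret;
	}
	return 0;
}

int main(void)
{
	return run_passes();
}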

View File

@@ -589,6 +589,8 @@ iter_err:
int bch2_alloc_read(struct bch_fs *c)
{
down_read(&c->state_lock);
struct btree_trans *trans = bch2_trans_get(c);
struct bch_dev *ca = NULL;
int ret;
@@ -652,6 +654,7 @@ int bch2_alloc_read(struct bch_fs *c)
bch2_dev_put(ca);
bch2_trans_put(trans);
up_read(&c->state_lock);
bch_err_fn(c, ret);
return ret;
}

View File

@@ -1023,7 +1023,7 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
* Can't allow devices to come/go/resize while we have bucket bitmaps
* allocated
*/
lockdep_assert_held(&c->state_lock);
down_read(&c->state_lock);
for_each_member_device(c, ca) {
BUG_ON(ca->bucket_backpointer_mismatches);
@@ -1108,6 +1108,7 @@ err_free_bitmaps:
ca->bucket_backpointer_mismatches = NULL;
}
up_read(&c->state_lock);
bch_err_fn(c, ret);
return ret;
}

View File

@@ -1021,8 +1021,7 @@ int bch2_check_allocations(struct bch_fs *c)
{
int ret;
lockdep_assert_held(&c->state_lock);
down_read(&c->state_lock);
down_write(&c->gc_lock);
bch2_btree_interior_updates_flush(c);
@@ -1060,6 +1059,7 @@ out:
percpu_up_write(&c->mark_lock);
up_write(&c->gc_lock);
up_read(&c->state_lock);
/*
* At startup, allocations can happen directly instead of via the

View File

@@ -5,6 +5,8 @@
#define BCH_ERRCODES() \
x(ERANGE, ERANGE_option_too_small) \
x(ERANGE, ERANGE_option_too_big) \
x(EINVAL, injected) \
x(BCH_ERR_injected, injected_fs_start) \
x(EINVAL, mount_option) \
x(BCH_ERR_mount_option, option_name) \
x(BCH_ERR_mount_option, option_value) \
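The two new entries extend the BCH_ERRCODES() x-macro: each private error code names a parent class, either a standard errno or another BCH_ERR_* code, so BCH_ERR_injected_fs_start can be matched as a kind of BCH_ERR_injected and reported as EINVAL at the boundary. A simplified, self-contained sketch of the same x-macro technique follows; it is not the actual bcachefs table or helpers, and ERR_START, err_class and err_matches are made-up names:

/*
 * Simplified sketch of the x-macro pattern: each private code names a parent
 * class, and the matcher walks the parent chain down to an errno.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define ERRCODES()					\
	x(EINVAL,	injected)			\
	x(ERR_injected,	injected_fs_start)

enum {
	ERR_START = 2048,
#define x(class, err)	ERR_##err,
	ERRCODES()
#undef x
	ERR_MAX
};

/* Parent class of each private code, indexed by code - ERR_START. */
static const int err_class[] = {
#define x(class, err)	[ERR_##err - ERR_START] = class,
	ERRCODES()
#undef x
};

/* Does err belong to class, following the parent chain? */
static bool err_matches(int err, int class)
{
	while (err >= ERR_START) {
		if (err == class)
			return true;
		err = err_class[err - ERR_START];
	}
	return err == class;
}

int main(void)
{
	printf("%d\n", err_matches(ERR_injected_fs_start, ERR_injected));	/* 1 */
	printf("%d\n", err_matches(ERR_injected_fs_start, EINVAL));		/* 1 */
	return 0;
}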

View File

@@ -482,14 +482,12 @@ void bch2_opts_to_text(struct printbuf *out,
int bch2_opt_check_may_set(struct bch_fs *c, struct bch_dev *ca, int id, u64 v)
{
lockdep_assert_held(&c->state_lock);
int ret = 0;
switch (id) {
case Opt_state:
if (ca)
return __bch2_dev_set_state(c, ca, v, BCH_FORCE_IF_DEGRADED);
return bch2_dev_set_state(c, ca, v, BCH_FORCE_IF_DEGRADED);
break;
case Opt_compression:
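bch2_opt_check_may_set() no longer runs under state_lock, so it calls bch2_dev_set_state(), which takes the lock itself, rather than the double-underscore variant that, per the usual kernel convention, expects the caller to already hold it. A small standalone illustration of that locked/unlocked wrapper convention, using a pthread rwlock and hypothetical names in place of the bcachefs functions:

/*
 * Hypothetical illustration of the __foo()/foo() locking convention;
 * dev_set_state()/__dev_set_state() are stand-ins, not bcachefs functions.
 */
#include <pthread.h>

static pthread_rwlock_t state_lock = PTHREAD_RWLOCK_INITIALIZER;
static int dev_state;

/* Caller must already hold state_lock for write. */
static int __dev_set_state(int new_state)
{
	dev_state = new_state;
	return 0;
}

/* Takes the lock itself; safe to call from unlocked context. */
static int dev_set_state(int new_state)
{
	pthread_rwlock_wrlock(&state_lock);
	int ret = __dev_set_state(new_state);
	pthread_rwlock_unlock(&state_lock);
	return ret;
}

int main(void)
{
	return dev_set_state(1);
}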

View File

@@ -234,28 +234,22 @@ static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
int bch2_run_online_recovery_passes(struct bch_fs *c)
{
int ret = 0;
down_read(&c->state_lock);
for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) {
struct recovery_pass_fn *p = recovery_pass_fns + i;
if (!(p->when & PASS_ONLINE))
continue;
ret = bch2_run_recovery_pass(c, i);
int ret = bch2_run_recovery_pass(c, i);
if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) {
i = c->curr_recovery_pass;
continue;
}
if (ret)
break;
return ret;
}
up_read(&c->state_lock);
return ret;
return 0;
}
int bch2_run_recovery_passes(struct bch_fs *c)
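With the lock gone, bch2_run_online_recovery_passes() also drops its ret accumulator: each pass result is checked on the spot, BCH_ERR_restart_recovery rewinds the loop index to c->curr_recovery_pass, and any other error returns immediately. A minimal standalone sketch of that restartable loop; struct pass_fn, PASS_ONLINE, curr_pass and the pass itself are illustrative stand-ins, not the bcachefs definitions:

/*
 * Minimal sketch of a restartable pass loop; all names here are stand-ins.
 */
#define PASS_ONLINE		(1U << 0)
#define ERR_RESTART_RECOVERY	(-1000)	/* stand-in for BCH_ERR_restart_recovery */

struct pass_fn {
	int		(*fn)(void);
	unsigned	when;
};

static int noop_pass(void)
{
	return 0;
}

static struct pass_fn pass_fns[] = {
	{ .fn = noop_pass, .when = PASS_ONLINE },
};

static unsigned curr_pass;	/* set by a pass that schedules an earlier rerun */

static int run_online_passes(void)
{
	for (unsigned i = 0; i < sizeof(pass_fns) / sizeof(pass_fns[0]); i++) {
		struct pass_fn *p = pass_fns + i;

		if (!(p->when & PASS_ONLINE))
			continue;

		int ret = p->fn();
		if (ret == ERR_RESTART_RECOVERY) {
			i = curr_pass;	/* rewind, mirroring the kernel code above */
			continue;
		}
		if (ret)
			return ret;	/* fail fast; no accumulator, no lock to drop */
	}
	return 0;
}

int main(void)
{
	return run_online_passes();
}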

View File

@@ -533,9 +533,11 @@ int bch2_fs_read_write(struct bch_fs *c)
int bch2_fs_read_write_early(struct bch_fs *c)
{
lockdep_assert_held(&c->state_lock);
down_write(&c->state_lock);
int ret = __bch2_fs_read_write(c, true);
up_write(&c->state_lock);
return __bch2_fs_read_write(c, true);
return ret;
}
/* Filesystem startup/shutdown: */
@@ -1019,38 +1021,39 @@ static void print_mount_opts(struct bch_fs *c)
int bch2_fs_start(struct bch_fs *c)
{
time64_t now = ktime_get_real_seconds();
int ret;
int ret = 0;
print_mount_opts(c);
down_write(&c->state_lock);
mutex_lock(&c->sb_lock);
BUG_ON(test_bit(BCH_FS_started, &c->flags));
mutex_lock(&c->sb_lock);
if (!bch2_sb_field_get_minsize(&c->disk_sb, ext,
sizeof(struct bch_sb_field_ext) / sizeof(u64))) {
mutex_unlock(&c->sb_lock);
up_write(&c->state_lock);
ret = -BCH_ERR_ENOSPC_sb;
goto err;
}
ret = bch2_sb_members_v2_init(c);
if (ret) {
mutex_unlock(&c->sb_lock);
up_write(&c->state_lock);
goto err;
}
for_each_online_member(c, ca)
bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = cpu_to_le64(now);
struct bch_sb_field_ext *ext =
bch2_sb_field_get_minsize(&c->disk_sb, ext, sizeof(*ext) / sizeof(u64));
mutex_unlock(&c->sb_lock);
if (!ext) {
bch_err(c, "insufficient space in superblock for sb_field_ext");
ret = -BCH_ERR_ENOSPC_sb;
goto err;
}
for_each_rw_member(c, ca)
bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c);
up_write(&c->state_lock);
c->recovery_task = current;
ret = BCH_SB_INITIALIZED(c->disk_sb.sb)
@@ -1066,31 +1069,28 @@ int bch2_fs_start(struct bch_fs *c)
goto err;
if (bch2_fs_init_fault("fs_start")) {
bch_err(c, "fs_start fault injected");
ret = -EINVAL;
ret = -BCH_ERR_injected_fs_start;
goto err;
}
set_bit(BCH_FS_started, &c->flags);
wake_up(&c->ro_ref_wait);
down_write(&c->state_lock);
if (c->opts.read_only) {
bch2_fs_read_only(c);
} else {
ret = !test_bit(BCH_FS_rw, &c->flags)
? bch2_fs_read_write(c)
: bch2_fs_read_write_late(c);
if (ret)
goto err;
}
up_write(&c->state_lock);
ret = 0;
err:
if (ret)
bch_err_msg(c, ret, "starting filesystem");
else
bch_verbose(c, "done starting filesystem");
up_write(&c->state_lock);
return ret;
}

View File

@@ -631,8 +631,6 @@ static ssize_t sysfs_opt_store(struct bch_fs *c,
if (unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs)))
return -EROFS;
down_write(&c->state_lock);
char *tmp = kstrdup(buf, GFP_KERNEL);
if (!tmp) {
ret = -ENOMEM;
@@ -675,7 +673,6 @@ static ssize_t sysfs_opt_store(struct bch_fs *c,
ret = size;
err:
up_write(&c->state_lock);
bch2_write_ref_put(c, BCH_WRITE_REF_sysfs);
return ret;
}
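sysfs_opt_store() now relies only on the write reference it already takes: with bch2_opt_check_may_set() no longer asserting state_lock (see the hunk above), the rwsem around the store appears redundant. bch2_write_ref_tryget() is a try-get that fails once the filesystem has begun going read-only, which is why the function bails out with -EROFS. A rough plain-atomics analogue of that try-get pattern; the real write refs are per-cpu and wired into the RO path, and everything named here is a stand-in:

/*
 * Plain-atomics analogue of a try-get write reference, for illustration only.
 */
#include <errno.h>
#include <stdatomic.h>
#include <stdbool.h>

struct write_refs {
	atomic_long	count;
	atomic_bool	dying;		/* set when the fs starts going read-only */
};

static bool write_ref_tryget(struct write_refs *r)
{
	atomic_fetch_add(&r->count, 1);
	if (atomic_load(&r->dying)) {
		atomic_fetch_sub(&r->count, 1);	/* too late: shutdown already started */
		return false;
	}
	return true;
}

static void write_ref_put(struct write_refs *r)
{
	atomic_fetch_sub(&r->count, 1);
	/* the read-only path sets dying, then waits for count to drain to zero */
}

static int opt_store(struct write_refs *r)
{
	if (!write_ref_tryget(r))
		return -EROFS;

	/* ... parse and apply the option; no global state lock needed ... */

	write_ref_put(r);
	return 0;
}

int main(void)
{
	struct write_refs r = { 0 };

	return opt_store(&r);
}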