block: introduce a dedicated lock for protecting queue elevator updates
A queue's elevator can be updated either when modifying nr_hw_queues or through the sysfs scheduler attribute. Currently, elevator switching/updating is protected using q->sysfs_lock, but this has led to lockdep splats [1] due to inconsistent lock ordering between q->sysfs_lock and the freeze-lock at multiple block layer call sites. As the scope of q->sysfs_lock is not well defined, its (mis)use has resulted in numerous lockdep warnings.

To address this, introduce a new q->elevator_lock dedicated specifically to protecting elevator switches/updates, and use it instead of q->sysfs_lock at those sites.

While at it, make elv_iosched_load_module() a static function, as it is only called from elv_iosched_store(), and drop the now-redundant parameters from its signature.

[1] https://lore.kernel.org/all/67637e70.050a0220.3157ee.000c.GAE@google.com/

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
Link: https://lore.kernel.org/r/20250304102551.2533767-5-nilay@linux.ibm.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent d23977fee1
commit 1bf70d08cc
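Before reading the hunks, here is a condensed sketch of the locking pattern this patch establishes. It is an illustration only, not a copy of the patched functions: the helpers example_store_sched() and example_show_sched() are hypothetical, while blk_mq_freeze_queue(), blk_mq_unfreeze_queue(), elevator_change(), and q->elevator_lock are the interfaces touched below. Writers freeze the queue first and only then take ->elevator_lock (the ordering documented in the new struct request_queue comment); readers of q->elevator need only the mutex.

/* Hypothetical writer path, condensed from the elv_iosched_store() hunk below. */
static int example_store_sched(struct request_queue *q, const char *name)
{
	unsigned int memflags;
	int ret;

	memflags = blk_mq_freeze_queue(q);	/* step 1: freeze the queue */
	mutex_lock(&q->elevator_lock);		/* step 2: then take elevator_lock */
	ret = elevator_change(q, name);		/* q->elevator is updated under the lock */
	mutex_unlock(&q->elevator_lock);
	blk_mq_unfreeze_queue(q, memflags);
	return ret;
}

/* Hypothetical reader path, mirroring elv_iosched_show(): no freeze required. */
static void example_show_sched(struct request_queue *q)
{
	mutex_lock(&q->elevator_lock);
	/* q->elevator cannot change or go away here */
	mutex_unlock(&q->elevator_lock);
}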
@@ -429,6 +429,7 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id)
 	refcount_set(&q->refs, 1);
 	mutex_init(&q->debugfs_mutex);
+	mutex_init(&q->elevator_lock);
 	mutex_init(&q->sysfs_lock);
 	mutex_init(&q->limits_lock);
 	mutex_init(&q->rq_qos_mutex);
@@ -4467,7 +4467,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 	unsigned long i, j;
 
 	/* protect against switching io scheduler */
-	mutex_lock(&q->sysfs_lock);
+	mutex_lock(&q->elevator_lock);
 	for (i = 0; i < set->nr_hw_queues; i++) {
 		int old_node;
 		int node = blk_mq_get_hctx_node(set, i);
@@ -4500,7 +4500,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 
 	xa_for_each_start(&q->hctx_table, j, hctx, j)
 		blk_mq_exit_hctx(q, set, hctx, j);
-	mutex_unlock(&q->sysfs_lock);
+	mutex_unlock(&q->elevator_lock);
 
 	/* unregister cpuhp callbacks for exited hctxs */
 	blk_mq_remove_hw_queues_cpuhp(q);
@@ -4933,10 +4933,9 @@ static bool blk_mq_elv_switch_none(struct list_head *head,
 	if (!qe)
 		return false;
 
-	/* q->elevator needs protection from ->sysfs_lock */
-	mutex_lock(&q->sysfs_lock);
+	/* Accessing q->elevator needs protection from ->elevator_lock. */
+	mutex_lock(&q->elevator_lock);
 
-	/* the check has to be done with holding sysfs_lock */
 	if (!q->elevator) {
 		kfree(qe);
 		goto unlock;
@@ -4950,7 +4949,7 @@ static bool blk_mq_elv_switch_none(struct list_head *head,
 	list_add(&qe->node, head);
 	elevator_disable(q);
 unlock:
-	mutex_unlock(&q->sysfs_lock);
+	mutex_unlock(&q->elevator_lock);
 
 	return true;
 }
@@ -4980,11 +4979,11 @@ static void blk_mq_elv_switch_back(struct list_head *head,
 	list_del(&qe->node);
 	kfree(qe);
 
-	mutex_lock(&q->sysfs_lock);
+	mutex_lock(&q->elevator_lock);
 	elevator_switch(q, t);
 	/* drop the reference acquired in blk_mq_elv_switch_none */
 	elevator_put(t);
-	mutex_unlock(&q->sysfs_lock);
+	mutex_unlock(&q->elevator_lock);
 }
 
 static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
@@ -693,10 +693,15 @@ static struct attribute *blk_mq_queue_attrs[] = {
 	 * Attributes which are protected with q->sysfs_lock.
 	 */
 	&queue_requests_entry.attr,
-	&elv_iosched_entry.attr,
 #ifdef CONFIG_BLK_WBT
 	&queue_wb_lat_entry.attr,
 #endif
+	/*
+	 * Attributes which require some form of locking other than
+	 * q->sysfs_lock.
+	 */
+	&elv_iosched_entry.attr,
+
 	/*
 	 * Attributes which don't require locking.
 	 */
@@ -865,15 +870,19 @@ int blk_register_queue(struct gendisk *disk)
 	if (ret)
 		goto out_debugfs_remove;
 
-	if (q->elevator) {
-		ret = elv_register_queue(q, false);
-		if (ret)
-			goto out_unregister_ia_ranges;
-	}
-
 	ret = blk_crypto_sysfs_register(disk);
 	if (ret)
-		goto out_elv_unregister;
+		goto out_unregister_ia_ranges;
 
+	mutex_lock(&q->elevator_lock);
+	if (q->elevator) {
+		ret = elv_register_queue(q, false);
+		if (ret) {
+			mutex_unlock(&q->elevator_lock);
+			goto out_crypto_sysfs_unregister;
+		}
+	}
+	mutex_unlock(&q->elevator_lock);
+
 	blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
 	wbt_enable_default(disk);
@@ -898,8 +907,8 @@ int blk_register_queue(struct gendisk *disk)
 
 	return ret;
 
-out_elv_unregister:
-	elv_unregister_queue(q);
+out_crypto_sysfs_unregister:
+	blk_crypto_sysfs_unregister(disk);
 out_unregister_ia_ranges:
 	disk_unregister_independent_access_ranges(disk);
 out_debugfs_remove:
@@ -945,8 +954,11 @@ void blk_unregister_queue(struct gendisk *disk)
 	blk_mq_sysfs_unregister(disk);
 	blk_crypto_sysfs_unregister(disk);
 
-	mutex_lock(&q->sysfs_lock);
+	mutex_lock(&q->elevator_lock);
 	elv_unregister_queue(q);
+	mutex_unlock(&q->elevator_lock);
+
+	mutex_lock(&q->sysfs_lock);
 	disk_unregister_independent_access_ranges(disk);
 	mutex_unlock(&q->sysfs_lock);
@@ -457,7 +457,7 @@ int elv_register_queue(struct request_queue *q, bool uevent)
 	struct elevator_queue *e = q->elevator;
 	int error;
 
-	lockdep_assert_held(&q->sysfs_lock);
+	lockdep_assert_held(&q->elevator_lock);
 
 	error = kobject_add(&e->kobj, &q->disk->queue_kobj, "iosched");
 	if (!error) {
@@ -481,7 +481,7 @@ void elv_unregister_queue(struct request_queue *q)
 {
 	struct elevator_queue *e = q->elevator;
 
-	lockdep_assert_held(&q->sysfs_lock);
+	lockdep_assert_held(&q->elevator_lock);
 
 	if (e && test_and_clear_bit(ELEVATOR_FLAG_REGISTERED, &e->flags)) {
 		kobject_uevent(&e->kobj, KOBJ_REMOVE);
@@ -618,7 +618,7 @@ int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 	unsigned int memflags;
 	int ret;
 
-	lockdep_assert_held(&q->sysfs_lock);
+	lockdep_assert_held(&q->elevator_lock);
 
 	memflags = blk_mq_freeze_queue(q);
 	blk_mq_quiesce_queue(q);
@@ -655,7 +655,7 @@ void elevator_disable(struct request_queue *q)
 {
 	unsigned int memflags;
 
-	lockdep_assert_held(&q->sysfs_lock);
+	lockdep_assert_held(&q->elevator_lock);
 
 	memflags = blk_mq_freeze_queue(q);
 	blk_mq_quiesce_queue(q);
@@ -700,28 +700,23 @@ static int elevator_change(struct request_queue *q, const char *elevator_name)
 	return ret;
 }
 
-void elv_iosched_load_module(struct gendisk *disk, const char *buf,
-			     size_t count)
+static void elv_iosched_load_module(char *elevator_name)
 {
-	char elevator_name[ELV_NAME_MAX];
 	struct elevator_type *found;
-	const char *name;
-
-	strscpy(elevator_name, buf, sizeof(elevator_name));
-	name = strstrip(elevator_name);
 
 	spin_lock(&elv_list_lock);
-	found = __elevator_find(name);
+	found = __elevator_find(elevator_name);
 	spin_unlock(&elv_list_lock);
 
 	if (!found)
-		request_module("%s-iosched", name);
+		request_module("%s-iosched", elevator_name);
 }
 
 ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
 			  size_t count)
 {
 	char elevator_name[ELV_NAME_MAX];
+	char *name;
 	int ret;
 	unsigned int memflags;
 	struct request_queue *q = disk->queue;
@@ -731,16 +726,18 @@ ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
 	 * queue to ensure that the module file can be read when the request
 	 * queue is the one for the device storing the module file.
 	 */
-	elv_iosched_load_module(disk, buf, count);
+	strscpy(elevator_name, buf, sizeof(elevator_name));
+	name = strstrip(elevator_name);
+
+	elv_iosched_load_module(name);
 
-	mutex_lock(&q->sysfs_lock);
 	memflags = blk_mq_freeze_queue(q);
-	ret = elevator_change(q, strstrip(elevator_name));
+	mutex_lock(&q->elevator_lock);
+	ret = elevator_change(q, name);
 	if (!ret)
 		ret = count;
+	mutex_unlock(&q->elevator_lock);
 	blk_mq_unfreeze_queue(q, memflags);
-	mutex_unlock(&q->sysfs_lock);
 	return ret;
 }
@@ -751,7 +748,7 @@ ssize_t elv_iosched_show(struct gendisk *disk, char *name)
 	struct elevator_type *cur = NULL, *e;
 	int len = 0;
 
-	mutex_lock(&q->sysfs_lock);
+	mutex_lock(&q->elevator_lock);
 	if (!q->elevator) {
 		len += sprintf(name+len, "[none] ");
 	} else {
@@ -769,7 +766,7 @@ ssize_t elv_iosched_show(struct gendisk *disk, char *name)
 	spin_unlock(&elv_list_lock);
 
 	len += sprintf(name+len, "\n");
-	mutex_unlock(&q->sysfs_lock);
+	mutex_unlock(&q->elevator_lock);
 
 	return len;
 }
@@ -148,8 +148,6 @@ extern void elv_unregister(struct elevator_type *);
  * io scheduler sysfs switching
  */
 ssize_t elv_iosched_show(struct gendisk *disk, char *page);
-void elv_iosched_load_module(struct gendisk *disk, const char *page,
-			     size_t count);
 ssize_t elv_iosched_store(struct gendisk *disk, const char *page, size_t count);
 
 extern bool elv_bio_merge_ok(struct request *, struct bio *);
@@ -565,8 +565,11 @@ out_free_ext_minor:
 	if (disk->major == BLOCK_EXT_MAJOR)
 		blk_free_ext_minor(disk->first_minor);
 out_exit_elevator:
-	if (disk->queue->elevator)
+	if (disk->queue->elevator) {
+		mutex_lock(&disk->queue->elevator_lock);
 		elevator_exit(disk->queue);
+		mutex_unlock(&disk->queue->elevator_lock);
+	}
 	return ret;
 }
 EXPORT_SYMBOL_GPL(add_disk_fwnode);
@@ -742,9 +745,9 @@ void del_gendisk(struct gendisk *disk)
 
 	blk_mq_quiesce_queue(q);
 	if (q->elevator) {
-		mutex_lock(&q->sysfs_lock);
+		mutex_lock(&q->elevator_lock);
 		elevator_exit(q);
-		mutex_unlock(&q->sysfs_lock);
+		mutex_unlock(&q->elevator_lock);
 	}
 	rq_qos_exit(q);
 	blk_mq_unquiesce_queue(q);
@@ -560,6 +560,14 @@ struct request_queue {
 	struct blk_flush_queue	*fq;
 	struct list_head	flush_list;
 
+	/*
+	 * Protects against I/O scheduler switching, specifically when
+	 * updating q->elevator. To ensure proper locking order during
+	 * an elevator update, first freeze the queue, then acquire
+	 * ->elevator_lock.
+	 */
+	struct mutex		elevator_lock;
+
 	struct mutex		sysfs_lock;
 	struct mutex		limits_lock;