block-6.15-20250417

-----BEGIN PGP SIGNATURE-----
 
 iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmgBYN8QHGF4Ym9lQGtl
 cm5lbC5kawAKCRD301j7KXHgprhiD/9LnoTEqPdDGaE/f/1yUgYcJL8IUSMTfpyB
 JajQp5klWNHmIyD/GdzFq+6SL/XDAllO/NgVdQlI+78s5GRn7A/fy3unmB3kYhs/
 Spz9reD7/wH6lp2/u5jKD0Dk3Wz9LCGAUxQ2QYtd9lXJo/Dem3roBpty6/GYvLQy
 3kSwa3e4dekd9jBZ+lbnSaFcvQg3Xc/x+SoP3r60wrMIEOyJrHLHWhLMolC/ZkGw
 sl1nvj4dnRAK77G7KPctYIu6K7ryJwQLJhBre7t5Fd4Dzn46l/sNwOkBn7hhdaTR
 e3+F7C1D22zIHFrknkm1+9KkZA/9tIz1CUYRlYCxGPsH1XP4dy78uTk7sgGORV9C
 0gFJ3nqzSu0LP3Mk06e2DH+Oqq0wtdnggxmAXjJhah9JFrP7H9bEi4lTEsJ6XjLV
 PCL4PYGEkrJp7faD0p2icq6GKwx/EINlCob6Cx0h+lNo/Crz0FjkPNZkLTiYLahc
 S8Wlc6xMiMtRxdH3LX8ptAGot2s3uTQiNIKmkPkzIiSDoUoZbao1oknm8tpmXa1x
 Wg6bmOj5Jbd1K+Gyu24rIxW7RVkXtfB63o5ScRu+YGXhulsnV2mCPXZ2qxlW3s51
 zZcHUNQPAgmBdf/qzkNbk4fPS2G1rC6eJOLn84B4E5PWbP0xFjv6FdEwPF/ovdb8
 aIyR3vSjyA==
 =YCi8
 -----END PGP SIGNATURE-----

Merge tag 'block-6.15-20250417' of git://git.kernel.dk/linux

Pull block fixes from Jens Axboe:

 - MD pull via Yu:
      - fix raid10 missing discard IO accounting (Yu Kuai)
      - fix bitmap stats for bitmap file (Zheng Qixing)
      - fix oops while reading all member disks failed during
        check/repair (Meir Elisha)

 - NVMe pull via Christoph:
      - fix scan failure for non-ANA multipath controllers (Hannes
        Reinecke)
      - fix multipath sysfs links creation for some cases (Hannes
        Reinecke)
      - PCIe endpoint fixes (Damien Le Moal)
      - use NULL instead of 0 in the auth code (Damien Le Moal)

 - Various ublk fixes:
      - Slew of selftest additions
      - Improvements and fixes for IO cancelation
      - Tweak to Kconfig verbiage

 - Fix for page dirtying for blk integrity mapped pages

 - loop fixes:
      - buffered IO fix
      - uevent fixes
      - request priority inheritance fix

 - Various little fixes

* tag 'block-6.15-20250417' of git://git.kernel.dk/linux: (38 commits)
  selftests: ublk: add generic_06 for covering fault inject
  ublk: simplify aborting ublk request
  ublk: remove __ublk_quiesce_dev()
  ublk: improve detection and handling of ublk server exit
  ublk: move device reset into ublk_ch_release()
  ublk: rely on ->canceling for dealing with ublk_nosrv_dev_should_queue_io
  ublk: add ublk_force_abort_dev()
  ublk: properly serialize all FETCH_REQs
  selftests: ublk: move creating UBLK_TMP into _prep_test()
  selftests: ublk: add test_stress_05.sh
  selftests: ublk: support user recovery
  selftests: ublk: support target specific command line
  selftests: ublk: increase max nr_queues and queue depth
  selftests: ublk: set queue pthread's cpu affinity
  selftests: ublk: setup ring with IORING_SETUP_SINGLE_ISSUER/IORING_SETUP_DEFER_TASKRUN
  selftests: ublk: add two stress tests for zero copy feature
  selftests: ublk: run stress tests in parallel
  selftests: ublk: make sure _add_ublk_dev can return in sub-shell
  selftests: ublk: cleanup backfile automatically
  selftests: ublk: add io_uring uapi header
  ...
This commit is contained in:
Linus Torvalds 2025-04-18 09:21:14 -07:00
commit f7c2ca2584
36 changed files with 1326 additions and 605 deletions

View File

@ -66,16 +66,12 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
}
EXPORT_SYMBOL(bio_integrity_alloc);
static void bio_integrity_unpin_bvec(struct bio_vec *bv, int nr_vecs,
bool dirty)
static void bio_integrity_unpin_bvec(struct bio_vec *bv, int nr_vecs)
{
int i;
for (i = 0; i < nr_vecs; i++) {
if (dirty && !PageCompound(bv[i].bv_page))
set_page_dirty_lock(bv[i].bv_page);
for (i = 0; i < nr_vecs; i++)
unpin_user_page(bv[i].bv_page);
}
}
static void bio_integrity_uncopy_user(struct bio_integrity_payload *bip)
@ -91,7 +87,7 @@ static void bio_integrity_uncopy_user(struct bio_integrity_payload *bip)
ret = copy_to_iter(bvec_virt(bounce_bvec), bytes, &orig_iter);
WARN_ON_ONCE(ret != bytes);
bio_integrity_unpin_bvec(orig_bvecs, orig_nr_vecs, true);
bio_integrity_unpin_bvec(orig_bvecs, orig_nr_vecs);
}
/**
@ -111,8 +107,7 @@ void bio_integrity_unmap_user(struct bio *bio)
return;
}
bio_integrity_unpin_bvec(bip->bip_vec, bip->bip_max_vcnt,
bio_data_dir(bio) == READ);
bio_integrity_unpin_bvec(bip->bip_vec, bip->bip_max_vcnt);
}
/**
@ -198,7 +193,7 @@ static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec,
}
if (write)
bio_integrity_unpin_bvec(bvec, nr_vecs, false);
bio_integrity_unpin_bvec(bvec, nr_vecs);
else
memcpy(&bip->bip_vec[1], bvec, nr_vecs * sizeof(*bvec));
@ -319,7 +314,7 @@ int bio_integrity_map_user(struct bio *bio, struct iov_iter *iter)
return 0;
release_pages:
bio_integrity_unpin_bvec(bvec, nr_bvecs, false);
bio_integrity_unpin_bvec(bvec, nr_bvecs);
free_bvec:
if (bvec != stack_vec)
kfree(bvec);

View File

@ -909,6 +909,8 @@ out_unregister_ia_ranges:
out_debugfs_remove:
blk_debugfs_remove(disk);
mutex_unlock(&q->sysfs_lock);
if (queue_is_mq(q))
blk_mq_sysfs_unregister(disk);
out_put_queue_kobj:
kobject_put(&disk->queue_kobj);
return ret;

View File

@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef BLK_THROTTLE_H
#define BLK_THROTTLE_H

View File

@ -388,12 +388,6 @@ config BLK_DEV_UBLK
definition isn't finalized yet, and might change according to future
requirement, so mark is as experimental now.
Say Y if you want to get better performance because task_work_add()
can be used in IO path for replacing io_uring cmd, which will become
shared between IO tasks and ubq daemon, meantime task_work_add() can
can handle batch more effectively, but task_work_add() isn't exported
for module, so ublk has to be built to kernel.
config BLKDEV_UBLK_LEGACY_OPCODES
bool "Support legacy command opcode"
depends on BLK_DEV_UBLK

View File

@ -211,72 +211,6 @@ static void loop_set_size(struct loop_device *lo, loff_t size)
kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE);
}
static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos)
{
struct iov_iter i;
ssize_t bw;
iov_iter_bvec(&i, ITER_SOURCE, bvec, 1, bvec->bv_len);
bw = vfs_iter_write(file, &i, ppos, 0);
if (likely(bw == bvec->bv_len))
return 0;
printk_ratelimited(KERN_ERR
"loop: Write error at byte offset %llu, length %i.\n",
(unsigned long long)*ppos, bvec->bv_len);
if (bw >= 0)
bw = -EIO;
return bw;
}
static int lo_write_simple(struct loop_device *lo, struct request *rq,
loff_t pos)
{
struct bio_vec bvec;
struct req_iterator iter;
int ret = 0;
rq_for_each_segment(bvec, rq, iter) {
ret = lo_write_bvec(lo->lo_backing_file, &bvec, &pos);
if (ret < 0)
break;
cond_resched();
}
return ret;
}
static int lo_read_simple(struct loop_device *lo, struct request *rq,
loff_t pos)
{
struct bio_vec bvec;
struct req_iterator iter;
struct iov_iter i;
ssize_t len;
rq_for_each_segment(bvec, rq, iter) {
iov_iter_bvec(&i, ITER_DEST, &bvec, 1, bvec.bv_len);
len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
if (len < 0)
return len;
flush_dcache_page(bvec.bv_page);
if (len != bvec.bv_len) {
struct bio *bio;
__rq_for_each_bio(bio, rq)
zero_fill_bio(bio);
break;
}
cond_resched();
}
return 0;
}
static void loop_clear_limits(struct loop_device *lo, int mode)
{
struct queue_limits lim = queue_limits_start_update(lo->lo_queue);
@ -342,7 +276,7 @@ static void lo_complete_rq(struct request *rq)
struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq);
blk_status_t ret = BLK_STS_OK;
if (!cmd->use_aio || cmd->ret < 0 || cmd->ret == blk_rq_bytes(rq) ||
if (cmd->ret < 0 || cmd->ret == blk_rq_bytes(rq) ||
req_op(rq) != REQ_OP_READ) {
if (cmd->ret < 0)
ret = errno_to_blk_status(cmd->ret);
@ -358,14 +292,13 @@ static void lo_complete_rq(struct request *rq)
cmd->ret = 0;
blk_mq_requeue_request(rq, true);
} else {
if (cmd->use_aio) {
struct bio *bio = rq->bio;
struct bio *bio = rq->bio;
while (bio) {
zero_fill_bio(bio);
bio = bio->bi_next;
}
while (bio) {
zero_fill_bio(bio);
bio = bio->bi_next;
}
ret = BLK_STS_IOERR;
end_io:
blk_mq_end_request(rq, ret);
@ -445,9 +378,14 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
cmd->iocb.ki_pos = pos;
cmd->iocb.ki_filp = file;
cmd->iocb.ki_complete = lo_rw_aio_complete;
cmd->iocb.ki_flags = IOCB_DIRECT;
cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
cmd->iocb.ki_ioprio = req_get_ioprio(rq);
if (cmd->use_aio) {
cmd->iocb.ki_complete = lo_rw_aio_complete;
cmd->iocb.ki_flags = IOCB_DIRECT;
} else {
cmd->iocb.ki_complete = NULL;
cmd->iocb.ki_flags = 0;
}
if (rw == ITER_SOURCE)
ret = file->f_op->write_iter(&cmd->iocb, &iter);
@ -458,7 +396,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
if (ret != -EIOCBQUEUED)
lo_rw_aio_complete(&cmd->iocb, ret);
return 0;
return -EIOCBQUEUED;
}
static int do_req_filebacked(struct loop_device *lo, struct request *rq)
@ -466,15 +404,6 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq);
loff_t pos = ((loff_t) blk_rq_pos(rq) << 9) + lo->lo_offset;
/*
* lo_write_simple and lo_read_simple should have been covered
* by io submit style function like lo_rw_aio(), one blocker
* is that lo_read_simple() need to call flush_dcache_page after
* the page is written from kernel, and it isn't easy to handle
* this in io submit style function which submits all segments
* of the req at one time. And direct read IO doesn't need to
* run flush_dcache_page().
*/
switch (req_op(rq)) {
case REQ_OP_FLUSH:
return lo_req_flush(lo, rq);
@ -490,15 +419,9 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
case REQ_OP_DISCARD:
return lo_fallocate(lo, rq, pos, FALLOC_FL_PUNCH_HOLE);
case REQ_OP_WRITE:
if (cmd->use_aio)
return lo_rw_aio(lo, cmd, pos, ITER_SOURCE);
else
return lo_write_simple(lo, rq, pos);
return lo_rw_aio(lo, cmd, pos, ITER_SOURCE);
case REQ_OP_READ:
if (cmd->use_aio)
return lo_rw_aio(lo, cmd, pos, ITER_DEST);
else
return lo_read_simple(lo, rq, pos);
return lo_rw_aio(lo, cmd, pos, ITER_DEST);
default:
WARN_ON_ONCE(1);
return -EIO;
@ -662,19 +585,20 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
* dependency.
*/
fput(old_file);
dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
if (partscan)
loop_reread_partitions(lo);
error = 0;
done:
/* enable and uncork uevent now that we are done */
dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE);
return error;
out_err:
loop_global_unlock(lo, is_loop);
out_putf:
fput(file);
dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
goto done;
}
@ -1129,8 +1053,8 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
if (partscan)
clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state);
/* enable and uncork uevent now that we are done */
dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE);
loop_global_unlock(lo, is_loop);
if (partscan)
@ -1921,7 +1845,6 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
struct loop_device *lo = rq->q->queuedata;
int ret = 0;
struct mem_cgroup *old_memcg = NULL;
const bool use_aio = cmd->use_aio;
if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) {
ret = -EIO;
@ -1951,7 +1874,7 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
}
failed:
/* complete non-aio request */
if (!use_aio || ret) {
if (ret != -EIOCBQUEUED) {
if (ret == -EOPNOTSUPP)
cmd->ret = ret;
else

View File

@ -122,15 +122,6 @@ struct ublk_uring_cmd_pdu {
*/
#define UBLK_IO_FLAG_OWNED_BY_SRV 0x02
/*
* IO command is aborted, so this flag is set in case of
* !UBLK_IO_FLAG_ACTIVE.
*
* After this flag is observed, any pending or new incoming request
* associated with this io command will be failed immediately
*/
#define UBLK_IO_FLAG_ABORTED 0x04
/*
* UBLK_IO_FLAG_NEED_GET_DATA is set because IO command requires
* get data buffer address from ublksrv.
@ -199,8 +190,6 @@ struct ublk_device {
struct completion completion;
unsigned int nr_queues_ready;
unsigned int nr_privileged_daemon;
struct work_struct nosrv_work;
};
/* header of ublk_params */
@ -209,8 +198,8 @@ struct ublk_params_header {
__u32 types;
};
static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq);
static void ublk_stop_dev_unlocked(struct ublk_device *ub);
static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq);
static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
struct ublk_queue *ubq, int tag, size_t offset);
static inline unsigned int ublk_req_build_flags(struct request *req);
@ -1074,7 +1063,7 @@ static inline struct ublk_uring_cmd_pdu *ublk_get_uring_cmd_pdu(
static inline bool ubq_daemon_is_dying(struct ublk_queue *ubq)
{
return ubq->ubq_daemon->flags & PF_EXITING;
return !ubq->ubq_daemon || ubq->ubq_daemon->flags & PF_EXITING;
}
/* todo: handle partial completion */
@ -1085,12 +1074,6 @@ static inline void __ublk_complete_rq(struct request *req)
unsigned int unmapped_bytes;
blk_status_t res = BLK_STS_OK;
/* called from ublk_abort_queue() code path */
if (io->flags & UBLK_IO_FLAG_ABORTED) {
res = BLK_STS_IOERR;
goto exit;
}
/* failed read IO if nothing is read */
if (!io->res && req_op(req) == REQ_OP_READ)
io->res = -EIO;
@ -1140,47 +1123,6 @@ static void ublk_complete_rq(struct kref *ref)
__ublk_complete_rq(req);
}
static void ublk_do_fail_rq(struct request *req)
{
struct ublk_queue *ubq = req->mq_hctx->driver_data;
if (ublk_nosrv_should_reissue_outstanding(ubq->dev))
blk_mq_requeue_request(req, false);
else
__ublk_complete_rq(req);
}
static void ublk_fail_rq_fn(struct kref *ref)
{
struct ublk_rq_data *data = container_of(ref, struct ublk_rq_data,
ref);
struct request *req = blk_mq_rq_from_pdu(data);
ublk_do_fail_rq(req);
}
/*
* Since ublk_rq_task_work_cb always fails requests immediately during
* exiting, __ublk_fail_req() is only called from abort context during
* exiting. So lock is unnecessary.
*
* Also aborting may not be started yet, keep in mind that one failed
* request may be issued by block layer again.
*/
static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io,
struct request *req)
{
WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE);
if (ublk_need_req_ref(ubq)) {
struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
kref_put(&data->ref, ublk_fail_rq_fn);
} else {
ublk_do_fail_rq(req);
}
}
static void ubq_complete_io_cmd(struct ublk_io *io, int res,
unsigned issue_flags)
{
@ -1336,8 +1278,6 @@ static void ublk_queue_cmd_list(struct ublk_queue *ubq, struct rq_list *l)
static enum blk_eh_timer_return ublk_timeout(struct request *rq)
{
struct ublk_queue *ubq = rq->mq_hctx->driver_data;
unsigned int nr_inflight = 0;
int i;
if (ubq->flags & UBLK_F_UNPRIVILEGED_DEV) {
if (!ubq->timeout) {
@ -1348,26 +1288,6 @@ static enum blk_eh_timer_return ublk_timeout(struct request *rq)
return BLK_EH_DONE;
}
if (!ubq_daemon_is_dying(ubq))
return BLK_EH_RESET_TIMER;
for (i = 0; i < ubq->q_depth; i++) {
struct ublk_io *io = &ubq->ios[i];
if (!(io->flags & UBLK_IO_FLAG_ACTIVE))
nr_inflight++;
}
/* cancelable uring_cmd can't help us if all commands are in-flight */
if (nr_inflight == ubq->q_depth) {
struct ublk_device *ub = ubq->dev;
if (ublk_abort_requests(ub, ubq)) {
schedule_work(&ub->nosrv_work);
}
return BLK_EH_DONE;
}
return BLK_EH_RESET_TIMER;
}
@ -1470,6 +1390,37 @@ static const struct blk_mq_ops ublk_mq_ops = {
.timeout = ublk_timeout,
};
static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
{
int i;
/* All old ioucmds have to be completed */
ubq->nr_io_ready = 0;
/*
* old daemon is PF_EXITING, put it now
*
* It could be NULL in case of closing one quisced device.
*/
if (ubq->ubq_daemon)
put_task_struct(ubq->ubq_daemon);
/* We have to reset it to NULL, otherwise ub won't accept new FETCH_REQ */
ubq->ubq_daemon = NULL;
ubq->timeout = false;
for (i = 0; i < ubq->q_depth; i++) {
struct ublk_io *io = &ubq->ios[i];
/*
* UBLK_IO_FLAG_CANCELED is kept for avoiding to touch
* io->cmd
*/
io->flags &= UBLK_IO_FLAG_CANCELED;
io->cmd = NULL;
io->addr = 0;
}
}
static int ublk_ch_open(struct inode *inode, struct file *filp)
{
struct ublk_device *ub = container_of(inode->i_cdev,
@ -1481,10 +1432,119 @@ static int ublk_ch_open(struct inode *inode, struct file *filp)
return 0;
}
static void ublk_reset_ch_dev(struct ublk_device *ub)
{
int i;
for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
ublk_queue_reinit(ub, ublk_get_queue(ub, i));
/* set to NULL, otherwise new ubq_daemon cannot mmap the io_cmd_buf */
ub->mm = NULL;
ub->nr_queues_ready = 0;
ub->nr_privileged_daemon = 0;
}
static struct gendisk *ublk_get_disk(struct ublk_device *ub)
{
struct gendisk *disk;
spin_lock(&ub->lock);
disk = ub->ub_disk;
if (disk)
get_device(disk_to_dev(disk));
spin_unlock(&ub->lock);
return disk;
}
static void ublk_put_disk(struct gendisk *disk)
{
if (disk)
put_device(disk_to_dev(disk));
}
static int ublk_ch_release(struct inode *inode, struct file *filp)
{
struct ublk_device *ub = filp->private_data;
struct gendisk *disk;
int i;
/*
* disk isn't attached yet, either device isn't live, or it has
* been removed already, so we needn't to do anything
*/
disk = ublk_get_disk(ub);
if (!disk)
goto out;
/*
* All uring_cmd are done now, so abort any request outstanding to
* the ublk server
*
* This can be done in lockless way because ublk server has been
* gone
*
* More importantly, we have to provide forward progress guarantee
* without holding ub->mutex, otherwise control task grabbing
* ub->mutex triggers deadlock
*
* All requests may be inflight, so ->canceling may not be set, set
* it now.
*/
for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
struct ublk_queue *ubq = ublk_get_queue(ub, i);
ubq->canceling = true;
ublk_abort_queue(ub, ubq);
}
blk_mq_kick_requeue_list(disk->queue);
/*
* All infligh requests have been completed or requeued and any new
* request will be failed or requeued via `->canceling` now, so it is
* fine to grab ub->mutex now.
*/
mutex_lock(&ub->mutex);
/* double check after grabbing lock */
if (!ub->ub_disk)
goto unlock;
/*
* Transition the device to the nosrv state. What exactly this
* means depends on the recovery flags
*/
blk_mq_quiesce_queue(disk->queue);
if (ublk_nosrv_should_stop_dev(ub)) {
/*
* Allow any pending/future I/O to pass through quickly
* with an error. This is needed because del_gendisk
* waits for all pending I/O to complete
*/
for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
ublk_get_queue(ub, i)->force_abort = true;
blk_mq_unquiesce_queue(disk->queue);
ublk_stop_dev_unlocked(ub);
} else {
if (ublk_nosrv_dev_should_queue_io(ub)) {
/* ->canceling is set and all requests are aborted */
ub->dev_info.state = UBLK_S_DEV_QUIESCED;
} else {
ub->dev_info.state = UBLK_S_DEV_FAIL_IO;
for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
ublk_get_queue(ub, i)->fail_io = true;
}
blk_mq_unquiesce_queue(disk->queue);
}
unlock:
mutex_unlock(&ub->mutex);
ublk_put_disk(disk);
/* all uring_cmd has been done now, reset device & ubq */
ublk_reset_ch_dev(ub);
out:
clear_bit(UB_STATE_OPEN, &ub->state);
return 0;
}
@ -1551,10 +1611,26 @@ static void ublk_commit_completion(struct ublk_device *ub,
ublk_put_req_ref(ubq, req);
}
static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io,
struct request *req)
{
WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE);
if (ublk_nosrv_should_reissue_outstanding(ubq->dev))
blk_mq_requeue_request(req, false);
else {
io->res = -EIO;
__ublk_complete_rq(req);
}
}
/*
* Called from ubq_daemon context via cancel fn, meantime quiesce ublk
* blk-mq queue, so we are called exclusively with blk-mq and ubq_daemon
* context, so everything is serialized.
* Called from ublk char device release handler, when any uring_cmd is
* done, meantime request queue is "quiesced" since all inflight requests
* can't be completed because ublk server is dead.
*
* So no one can hold our request IO reference any more, simply ignore the
* reference, and complete the request immediately
*/
static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)
{
@ -1571,46 +1647,29 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)
* will do it
*/
rq = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], i);
if (rq && blk_mq_request_started(rq)) {
io->flags |= UBLK_IO_FLAG_ABORTED;
if (rq && blk_mq_request_started(rq))
__ublk_fail_req(ubq, io, rq);
}
}
}
}
/* Must be called when queue is frozen */
static bool ublk_mark_queue_canceling(struct ublk_queue *ubq)
static void ublk_mark_queue_canceling(struct ublk_queue *ubq)
{
bool canceled;
spin_lock(&ubq->cancel_lock);
canceled = ubq->canceling;
if (!canceled)
if (!ubq->canceling)
ubq->canceling = true;
spin_unlock(&ubq->cancel_lock);
return canceled;
}
static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq)
static void ublk_start_cancel(struct ublk_queue *ubq)
{
bool was_canceled = ubq->canceling;
struct gendisk *disk;
if (was_canceled)
return false;
spin_lock(&ub->lock);
disk = ub->ub_disk;
if (disk)
get_device(disk_to_dev(disk));
spin_unlock(&ub->lock);
struct ublk_device *ub = ubq->dev;
struct gendisk *disk = ublk_get_disk(ub);
/* Our disk has been dead */
if (!disk)
return false;
return;
/*
* Now we are serialized with ublk_queue_rq()
*
@ -1619,15 +1678,9 @@ static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq)
* touch completed uring_cmd
*/
blk_mq_quiesce_queue(disk->queue);
was_canceled = ublk_mark_queue_canceling(ubq);
if (!was_canceled) {
/* abort queue is for making forward progress */
ublk_abort_queue(ub, ubq);
}
ublk_mark_queue_canceling(ubq);
blk_mq_unquiesce_queue(disk->queue);
put_device(disk_to_dev(disk));
return !was_canceled;
ublk_put_disk(disk);
}
static void ublk_cancel_cmd(struct ublk_queue *ubq, struct ublk_io *io,
@ -1651,6 +1704,17 @@ static void ublk_cancel_cmd(struct ublk_queue *ubq, struct ublk_io *io,
/*
* The ublk char device won't be closed when calling cancel fn, so both
* ublk device and queue are guaranteed to be live
*
* Two-stage cancel:
*
* - make every active uring_cmd done in ->cancel_fn()
*
* - aborting inflight ublk IO requests in ublk char device release handler,
* which depends on 1st stage because device can only be closed iff all
* uring_cmd are done
*
* Do _not_ try to acquire ub->mutex before all inflight requests are
* aborted, otherwise deadlock may be caused.
*/
static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd,
unsigned int issue_flags)
@ -1658,8 +1722,6 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd,
struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
struct ublk_queue *ubq = pdu->ubq;
struct task_struct *task;
struct ublk_device *ub;
bool need_schedule;
struct ublk_io *io;
if (WARN_ON_ONCE(!ubq))
@ -1672,16 +1734,12 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd,
if (WARN_ON_ONCE(task && task != ubq->ubq_daemon))
return;
ub = ubq->dev;
need_schedule = ublk_abort_requests(ub, ubq);
if (!ubq->canceling)
ublk_start_cancel(ubq);
io = &ubq->ios[pdu->tag];
WARN_ON_ONCE(io->cmd != cmd);
ublk_cancel_cmd(ubq, io, issue_flags);
if (need_schedule) {
schedule_work(&ub->nosrv_work);
}
}
static inline bool ublk_queue_ready(struct ublk_queue *ubq)
@ -1732,33 +1790,20 @@ static void ublk_wait_tagset_rqs_idle(struct ublk_device *ub)
}
}
static void __ublk_quiesce_dev(struct ublk_device *ub)
static void ublk_force_abort_dev(struct ublk_device *ub)
{
pr_devel("%s: quiesce ub: dev_id %d state %s\n",
int i;
pr_devel("%s: force abort ub: dev_id %d state %s\n",
__func__, ub->dev_info.dev_id,
ub->dev_info.state == UBLK_S_DEV_LIVE ?
"LIVE" : "QUIESCED");
blk_mq_quiesce_queue(ub->ub_disk->queue);
ublk_wait_tagset_rqs_idle(ub);
ub->dev_info.state = UBLK_S_DEV_QUIESCED;
}
if (ub->dev_info.state == UBLK_S_DEV_LIVE)
ublk_wait_tagset_rqs_idle(ub);
static void ublk_unquiesce_dev(struct ublk_device *ub)
{
int i;
pr_devel("%s: unquiesce ub: dev_id %d state %s\n",
__func__, ub->dev_info.dev_id,
ub->dev_info.state == UBLK_S_DEV_LIVE ?
"LIVE" : "QUIESCED");
/* quiesce_work has run. We let requeued rqs be aborted
* before running fallback_wq. "force_abort" must be seen
* after request queue is unqiuesced. Then del_gendisk()
* can move on.
*/
for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
ublk_get_queue(ub, i)->force_abort = true;
blk_mq_unquiesce_queue(ub->ub_disk->queue);
/* We may have requeued some rqs in ublk_quiesce_queue() */
blk_mq_kick_requeue_list(ub->ub_disk->queue);
@ -1779,61 +1824,51 @@ static struct gendisk *ublk_detach_disk(struct ublk_device *ub)
return disk;
}
static void ublk_stop_dev(struct ublk_device *ub)
static void ublk_stop_dev_unlocked(struct ublk_device *ub)
__must_hold(&ub->mutex)
{
struct gendisk *disk;
mutex_lock(&ub->mutex);
if (ub->dev_info.state == UBLK_S_DEV_DEAD)
goto unlock;
if (ublk_nosrv_dev_should_queue_io(ub)) {
if (ub->dev_info.state == UBLK_S_DEV_LIVE)
__ublk_quiesce_dev(ub);
ublk_unquiesce_dev(ub);
}
return;
if (ublk_nosrv_dev_should_queue_io(ub))
ublk_force_abort_dev(ub);
del_gendisk(ub->ub_disk);
disk = ublk_detach_disk(ub);
put_disk(disk);
unlock:
}
static void ublk_stop_dev(struct ublk_device *ub)
{
mutex_lock(&ub->mutex);
ublk_stop_dev_unlocked(ub);
mutex_unlock(&ub->mutex);
ublk_cancel_dev(ub);
}
static void ublk_nosrv_work(struct work_struct *work)
/* reset ublk io_uring queue & io flags */
static void ublk_reset_io_flags(struct ublk_device *ub)
{
struct ublk_device *ub =
container_of(work, struct ublk_device, nosrv_work);
int i;
int i, j;
if (ublk_nosrv_should_stop_dev(ub)) {
ublk_stop_dev(ub);
return;
for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
struct ublk_queue *ubq = ublk_get_queue(ub, i);
/* UBLK_IO_FLAG_CANCELED can be cleared now */
spin_lock(&ubq->cancel_lock);
for (j = 0; j < ubq->q_depth; j++)
ubq->ios[j].flags &= ~UBLK_IO_FLAG_CANCELED;
spin_unlock(&ubq->cancel_lock);
ubq->canceling = false;
ubq->fail_io = false;
}
mutex_lock(&ub->mutex);
if (ub->dev_info.state != UBLK_S_DEV_LIVE)
goto unlock;
if (ublk_nosrv_dev_should_queue_io(ub)) {
__ublk_quiesce_dev(ub);
} else {
blk_mq_quiesce_queue(ub->ub_disk->queue);
ub->dev_info.state = UBLK_S_DEV_FAIL_IO;
for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
ublk_get_queue(ub, i)->fail_io = true;
}
blk_mq_unquiesce_queue(ub->ub_disk->queue);
}
unlock:
mutex_unlock(&ub->mutex);
ublk_cancel_dev(ub);
}
/* device can only be started after all IOs are ready */
static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
__must_hold(&ub->mutex)
{
mutex_lock(&ub->mutex);
ubq->nr_io_ready++;
if (ublk_queue_ready(ubq)) {
ubq->ubq_daemon = current;
@ -1843,9 +1878,12 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
if (capable(CAP_SYS_ADMIN))
ub->nr_privileged_daemon++;
}
if (ub->nr_queues_ready == ub->dev_info.nr_hw_queues)
if (ub->nr_queues_ready == ub->dev_info.nr_hw_queues) {
/* now we are ready for handling ublk io request */
ublk_reset_io_flags(ub);
complete_all(&ub->completion);
mutex_unlock(&ub->mutex);
}
}
static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id,
@ -1929,6 +1967,52 @@ static int ublk_unregister_io_buf(struct io_uring_cmd *cmd,
return io_buffer_unregister_bvec(cmd, index, issue_flags);
}
static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq,
struct ublk_io *io, __u64 buf_addr)
{
struct ublk_device *ub = ubq->dev;
int ret = 0;
/*
* When handling FETCH command for setting up ublk uring queue,
* ub->mutex is the innermost lock, and we won't block for handling
* FETCH, so it is fine even for IO_URING_F_NONBLOCK.
*/
mutex_lock(&ub->mutex);
/* UBLK_IO_FETCH_REQ is only allowed before queue is setup */
if (ublk_queue_ready(ubq)) {
ret = -EBUSY;
goto out;
}
/* allow each command to be FETCHed at most once */
if (io->flags & UBLK_IO_FLAG_ACTIVE) {
ret = -EINVAL;
goto out;
}
WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV);
if (ublk_need_map_io(ubq)) {
/*
* FETCH_RQ has to provide IO buffer if NEED GET
* DATA is not enabled
*/
if (!buf_addr && !ublk_need_get_data(ubq))
goto out;
} else if (buf_addr) {
/* User copy requires addr to be unset */
ret = -EINVAL;
goto out;
}
ublk_fill_io_cmd(io, cmd, buf_addr);
ublk_mark_io_ready(ub, ubq);
out:
mutex_unlock(&ub->mutex);
return ret;
}
static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
unsigned int issue_flags,
const struct ublksrv_io_cmd *ub_cmd)
@ -1985,33 +2069,9 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
case UBLK_IO_UNREGISTER_IO_BUF:
return ublk_unregister_io_buf(cmd, ub_cmd->addr, issue_flags);
case UBLK_IO_FETCH_REQ:
/* UBLK_IO_FETCH_REQ is only allowed before queue is setup */
if (ublk_queue_ready(ubq)) {
ret = -EBUSY;
ret = ublk_fetch(cmd, ubq, io, ub_cmd->addr);
if (ret)
goto out;
}
/*
* The io is being handled by server, so COMMIT_RQ is expected
* instead of FETCH_REQ
*/
if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)
goto out;
if (ublk_need_map_io(ubq)) {
/*
* FETCH_RQ has to provide IO buffer if NEED GET
* DATA is not enabled
*/
if (!ub_cmd->addr && !ublk_need_get_data(ubq))
goto out;
} else if (ub_cmd->addr) {
/* User copy requires addr to be unset */
ret = -EINVAL;
goto out;
}
ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
ublk_mark_io_ready(ub, ubq);
break;
case UBLK_IO_COMMIT_AND_FETCH_REQ:
req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag);
@ -2411,7 +2471,6 @@ static void ublk_remove(struct ublk_device *ub)
bool unprivileged;
ublk_stop_dev(ub);
cancel_work_sync(&ub->nosrv_work);
cdev_device_del(&ub->cdev, &ub->cdev_dev);
unprivileged = ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV;
ublk_put_device(ub);
@ -2696,7 +2755,6 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
goto out_unlock;
mutex_init(&ub->mutex);
spin_lock_init(&ub->lock);
INIT_WORK(&ub->nosrv_work, ublk_nosrv_work);
ret = ublk_alloc_dev_number(ub, header->dev_id);
if (ret < 0)
@ -2828,7 +2886,6 @@ static inline void ublk_ctrl_cmd_dump(struct io_uring_cmd *cmd)
static int ublk_ctrl_stop_dev(struct ublk_device *ub)
{
ublk_stop_dev(ub);
cancel_work_sync(&ub->nosrv_work);
return 0;
}
@ -2932,42 +2989,14 @@ static int ublk_ctrl_set_params(struct ublk_device *ub,
return ret;
}
static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
{
int i;
WARN_ON_ONCE(!(ubq->ubq_daemon && ubq_daemon_is_dying(ubq)));
/* All old ioucmds have to be completed */
ubq->nr_io_ready = 0;
/* old daemon is PF_EXITING, put it now */
put_task_struct(ubq->ubq_daemon);
/* We have to reset it to NULL, otherwise ub won't accept new FETCH_REQ */
ubq->ubq_daemon = NULL;
ubq->timeout = false;
ubq->canceling = false;
for (i = 0; i < ubq->q_depth; i++) {
struct ublk_io *io = &ubq->ios[i];
/* forget everything now and be ready for new FETCH_REQ */
io->flags = 0;
io->cmd = NULL;
io->addr = 0;
}
}
static int ublk_ctrl_start_recovery(struct ublk_device *ub,
const struct ublksrv_ctrl_cmd *header)
{
int ret = -EINVAL;
int i;
mutex_lock(&ub->mutex);
if (ublk_nosrv_should_stop_dev(ub))
goto out_unlock;
if (!ub->nr_queues_ready)
goto out_unlock;
/*
* START_RECOVERY is only allowd after:
*
@ -2991,12 +3020,6 @@ static int ublk_ctrl_start_recovery(struct ublk_device *ub,
goto out_unlock;
}
pr_devel("%s: start recovery for dev id %d.\n", __func__, header->dev_id);
for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
ublk_queue_reinit(ub, ublk_get_queue(ub, i));
/* set to NULL, otherwise new ubq_daemon cannot mmap the io_cmd_buf */
ub->mm = NULL;
ub->nr_queues_ready = 0;
ub->nr_privileged_daemon = 0;
init_completion(&ub->completion);
ret = 0;
out_unlock:
@ -3009,7 +3032,6 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub,
{
int ublksrv_pid = (int)header->data[0];
int ret = -EINVAL;
int i;
pr_devel("%s: Waiting for new ubq_daemons(nr: %d) are ready, dev id %d...\n",
__func__, ub->dev_info.nr_hw_queues, header->dev_id);
@ -3029,24 +3051,10 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub,
goto out_unlock;
}
ub->dev_info.ublksrv_pid = ublksrv_pid;
ub->dev_info.state = UBLK_S_DEV_LIVE;
pr_devel("%s: new ublksrv_pid %d, dev id %d\n",
__func__, ublksrv_pid, header->dev_id);
if (ublk_nosrv_dev_should_queue_io(ub)) {
ub->dev_info.state = UBLK_S_DEV_LIVE;
blk_mq_unquiesce_queue(ub->ub_disk->queue);
pr_devel("%s: queue unquiesced, dev id %d.\n",
__func__, header->dev_id);
blk_mq_kick_requeue_list(ub->ub_disk->queue);
} else {
blk_mq_quiesce_queue(ub->ub_disk->queue);
ub->dev_info.state = UBLK_S_DEV_LIVE;
for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
ublk_get_queue(ub, i)->fail_io = false;
}
blk_mq_unquiesce_queue(ub->ub_disk->queue);
}
blk_mq_kick_requeue_list(ub->ub_disk->queue);
ret = 0;
out_unlock:
mutex_unlock(&ub->mutex);

View File

@ -2357,9 +2357,8 @@ static int bitmap_get_stats(void *data, struct md_bitmap_stats *stats)
if (!bitmap)
return -ENOENT;
if (bitmap->mddev->bitmap_info.external)
return -ENOENT;
if (!bitmap->storage.sb_page) /* no superblock */
if (!bitmap->mddev->bitmap_info.external &&
!bitmap->storage.sb_page)
return -EINVAL;
sb = kmap_local_page(bitmap->storage.sb_page);
stats->sync_size = le64_to_cpu(sb->sync_size);

View File

@ -2200,14 +2200,9 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
if (!rdev_set_badblocks(rdev, sect, s, 0))
abort = 1;
}
if (abort) {
conf->recovery_disabled =
mddev->recovery_disabled;
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_done_sync(mddev, r1_bio->sectors, 0);
put_buf(r1_bio);
if (abort)
return 0;
}
/* Try next page */
sectors -= s;
sect += s;
@ -2346,10 +2341,21 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
int disks = conf->raid_disks * 2;
struct bio *wbio;
if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
/* ouch - failed to read all of that. */
if (!fix_sync_read_error(r1_bio))
if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
/*
* ouch - failed to read all of that.
* No need to fix read error for check/repair
* because all member disks are read.
*/
if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) ||
!fix_sync_read_error(r1_bio)) {
conf->recovery_disabled = mddev->recovery_disabled;
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_done_sync(mddev, r1_bio->sectors, 0);
put_buf(r1_bio);
return;
}
}
if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
process_checks(r1_bio);

View File

@ -1735,6 +1735,7 @@ retry_discard:
* The discard bio returns only first r10bio finishes
*/
if (first_copy) {
md_account_bio(mddev, &bio);
r10_bio->master_bio = bio;
set_bit(R10BIO_Discard, &r10_bio->state);
first_copy = false;

View File

@ -4300,7 +4300,7 @@ static void nvme_scan_work(struct work_struct *work)
if (test_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events))
nvme_queue_scan(ctrl);
#ifdef CONFIG_NVME_MULTIPATH
else
else if (ctrl->ana_log_buf)
/* Re-read the ANA log page to not miss updates */
queue_work(nvme_wq, &ctrl->ana_work);
#endif

View File

@ -1050,6 +1050,13 @@ void nvme_mpath_add_sysfs_link(struct nvme_ns_head *head)
srcu_idx = srcu_read_lock(&head->srcu);
list_for_each_entry_rcu(ns, &head->list, siblings) {
/*
* Ensure that ns path disk node is already added otherwise we
* may get invalid kobj name for target
*/
if (!test_bit(GD_ADDED, &ns->disk->state))
continue;
/*
* Avoid creating link if it already exists for the given path.
* When path ana state transitions from optimized to non-
@ -1065,13 +1072,6 @@ void nvme_mpath_add_sysfs_link(struct nvme_ns_head *head)
if (test_and_set_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags))
continue;
/*
* Ensure that ns path disk node is already added otherwise we
* may get invalid kobj name for target
*/
if (!test_bit(GD_ADDED, &ns->disk->state))
continue;
target = disk_to_dev(ns->disk);
/*
* Create sysfs link from head gendisk kobject @kobj to the

View File

@ -240,7 +240,7 @@ void nvmet_auth_sq_free(struct nvmet_sq *sq)
{
cancel_delayed_work(&sq->auth_expired_work);
#ifdef CONFIG_NVME_TARGET_TCP_TLS
sq->tls_key = 0;
sq->tls_key = NULL;
#endif
kfree(sq->dhchap_c1);
sq->dhchap_c1 = NULL;

View File

@ -1648,16 +1648,17 @@ static int nvmet_pci_epf_process_sq(struct nvmet_pci_epf_ctrl *ctrl,
{
struct nvmet_pci_epf_iod *iod;
int ret, n = 0;
u16 head = sq->head;
sq->tail = nvmet_pci_epf_bar_read32(ctrl, sq->db);
while (sq->head != sq->tail && (!ctrl->sq_ab || n < ctrl->sq_ab)) {
while (head != sq->tail && (!ctrl->sq_ab || n < ctrl->sq_ab)) {
iod = nvmet_pci_epf_alloc_iod(sq);
if (!iod)
break;
/* Get the NVMe command submitted by the host. */
ret = nvmet_pci_epf_transfer(ctrl, &iod->cmd,
sq->pci_addr + sq->head * sq->qes,
sq->pci_addr + head * sq->qes,
sq->qes, DMA_FROM_DEVICE);
if (ret) {
/* Not much we can do... */
@ -1666,12 +1667,13 @@ static int nvmet_pci_epf_process_sq(struct nvmet_pci_epf_ctrl *ctrl,
}
dev_dbg(ctrl->dev, "SQ[%u]: head %u, tail %u, command %s\n",
sq->qid, sq->head, sq->tail,
sq->qid, head, sq->tail,
nvmet_pci_epf_iod_name(iod));
sq->head++;
if (sq->head == sq->depth)
sq->head = 0;
head++;
if (head == sq->depth)
head = 0;
WRITE_ONCE(sq->head, head);
n++;
queue_work_on(WORK_CPU_UNBOUND, sq->iod_wq, &iod->work);
@ -1761,8 +1763,17 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work)
if (!iod)
break;
/* Post the IOD completion entry. */
/*
* Post the IOD completion entry. If the IOD request was
* executed (req->execute() called), the CQE is already
* initialized. However, the IOD may have been failed before
* that, leaving the CQE not properly initialized. So always
* initialize it here.
*/
cqe = &iod->cqe;
cqe->sq_head = cpu_to_le16(READ_ONCE(iod->sq->head));
cqe->sq_id = cpu_to_le16(iod->sq->qid);
cqe->command_id = iod->cmd.common.command_id;
cqe->status = cpu_to_le16((iod->status << 1) | cq->phase);
dev_dbg(ctrl->dev,
@ -1800,6 +1811,21 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work)
NVMET_PCI_EPF_CQ_RETRY_INTERVAL);
}
/*
 * Reset both the cached and the target-core copies of the controller
 * status (CSTS) and configuration (CC) registers, and mirror the zeroed
 * values into the controller BAR so the host sees a clean state.
 */
static void nvmet_pci_epf_clear_ctrl_config(struct nvmet_pci_epf_ctrl *ctrl)
{
	struct nvmet_ctrl *tctrl = ctrl->tctrl;

	/* Initialize controller status. */
	tctrl->csts = 0;
	ctrl->csts = 0;
	nvmet_pci_epf_bar_write32(ctrl, NVME_REG_CSTS, ctrl->csts);

	/* Initialize controller configuration and start polling. */
	tctrl->cc = 0;
	ctrl->cc = 0;
	nvmet_pci_epf_bar_write32(ctrl, NVME_REG_CC, ctrl->cc);
}
static int nvmet_pci_epf_enable_ctrl(struct nvmet_pci_epf_ctrl *ctrl)
{
u64 pci_addr, asq, acq;
@ -1865,18 +1891,20 @@ static int nvmet_pci_epf_enable_ctrl(struct nvmet_pci_epf_ctrl *ctrl)
return 0;
err:
ctrl->csts = 0;
nvmet_pci_epf_clear_ctrl_config(ctrl);
return -EINVAL;
}
static void nvmet_pci_epf_disable_ctrl(struct nvmet_pci_epf_ctrl *ctrl)
static void nvmet_pci_epf_disable_ctrl(struct nvmet_pci_epf_ctrl *ctrl,
bool shutdown)
{
int qid;
if (!ctrl->enabled)
return;
dev_info(ctrl->dev, "Disabling controller\n");
dev_info(ctrl->dev, "%s controller\n",
shutdown ? "Shutting down" : "Disabling");
ctrl->enabled = false;
cancel_delayed_work_sync(&ctrl->poll_sqs);
@ -1893,6 +1921,11 @@ static void nvmet_pci_epf_disable_ctrl(struct nvmet_pci_epf_ctrl *ctrl)
nvmet_pci_epf_delete_cq(ctrl->tctrl, 0);
ctrl->csts &= ~NVME_CSTS_RDY;
if (shutdown) {
ctrl->csts |= NVME_CSTS_SHST_CMPLT;
ctrl->cc &= ~NVME_CC_ENABLE;
nvmet_pci_epf_bar_write32(ctrl, NVME_REG_CC, ctrl->cc);
}
}
static void nvmet_pci_epf_poll_cc_work(struct work_struct *work)
@ -1919,12 +1952,10 @@ static void nvmet_pci_epf_poll_cc_work(struct work_struct *work)
}
if (!nvmet_cc_en(new_cc) && nvmet_cc_en(old_cc))
nvmet_pci_epf_disable_ctrl(ctrl);
nvmet_pci_epf_disable_ctrl(ctrl, false);
if (nvmet_cc_shn(new_cc) && !nvmet_cc_shn(old_cc)) {
nvmet_pci_epf_disable_ctrl(ctrl);
ctrl->csts |= NVME_CSTS_SHST_CMPLT;
}
if (nvmet_cc_shn(new_cc) && !nvmet_cc_shn(old_cc))
nvmet_pci_epf_disable_ctrl(ctrl, true);
if (!nvmet_cc_shn(new_cc) && nvmet_cc_shn(old_cc))
ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
@ -1963,16 +1994,10 @@ static void nvmet_pci_epf_init_bar(struct nvmet_pci_epf_ctrl *ctrl)
/* Clear Controller Memory Buffer Supported (CMBS). */
ctrl->cap &= ~(0x1ULL << 57);
/* Controller configuration. */
ctrl->cc = tctrl->cc & (~NVME_CC_ENABLE);
/* Controller status. */
ctrl->csts = ctrl->tctrl->csts;
nvmet_pci_epf_bar_write64(ctrl, NVME_REG_CAP, ctrl->cap);
nvmet_pci_epf_bar_write32(ctrl, NVME_REG_VS, tctrl->subsys->ver);
nvmet_pci_epf_bar_write32(ctrl, NVME_REG_CSTS, ctrl->csts);
nvmet_pci_epf_bar_write32(ctrl, NVME_REG_CC, ctrl->cc);
nvmet_pci_epf_clear_ctrl_config(ctrl);
}
static int nvmet_pci_epf_create_ctrl(struct nvmet_pci_epf *nvme_epf,
@ -2070,14 +2095,22 @@ out_mempool_exit:
static void nvmet_pci_epf_start_ctrl(struct nvmet_pci_epf_ctrl *ctrl)
{
dev_info(ctrl->dev, "PCI link up\n");
ctrl->link_up = true;
schedule_delayed_work(&ctrl->poll_cc, NVMET_PCI_EPF_CC_POLL_INTERVAL);
}
static void nvmet_pci_epf_stop_ctrl(struct nvmet_pci_epf_ctrl *ctrl)
{
dev_info(ctrl->dev, "PCI link down\n");
ctrl->link_up = false;
cancel_delayed_work_sync(&ctrl->poll_cc);
nvmet_pci_epf_disable_ctrl(ctrl);
nvmet_pci_epf_disable_ctrl(ctrl, false);
nvmet_pci_epf_clear_ctrl_config(ctrl);
}
static void nvmet_pci_epf_destroy_ctrl(struct nvmet_pci_epf_ctrl *ctrl)
@ -2300,10 +2333,8 @@ static int nvmet_pci_epf_epc_init(struct pci_epf *epf)
if (ret)
goto out_clear_bar;
if (!epc_features->linkup_notifier) {
ctrl->link_up = true;
if (!epc_features->linkup_notifier)
nvmet_pci_epf_start_ctrl(&nvme_epf->ctrl);
}
return 0;
@ -2319,7 +2350,6 @@ static void nvmet_pci_epf_epc_deinit(struct pci_epf *epf)
struct nvmet_pci_epf *nvme_epf = epf_get_drvdata(epf);
struct nvmet_pci_epf_ctrl *ctrl = &nvme_epf->ctrl;
ctrl->link_up = false;
nvmet_pci_epf_destroy_ctrl(ctrl);
nvmet_pci_epf_deinit_dma(nvme_epf);
@ -2331,7 +2361,6 @@ static int nvmet_pci_epf_link_up(struct pci_epf *epf)
struct nvmet_pci_epf *nvme_epf = epf_get_drvdata(epf);
struct nvmet_pci_epf_ctrl *ctrl = &nvme_epf->ctrl;
ctrl->link_up = true;
nvmet_pci_epf_start_ctrl(ctrl);
return 0;
@ -2342,7 +2371,6 @@ static int nvmet_pci_epf_link_down(struct pci_epf *epf)
struct nvmet_pci_epf *nvme_epf = epf_get_drvdata(epf);
struct nvmet_pci_epf_ctrl *ctrl = &nvme_epf->ctrl;
ctrl->link_up = false;
nvmet_pci_epf_stop_ctrl(ctrl);
return 0;

View File

@ -6,6 +6,9 @@ LDLIBS += -lpthread -lm -luring
TEST_PROGS := test_generic_01.sh
TEST_PROGS += test_generic_02.sh
TEST_PROGS += test_generic_03.sh
TEST_PROGS += test_generic_04.sh
TEST_PROGS += test_generic_05.sh
TEST_PROGS += test_generic_06.sh
TEST_PROGS += test_null_01.sh
TEST_PROGS += test_null_02.sh
@ -21,12 +24,16 @@ TEST_PROGS += test_stripe_04.sh
TEST_PROGS += test_stress_01.sh
TEST_PROGS += test_stress_02.sh
TEST_PROGS += test_stress_03.sh
TEST_PROGS += test_stress_04.sh
TEST_PROGS += test_stress_05.sh
TEST_GEN_PROGS_EXTENDED = kublk
include ../lib.mk
$(TEST_GEN_PROGS_EXTENDED): kublk.c null.c file_backed.c common.c stripe.c
$(TEST_GEN_PROGS_EXTENDED): kublk.c null.c file_backed.c common.c stripe.c \
fault_inject.c
check:
shellcheck -x -f gcc *.sh

View File

@ -0,0 +1,98 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Fault injection ublk target. Hack this up however you like for
* testing specific behaviors of ublk_drv. Currently it is a null target
* with a configurable delay before completing each I/O. This delay can
* be used to test ublk_drv's handling of I/O outstanding to the ublk
* server when it dies.
*/
#include "kublk.h"
/*
 * Initialize the fault_inject target: a 250 GiB null device whose only
 * configuration is a per-IO completion delay.
 *
 * The delay (in microseconds from the command line) is converted to
 * nanoseconds and stashed in dev->private_data for the queue-IO path.
 * NOTE(review): the product delay_us * 1000 is stored via an
 * unsigned long cast — presumably large delays are not expected;
 * values of one second or more would not fit tv_nsec — confirm.
 */
static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx,
				      struct ublk_dev *dev)
{
	const struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
	unsigned long dev_size = 250UL << 30;

	dev->tgt.dev_size = dev_size;
	dev->tgt.params = (struct ublk_params) {
		.types = UBLK_PARAM_TYPE_BASIC,
		.basic = {
			.logical_bs_shift	= 9,
			.physical_bs_shift	= 12,
			.io_opt_shift		= 12,
			.io_min_shift		= 9,
			.max_sectors		= info->max_io_buf_bytes >> 9,
			.dev_sectors		= dev_size >> 9,
		},
	};

	/* delay is carried as nanoseconds inside the opaque pointer */
	dev->private_data = (void *)(unsigned long)(ctx->fault_inject.delay_us * 1000);
	return 0;
}
/*
 * Queue one IO: instead of doing any data transfer, arm an io_uring
 * timeout for the configured delay; the IO is completed from the
 * timeout CQE in ublk_fault_inject_tgt_io_done().
 */
static int ublk_fault_inject_queue_io(struct ublk_queue *q, int tag)
{
	const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
	struct io_uring_sqe *sqe;
	/* delay in nanoseconds, stashed by ublk_fault_inject_tgt_init() */
	struct __kernel_timespec ts = {
		.tv_nsec = (long long)q->dev->private_data,
	};

	ublk_queue_alloc_sqes(q, &sqe, 1);
	io_uring_prep_timeout(sqe, &ts, 1, 0);
	/* tag the CQE so io_done can map it back to this IO (tgt_data = 1) */
	sqe->user_data = build_user_data(tag, ublksrv_get_op(iod), 0, 1);

	/* one target SQE is in flight for this tag */
	ublk_queued_tgt_io(q, tag, 1);
	return 0;
}
/*
 * Handle the timeout CQE armed by ublk_fault_inject_queue_io() and
 * complete the delayed IO.
 *
 * A timeout that fires normally completes with -ETIME, so any other
 * res value is unexpected and only logged.
 */
static void ublk_fault_inject_tgt_io_done(struct ublk_queue *q, int tag,
					  const struct io_uring_cqe *cqe)
{
	const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);

	if (cqe->res != -ETIME)
		ublk_err("%s: unexpected cqe res %d\n", __func__, cqe->res);

	/* exactly one target SQE was queued, so this should always be true */
	if (ublk_completed_tgt_io(q, tag))
		ublk_complete_io(q, tag, iod->nr_sectors << 9);
	else
		ublk_err("%s: io not complete after 1 cqe\n", __func__);
}
/*
 * Parse fault_inject-specific options: --delay_us <microseconds>
 * (defaults to 0 when absent).
 */
static void ublk_fault_inject_cmd_line(struct dev_ctx *ctx, int argc, char *argv[])
{
	static const struct option opts[] = {
		{ "delay_us",	1,	NULL,  0  },
		{ 0, 0, 0, 0 }
	};
	int idx = 0;
	int c;

	ctx->fault_inject.delay_us = 0;
	for (;;) {
		c = getopt_long(argc, argv, "", opts, &idx);
		if (c == -1)
			break;
		if (c == 0 && !strcmp(opts[idx].name, "delay_us"))
			ctx->fault_inject.delay_us = strtoll(optarg, NULL, 10);
	}
}
/* Print the command-line help line for the fault_inject target. */
static void ublk_fault_inject_usage(const struct ublk_tgt_ops *ops)
{
	fputs("\tfault_inject: [--delay_us us (default 0)]\n", stdout);
}
/* Target ops table; registered via tgt_ops_list in kublk.c. */
const struct ublk_tgt_ops fault_inject_tgt_ops = {
	.name = "fault_inject",
	.init_tgt = ublk_fault_inject_tgt_init,
	.queue_io = ublk_fault_inject_queue_io,
	.tgt_io_done = ublk_fault_inject_tgt_io_done,
	.parse_cmd_line = ublk_fault_inject_cmd_line,
	.usage = ublk_fault_inject_usage,
};

View File

@ -5,22 +5,24 @@
#include "kublk.h"
#define MAX_NR_TGT_ARG 64
unsigned int ublk_dbg_mask = UBLK_LOG;
static const struct ublk_tgt_ops *tgt_ops_list[] = {
&null_tgt_ops,
&loop_tgt_ops,
&stripe_tgt_ops,
&fault_inject_tgt_ops,
};
static const struct ublk_tgt_ops *ublk_find_tgt(const char *name)
{
const struct ublk_tgt_ops *ops;
int i;
if (name == NULL)
return NULL;
for (i = 0; sizeof(tgt_ops_list) / sizeof(ops); i++)
for (i = 0; i < ARRAY_SIZE(tgt_ops_list); i++)
if (strcmp(tgt_ops_list[i]->name, name) == 0)
return tgt_ops_list[i];
return NULL;
@ -118,6 +120,27 @@ static int ublk_ctrl_start_dev(struct ublk_dev *dev,
return __ublk_ctrl_cmd(dev, &data);
}
/*
 * Issue UBLK_U_CMD_START_USER_RECOVERY to the control device.
 * Returns the result of __ublk_ctrl_cmd() (negative on error).
 */
static int ublk_ctrl_start_user_recovery(struct ublk_dev *dev)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_START_USER_RECOVERY,
	};

	return __ublk_ctrl_cmd(dev, &data);
}
/*
 * Issue UBLK_U_CMD_END_USER_RECOVERY with the new daemon's pid, and
 * record that pid in the cached dev_info as well.
 * Returns the result of __ublk_ctrl_cmd() (negative on error).
 */
static int ublk_ctrl_end_user_recovery(struct ublk_dev *dev, int daemon_pid)
{
	struct ublk_ctrl_cmd_data data = {
		.cmd_op = UBLK_U_CMD_END_USER_RECOVERY,
		.flags	= CTRL_CMD_HAS_DATA,
	};

	/* the kernel learns the pid from data[0]; keep our copy in sync */
	dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;

	return __ublk_ctrl_cmd(dev, &data);
}
static int ublk_ctrl_add_dev(struct ublk_dev *dev)
{
struct ublk_ctrl_cmd_data data = {
@ -207,10 +230,73 @@ static const char *ublk_dev_state_desc(struct ublk_dev *dev)
};
}
/*
 * Format the CPUs present in @set into @buf as a space-separated list
 * of CPU ids (e.g. "0 2 3 ").
 *
 * Fixes over the previous version: @buf is always NUL-terminated (it
 * was left untouched when the set was empty, so the caller printed
 * uninitialized memory), and formatting stops once the buffer is full
 * (previously `done` could exceed @len, making the unsigned `len - done`
 * wrap to a huge size and snprintf write past the end of @buf).
 */
static void ublk_print_cpu_set(const cpu_set_t *set, char *buf, unsigned len)
{
	unsigned done = 0;
	int i, n;

	if (!len)
		return;
	buf[0] = '\0';

	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, set))
			continue;
		n = snprintf(&buf[done], len - done, "%d ", i);
		if (n < 0 || (unsigned)n >= len - done)
			break;	/* truncated: buffer is full */
		done += n;
	}
}
/*
 * Reduce @set to its lowest-numbered CPU.
 *
 * Just keep the 1st CPU now; in future, auto affinity selection can
 * be tried.
 */
static void ublk_adjust_affinity(cpu_set_t *set)
{
	int cpu;
	int kept_first = 0;

	for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
		if (!CPU_ISSET(cpu, set))
			continue;
		if (kept_first)
			CPU_CLR(cpu, set);
		else
			kept_first = 1;
	}
}
/* Caller must free the allocated buffer */
static int ublk_ctrl_get_affinity(struct ublk_dev *ctrl_dev, cpu_set_t **ptr_buf)
{
struct ublk_ctrl_cmd_data data = {
.cmd_op = UBLK_U_CMD_GET_QUEUE_AFFINITY,
.flags = CTRL_CMD_HAS_DATA | CTRL_CMD_HAS_BUF,
};
cpu_set_t *buf;
int i, ret;
buf = malloc(sizeof(cpu_set_t) * ctrl_dev->dev_info.nr_hw_queues);
if (!buf)
return -ENOMEM;
for (i = 0; i < ctrl_dev->dev_info.nr_hw_queues; i++) {
data.data[0] = i;
data.len = sizeof(cpu_set_t);
data.addr = (__u64)&buf[i];
ret = __ublk_ctrl_cmd(ctrl_dev, &data);
if (ret < 0) {
free(buf);
return ret;
}
ublk_adjust_affinity(&buf[i]);
}
*ptr_buf = buf;
return 0;
}
static void ublk_ctrl_dump(struct ublk_dev *dev)
{
struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
struct ublk_params p;
cpu_set_t *affinity;
int ret;
ret = ublk_ctrl_get_params(dev, &p);
@ -219,12 +305,31 @@ static void ublk_ctrl_dump(struct ublk_dev *dev)
return;
}
ret = ublk_ctrl_get_affinity(dev, &affinity);
if (ret < 0) {
ublk_err("failed to get affinity %m\n");
return;
}
ublk_log("dev id %d: nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n",
info->dev_id, info->nr_hw_queues, info->queue_depth,
1 << p.basic.logical_bs_shift, p.basic.dev_sectors);
ublk_log("\tmax rq size %d daemon pid %d flags 0x%llx state %s\n",
info->max_io_buf_bytes, info->ublksrv_pid, info->flags,
ublk_dev_state_desc(dev));
if (affinity) {
char buf[512];
int i;
for (i = 0; i < info->nr_hw_queues; i++) {
ublk_print_cpu_set(&affinity[i], buf, sizeof(buf));
printf("\tqueue %u: tid %d affinity(%s)\n",
i, dev->q[i].tid, buf);
}
free(affinity);
}
fflush(stdout);
}
@ -347,7 +452,9 @@ static int ublk_queue_init(struct ublk_queue *q)
}
ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth,
IORING_SETUP_COOP_TASKRUN);
IORING_SETUP_COOP_TASKRUN |
IORING_SETUP_SINGLE_ISSUER |
IORING_SETUP_DEFER_TASKRUN);
if (ret < 0) {
ublk_err("ublk dev %d queue %d setup io_uring failed %d\n",
q->dev->dev_info.dev_id, q->q_id, ret);
@ -602,9 +709,24 @@ static int ublk_process_io(struct ublk_queue *q)
return reapped;
}
/*
 * Pin the calling queue pthread to @cpuset (pid 0 == current thread).
 * Failure is only logged; the queue keeps running unpinned.
 * NOTE(review): the error message has no trailing newline — confirm
 * whether ublk_err() appends one.
 */
static void ublk_queue_set_sched_affinity(const struct ublk_queue *q,
		cpu_set_t *cpuset)
{
	if (sched_setaffinity(0, sizeof(*cpuset), cpuset) < 0)
		ublk_err("ublk dev %u queue %u set affinity failed",
				q->dev->dev_info.dev_id, q->q_id);
}
/*
 * Per-queue startup package handed to ublk_io_handler_fn(): the queue
 * itself, a semaphore posted once the queue thread is initialized, and
 * the CPU set the thread should pin itself to.
 */
struct ublk_queue_info {
	struct ublk_queue 	*q;
	sem_t 			*queue_sem;
	cpu_set_t 		*affinity;
};
static void *ublk_io_handler_fn(void *data)
{
struct ublk_queue *q = data;
struct ublk_queue_info *info = data;
struct ublk_queue *q = info->q;
int dev_id = q->dev->dev_info.dev_id;
int ret;
@ -614,6 +736,10 @@ static void *ublk_io_handler_fn(void *data)
dev_id, q->q_id);
return NULL;
}
/* IO performance is sensitive to queue pthread affinity on NUMA machines */
ublk_queue_set_sched_affinity(q, info->affinity);
sem_post(info->queue_sem);
ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n",
q->tid, dev_id, q->q_id);
@ -639,7 +765,7 @@ static void ublk_set_parameters(struct ublk_dev *dev)
dev->dev_info.dev_id, ret);
}
static int ublk_send_dev_event(const struct dev_ctx *ctx, int dev_id)
static int ublk_send_dev_event(const struct dev_ctx *ctx, struct ublk_dev *dev, int dev_id)
{
uint64_t id;
int evtfd = ctx->_evtfd;
@ -652,36 +778,68 @@ static int ublk_send_dev_event(const struct dev_ctx *ctx, int dev_id)
else
id = ERROR_EVTFD_DEVID;
if (dev && ctx->shadow_dev)
memcpy(&ctx->shadow_dev->q, &dev->q, sizeof(dev->q));
if (write(evtfd, &id, sizeof(id)) != sizeof(id))
return -EINVAL;
close(evtfd);
shmdt(ctx->shadow_dev);
return 0;
}
static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
{
int ret, i;
void *thread_ret;
const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info;
struct ublk_queue_info *qinfo;
cpu_set_t *affinity_buf;
void *thread_ret;
sem_t queue_sem;
int ret, i;
ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__);
qinfo = (struct ublk_queue_info *)calloc(sizeof(struct ublk_queue_info),
dinfo->nr_hw_queues);
if (!qinfo)
return -ENOMEM;
sem_init(&queue_sem, 0, 0);
ret = ublk_dev_prep(ctx, dev);
if (ret)
return ret;
ret = ublk_ctrl_get_affinity(dev, &affinity_buf);
if (ret)
return ret;
for (i = 0; i < dinfo->nr_hw_queues; i++) {
dev->q[i].dev = dev;
dev->q[i].q_id = i;
qinfo[i].q = &dev->q[i];
qinfo[i].queue_sem = &queue_sem;
qinfo[i].affinity = &affinity_buf[i];
pthread_create(&dev->q[i].thread, NULL,
ublk_io_handler_fn,
&dev->q[i]);
&qinfo[i]);
}
for (i = 0; i < dinfo->nr_hw_queues; i++)
sem_wait(&queue_sem);
free(qinfo);
free(affinity_buf);
/* everything is fine now, start us */
ublk_set_parameters(dev);
ret = ublk_ctrl_start_dev(dev, getpid());
if (ctx->recovery)
ret = ublk_ctrl_end_user_recovery(dev, getpid());
else {
ublk_set_parameters(dev);
ret = ublk_ctrl_start_dev(dev, getpid());
}
if (ret < 0) {
ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret);
goto fail;
@ -691,7 +849,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
if (ctx->fg)
ublk_ctrl_dump(dev);
else
ublk_send_dev_event(ctx, dev->dev_info.dev_id);
ublk_send_dev_event(ctx, dev, dev->dev_info.dev_id);
/* wait until we are terminated */
for (i = 0; i < dinfo->nr_hw_queues; i++)
@ -856,7 +1014,10 @@ static int __cmd_dev_add(const struct dev_ctx *ctx)
}
}
ret = ublk_ctrl_add_dev(dev);
if (ctx->recovery)
ret = ublk_ctrl_start_user_recovery(dev);
else
ret = ublk_ctrl_add_dev(dev);
if (ret < 0) {
ublk_err("%s: can't add dev id %d, type %s ret %d\n",
__func__, dev_id, tgt_type, ret);
@ -870,7 +1031,7 @@ static int __cmd_dev_add(const struct dev_ctx *ctx)
fail:
if (ret < 0)
ublk_send_dev_event(ctx, -1);
ublk_send_dev_event(ctx, dev, -1);
ublk_ctrl_deinit(dev);
return ret;
}
@ -884,30 +1045,58 @@ static int cmd_dev_add(struct dev_ctx *ctx)
if (ctx->fg)
goto run;
ctx->_shmid = shmget(IPC_PRIVATE, sizeof(struct ublk_dev), IPC_CREAT | 0666);
if (ctx->_shmid < 0) {
ublk_err("%s: failed to shmget %s\n", __func__, strerror(errno));
exit(-1);
}
ctx->shadow_dev = (struct ublk_dev *)shmat(ctx->_shmid, NULL, 0);
if (ctx->shadow_dev == (struct ublk_dev *)-1) {
ublk_err("%s: failed to shmat %s\n", __func__, strerror(errno));
exit(-1);
}
ctx->_evtfd = eventfd(0, 0);
if (ctx->_evtfd < 0) {
ublk_err("%s: failed to create eventfd %s\n", __func__, strerror(errno));
exit(-1);
}
setsid();
res = fork();
if (res == 0) {
int res2;
setsid();
res2 = fork();
if (res2 == 0) {
/* prepare for detaching */
close(STDIN_FILENO);
close(STDOUT_FILENO);
close(STDERR_FILENO);
run:
res = __cmd_dev_add(ctx);
return res;
res = __cmd_dev_add(ctx);
return res;
} else {
/* detached from the foreground task */
exit(EXIT_SUCCESS);
}
} else if (res > 0) {
uint64_t id;
int exit_code = EXIT_FAILURE;
res = read(ctx->_evtfd, &id, sizeof(id));
close(ctx->_evtfd);
if (res == sizeof(id) && id != ERROR_EVTFD_DEVID) {
ctx->dev_id = id - 1;
return __cmd_dev_list(ctx);
if (__cmd_dev_list(ctx) >= 0)
exit_code = EXIT_SUCCESS;
}
exit(EXIT_FAILURE);
shmdt(ctx->shadow_dev);
shmctl(ctx->_shmid, IPC_RMID, NULL);
/* wait for child and detach from it */
wait(NULL);
exit(exit_code);
} else {
return res;
exit(EXIT_FAILURE);
}
}
@ -969,6 +1158,9 @@ static int __cmd_dev_list(struct dev_ctx *ctx)
ublk_err("%s: can't get dev info from %d: %d\n",
__func__, ctx->dev_id, ret);
} else {
if (ctx->shadow_dev)
memcpy(&dev->q, ctx->shadow_dev->q, sizeof(dev->q));
ublk_ctrl_dump(dev);
}
@ -1039,14 +1231,47 @@ static int cmd_dev_get_features(void)
return ret;
}
/*
 * Print the shared usage text for the "add" and "recover" commands,
 * followed by each target's own option help (if it provides one).
 *
 * Uses the ARRAY_SIZE macro from kublk.h and a size_t index instead of
 * the open-coded sizeof expression with a signed loop counter.
 */
static void __cmd_create_help(char *exe, bool recovery)
{
	size_t i;

	printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n",
			exe, recovery ? "recover" : "add");
	printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask] [-r 0|1 ] [-g 0|1]\n");
	printf("\t[-e 0|1 ] [-i 0|1]\n");
	printf("\t[target options] [backfile1] [backfile2] ...\n");
	printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n");

	for (i = 0; i < ARRAY_SIZE(tgt_ops_list); i++) {
		const struct ublk_tgt_ops *ops = tgt_ops_list[i];

		if (ops->usage)
			ops->usage(ops);
	}
}
/* Usage text for "add": shared creation help plus a blank separator. */
static void cmd_add_help(char *exe)
{
	__cmd_create_help(exe, false);
	printf("\n");
}
/* Usage text for "recover": shared creation help plus a recovery note. */
static void cmd_recover_help(char *exe)
{
	__cmd_create_help(exe, true);
	printf("\tPlease provide exact command line for creating this device with real dev_id\n");
	printf("\n");
}
static int cmd_dev_help(char *exe)
{
printf("%s add -t [null|loop] [-q nr_queues] [-d depth] [-n dev_id] [backfile1] [backfile2] ...\n", exe);
printf("\t default: nr_queues=2(max 4), depth=128(max 128), dev_id=-1(auto allocation)\n");
cmd_add_help(exe);
cmd_recover_help(exe);
printf("%s del [-n dev_id] -a \n", exe);
printf("\t -a delete all devices -n delete specified device\n");
printf("\t -a delete all devices -n delete specified device\n\n");
printf("%s list [-n dev_id] -a \n", exe);
printf("\t -a list all devices, -n list specified device, default -a \n");
printf("\t -a list all devices, -n list specified device, default -a \n\n");
printf("%s features\n", exe);
return 0;
}
@ -1063,9 +1288,13 @@ int main(int argc, char *argv[])
{ "quiet", 0, NULL, 0 },
{ "zero_copy", 0, NULL, 'z' },
{ "foreground", 0, NULL, 0 },
{ "chunk_size", 1, NULL, 0 },
{ "recovery", 1, NULL, 'r' },
{ "recovery_fail_io", 1, NULL, 'e'},
{ "recovery_reissue", 1, NULL, 'i'},
{ "get_data", 1, NULL, 'g'},
{ 0, 0, 0, 0 }
};
const struct ublk_tgt_ops *ops = NULL;
int option_idx, opt;
const char *cmd = argv[1];
struct dev_ctx ctx = {
@ -1073,15 +1302,18 @@ int main(int argc, char *argv[])
.nr_hw_queues = 2,
.dev_id = -1,
.tgt_type = "unknown",
.chunk_size = 65536, /* def chunk size is 64K */
};
int ret = -EINVAL, i;
int tgt_argc = 1;
char *tgt_argv[MAX_NR_TGT_ARG] = { NULL };
int value;
if (argc == 1)
return ret;
opterr = 0;
optind = 2;
while ((opt = getopt_long(argc, argv, "t:n:d:q:az",
while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:az",
longopts, &option_idx)) != -1) {
switch (opt) {
case 'a':
@ -1103,6 +1335,25 @@ int main(int argc, char *argv[])
case 'z':
ctx.flags |= UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_USER_COPY;
break;
case 'r':
value = strtol(optarg, NULL, 10);
if (value)
ctx.flags |= UBLK_F_USER_RECOVERY;
break;
case 'e':
value = strtol(optarg, NULL, 10);
if (value)
ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_FAIL_IO;
break;
case 'i':
value = strtol(optarg, NULL, 10);
if (value)
ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE;
break;
case 'g':
value = strtol(optarg, NULL, 10);
if (value)
ctx.flags |= UBLK_F_NEED_GET_DATA;
case 0:
if (!strcmp(longopts[option_idx].name, "debug_mask"))
ublk_dbg_mask = strtol(optarg, NULL, 16);
@ -1110,8 +1361,26 @@ int main(int argc, char *argv[])
ublk_dbg_mask = 0;
if (!strcmp(longopts[option_idx].name, "foreground"))
ctx.fg = 1;
if (!strcmp(longopts[option_idx].name, "chunk_size"))
ctx.chunk_size = strtol(optarg, NULL, 10);
break;
case '?':
/*
* target requires every option must have argument
*/
if (argv[optind][0] == '-' || argv[optind - 1][0] != '-') {
fprintf(stderr, "every target option requires argument: %s %s\n",
argv[optind - 1], argv[optind]);
exit(EXIT_FAILURE);
}
if (tgt_argc < (MAX_NR_TGT_ARG - 1) / 2) {
tgt_argv[tgt_argc++] = argv[optind - 1];
tgt_argv[tgt_argc++] = argv[optind];
} else {
fprintf(stderr, "too many target options\n");
exit(EXIT_FAILURE);
}
optind += 1;
break;
}
}
@ -1120,9 +1389,25 @@ int main(int argc, char *argv[])
ctx.files[ctx.nr_files++] = argv[i++];
}
ops = ublk_find_tgt(ctx.tgt_type);
if (ops && ops->parse_cmd_line) {
optind = 0;
tgt_argv[0] = ctx.tgt_type;
ops->parse_cmd_line(&ctx, tgt_argc, tgt_argv);
}
if (!strcmp(cmd, "add"))
ret = cmd_dev_add(&ctx);
else if (!strcmp(cmd, "del"))
else if (!strcmp(cmd, "recover")) {
if (ctx.dev_id < 0) {
fprintf(stderr, "device id isn't provided for recovering\n");
ret = -EINVAL;
} else {
ctx.recovery = 1;
ret = cmd_dev_add(&ctx);
}
} else if (!strcmp(cmd, "del"))
ret = cmd_dev_del(&ctx);
else if (!strcmp(cmd, "list")) {
ctx.all = 1;

View File

@ -20,9 +20,15 @@
#include <sys/wait.h>
#include <sys/eventfd.h>
#include <sys/uio.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <linux/io_uring.h>
#include <liburing.h>
#include <linux/ublk_cmd.h>
#include <semaphore.h>
/* allow ublk_dep.h to override ublk_cmd.h */
#include "ublk_dep.h"
#include <linux/ublk_cmd.h>
#define __maybe_unused __attribute__((unused))
#define MAX_BACK_FILES 4
@ -30,6 +36,8 @@
#define min(a, b) ((a) < (b) ? (a) : (b))
#endif
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
/****************** part 1: libublk ********************/
#define CTRL_DEV "/dev/ublk-control"
@ -42,8 +50,8 @@
#define UBLKSRV_IO_IDLE_SECS 20
#define UBLK_IO_MAX_BYTES (1 << 20)
#define UBLK_MAX_QUEUES 4
#define UBLK_QUEUE_DEPTH 128
#define UBLK_MAX_QUEUES 32
#define UBLK_QUEUE_DEPTH 1024
#define UBLK_DBG_DEV (1U << 0)
#define UBLK_DBG_QUEUE (1U << 1)
@ -55,6 +63,16 @@
struct ublk_dev;
struct ublk_queue;
struct stripe_ctx {
/* stripe */
unsigned int chunk_size;
};
struct fault_inject_ctx {
/* fault_inject */
unsigned long delay_us;
};
struct dev_ctx {
char tgt_type[16];
unsigned long flags;
@ -66,11 +84,21 @@ struct dev_ctx {
unsigned int logging:1;
unsigned int all:1;
unsigned int fg:1;
unsigned int recovery:1;
/* stripe */
unsigned int chunk_size;
/* fault_inject */
long long delay_us;
int _evtfd;
int _shmid;
/* built from shmem, only for ublk_dump_dev() */
struct ublk_dev *shadow_dev;
union {
struct stripe_ctx stripe;
struct fault_inject_ctx fault_inject;
};
};
struct ublk_ctrl_cmd_data {
@ -107,6 +135,14 @@ struct ublk_tgt_ops {
int (*queue_io)(struct ublk_queue *, int tag);
void (*tgt_io_done)(struct ublk_queue *,
int tag, const struct io_uring_cqe *);
	/*
	 * Target-specific command-line handling.
	 *
	 * Each target option must take an argument.
	 */
	void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]);
	void (*usage)(const struct ublk_tgt_ops *ops);
};
struct ublk_tgt {
@ -357,6 +393,7 @@ static inline int ublk_queue_use_zc(const struct ublk_queue *q)
extern const struct ublk_tgt_ops null_tgt_ops;
extern const struct ublk_tgt_ops loop_tgt_ops;
extern const struct ublk_tgt_ops stripe_tgt_ops;
extern const struct ublk_tgt_ops fault_inject_tgt_ops;
void backing_file_tgt_deinit(struct ublk_dev *dev);
int backing_file_tgt_init(struct ublk_dev *dev);

View File

@ -281,7 +281,7 @@ static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
.max_sectors = dev->dev_info.max_io_buf_bytes >> 9,
},
};
unsigned chunk_size = ctx->chunk_size;
unsigned chunk_size = ctx->stripe.chunk_size;
struct stripe_conf *conf;
unsigned chunk_shift;
loff_t bytes = 0;
@ -344,10 +344,36 @@ static void ublk_stripe_tgt_deinit(struct ublk_dev *dev)
backing_file_tgt_deinit(dev);
}
/*
 * Parse stripe-specific options: --chunk_size <bytes>
 * (defaults to 65536 when absent).
 */
static void ublk_stripe_cmd_line(struct dev_ctx *ctx, int argc, char *argv[])
{
	static const struct option opts[] = {
		{ "chunk_size",	1,	NULL,  0 },
		{ 0, 0, 0, 0 }
	};
	int idx = 0;
	int c;

	ctx->stripe.chunk_size = 65536;
	for (;;) {
		c = getopt_long(argc, argv, "", opts, &idx);
		if (c == -1)
			break;
		if (c == 0 && !strcmp(opts[idx].name, "chunk_size"))
			ctx->stripe.chunk_size = strtol(optarg, NULL, 10);
	}
}
/* Print the command-line help line for the stripe target. */
static void ublk_stripe_usage(const struct ublk_tgt_ops *ops)
{
	fputs("\tstripe: [--chunk_size chunk_size (default 65536)]\n", stdout);
}
/* Target ops table; registered via tgt_ops_list in kublk.c. */
const struct ublk_tgt_ops stripe_tgt_ops = {
	.name = "stripe",
	.init_tgt = ublk_stripe_tgt_init,
	.deinit_tgt = ublk_stripe_tgt_deinit,
	.queue_io = ublk_stripe_queue_io,
	.tgt_io_done = ublk_stripe_io_done,
	.parse_cmd_line = ublk_stripe_cmd_line,
	.usage = ublk_stripe_usage,
};

View File

@ -30,18 +30,26 @@ _run_fio_verify_io() {
}
_create_backfile() {
local my_size=$1
local my_file
local index=$1
local new_size=$2
local old_file
local new_file
my_file=$(mktemp ublk_file_"${my_size}"_XXXXX)
truncate -s "${my_size}" "${my_file}"
echo "$my_file"
old_file="${UBLK_BACKFILES[$index]}"
[ -f "$old_file" ] && rm -f "$old_file"
new_file=$(mktemp ublk_file_"${new_size}"_XXXXX)
truncate -s "${new_size}" "${new_file}"
UBLK_BACKFILES["$index"]="$new_file"
}
_remove_backfile() {
local file=$1
_remove_files() {
local file
[ -f "$file" ] && rm -f "$file"
for file in "${UBLK_BACKFILES[@]}"; do
[ -f "$file" ] && rm -f "$file"
done
[ -f "$UBLK_TMP" ] && rm -f "$UBLK_TMP"
}
_create_tmp_dir() {
@ -106,6 +114,7 @@ _prep_test() {
local type=$1
shift 1
modprobe ublk_drv > /dev/null 2>&1
UBLK_TMP=$(mktemp ublk_test_XXXXX)
[ "$UBLK_TEST_QUIET" -eq 0 ] && echo "ublk $type: $*"
}
@ -129,7 +138,10 @@ _show_result()
echo "$1 : [FAIL]"
fi
fi
[ "$2" -ne 0 ] && exit "$2"
if [ "$2" -ne 0 ]; then
_remove_files
exit "$2"
fi
return 0
}
@ -138,16 +150,16 @@ _check_add_dev()
{
local tid=$1
local code=$2
shift 2
if [ "${code}" -ne 0 ]; then
_remove_test_files "$@"
_show_result "${tid}" "${code}"
fi
}
_cleanup_test() {
"${UBLK_PROG}" del -a
rm -f "$UBLK_TMP"
_remove_files
}
_have_feature()
@ -158,9 +170,11 @@ _have_feature()
return 1
}
_add_ublk_dev() {
local kublk_temp;
_create_ublk_dev() {
local dev_id;
local cmd=$1
shift 1
if [ ! -c /dev/ublk-control ]; then
return ${UBLK_SKIP_CODE}
@ -171,17 +185,34 @@ _add_ublk_dev() {
fi
fi
kublk_temp=$(mktemp /tmp/kublk-XXXXXX)
if ! "${UBLK_PROG}" add "$@" > "${kublk_temp}" 2>&1; then
if ! dev_id=$("${UBLK_PROG}" "$cmd" "$@" | grep "dev id" | awk -F '[ :]' '{print $3}'); then
echo "fail to add ublk dev $*"
rm -f "${kublk_temp}"
return 255
fi
dev_id=$(grep "dev id" "${kublk_temp}" | awk -F '[ :]' '{print $3}')
udevadm settle
rm -f "${kublk_temp}"
echo "${dev_id}"
if [[ "$dev_id" =~ ^[0-9]+$ ]]; then
echo "${dev_id}"
else
return 255
fi
}
_add_ublk_dev() {
_create_ublk_dev "add" "$@"
}
_recover_ublk_dev() {
local dev_id
local state
dev_id=$(_create_ublk_dev "recover" "$@")
for ((j=0;j<20;j++)); do
state=$(_get_ublk_dev_state "${dev_id}")
[ "$state" == "LIVE" ] && break
sleep 1
done
echo "$state"
}
# kill the ublk daemon and return ublk device state
@ -220,7 +251,7 @@ __run_io_and_remove()
local kill_server=$3
fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \
--rw=readwrite --iodepth=64 --size="${size}" --numjobs=4 \
--rw=readwrite --iodepth=256 --size="${size}" --numjobs=4 \
--runtime=20 --time_based > /dev/null 2>&1 &
sleep 2
if [ "${kill_server}" = "yes" ]; then
@ -238,15 +269,80 @@ __run_io_and_remove()
wait
}
run_io_and_remove()
{
local size=$1
local dev_id
shift 1
dev_id=$(_add_ublk_dev "$@")
_check_add_dev "$TID" $?
[ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)"
if ! __run_io_and_remove "$dev_id" "${size}" "no"; then
echo "/dev/ublkc$dev_id isn't removed"
exit 255
fi
}
run_io_and_kill_daemon()
{
local size=$1
local dev_id
shift 1
dev_id=$(_add_ublk_dev "$@")
_check_add_dev "$TID" $?
[ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs kill ublk server(ublk add $*)"
if ! __run_io_and_remove "$dev_id" "${size}" "yes"; then
echo "/dev/ublkc$dev_id isn't removed res ${res}"
exit 255
fi
}
run_io_and_recover()
{
local state
local dev_id
dev_id=$(_add_ublk_dev "$@")
_check_add_dev "$TID" $?
fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \
--rw=readwrite --iodepth=256 --size="${size}" --numjobs=4 \
--runtime=20 --time_based > /dev/null 2>&1 &
sleep 4
state=$(__ublk_kill_daemon "${dev_id}" "QUIESCED")
if [ "$state" != "QUIESCED" ]; then
echo "device isn't quiesced($state) after killing daemon"
return 255
fi
state=$(_recover_ublk_dev -n "$dev_id" "$@")
if [ "$state" != "LIVE" ]; then
echo "faile to recover to LIVE($state)"
return 255
fi
if ! __remove_ublk_dev_return "${dev_id}"; then
echo "delete dev ${dev_id} failed"
return 255
fi
wait
}
_ublk_test_top_dir()
{
cd "$(dirname "$0")" && pwd
}
UBLK_TMP=$(mktemp ublk_test_XXXXX)
UBLK_PROG=$(_ublk_test_top_dir)/kublk
UBLK_TEST_QUIET=1
UBLK_TEST_SHOW_RESULT=1
UBLK_BACKFILES=()
export UBLK_PROG
export UBLK_TEST_QUIET
export UBLK_TEST_SHOW_RESULT

View File

@ -0,0 +1,40 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
TID="generic_04"
ERR_CODE=0
ublk_run_recover_test()
{
run_io_and_recover "$@"
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
echo "$TID failure: $*"
_show_result $TID $ERR_CODE
fi
}
if ! _have_program fio; then
exit "$UBLK_SKIP_CODE"
fi
_prep_test "recover" "basic recover function verification"
_create_backfile 0 256M
_create_backfile 1 128M
_create_backfile 2 128M
ublk_run_recover_test -t null -q 2 -r 1 &
ublk_run_recover_test -t loop -q 2 -r 1 "${UBLK_BACKFILES[0]}" &
ublk_run_recover_test -t stripe -q 2 -r 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
wait
ublk_run_recover_test -t null -q 2 -r 1 -i 1 &
ublk_run_recover_test -t loop -q 2 -r 1 -i 1 "${UBLK_BACKFILES[0]}" &
ublk_run_recover_test -t stripe -q 2 -r 1 -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
wait
_cleanup_test "recover"
_show_result $TID $ERR_CODE

View File

@ -0,0 +1,44 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
TID="generic_04"
ERR_CODE=0
ublk_run_recover_test()
{
run_io_and_recover "$@"
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
echo "$TID failure: $*"
_show_result $TID $ERR_CODE
fi
}
if ! _have_program fio; then
exit "$UBLK_SKIP_CODE"
fi
if ! _have_feature "ZERO_COPY"; then
exit "$UBLK_SKIP_CODE"
fi
_prep_test "recover" "basic recover function verification (zero copy)"
_create_backfile 0 256M
_create_backfile 1 128M
_create_backfile 2 128M
ublk_run_recover_test -t null -q 2 -r 1 -z &
ublk_run_recover_test -t loop -q 2 -r 1 -z "${UBLK_BACKFILES[0]}" &
ublk_run_recover_test -t stripe -q 2 -r 1 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
wait
ublk_run_recover_test -t null -q 2 -r 1 -z -i 1 &
ublk_run_recover_test -t loop -q 2 -r 1 -z -i 1 "${UBLK_BACKFILES[0]}" &
ublk_run_recover_test -t stripe -q 2 -r 1 -z -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
wait
_cleanup_test "recover"
_show_result $TID $ERR_CODE

View File

@ -0,0 +1,41 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
TID="generic_06"
ERR_CODE=0
_prep_test "fault_inject" "fast cleanup when all I/Os of one hctx are in server"
# configure ublk server to sleep 2s before completing each I/O
dev_id=$(_add_ublk_dev -t fault_inject -q 2 -d 1 --delay_us 2000000)
_check_add_dev $TID $?
STARTTIME=${SECONDS}
dd if=/dev/urandom of=/dev/ublkb${dev_id} oflag=direct bs=4k count=1 status=none > /dev/null 2>&1 &
dd_pid=$!
__ublk_kill_daemon ${dev_id} "DEAD"
wait $dd_pid
dd_exitcode=$?
ENDTIME=${SECONDS}
ELAPSED=$(($ENDTIME - $STARTTIME))
# assert that dd sees an error and exits quickly after ublk server is
# killed. previously this relied on seeing an I/O timeout and so would
# take ~30s
if [ $dd_exitcode -eq 0 ]; then
echo "dd unexpectedly exited successfully!"
ERR_CODE=255
fi
if [ $ELAPSED -ge 5 ]; then
echo "dd took $ELAPSED seconds to exit (>= 5s tolerance)!"
ERR_CODE=255
fi
_cleanup_test "fault_inject"
_show_result $TID $ERR_CODE

View File

@ -12,10 +12,10 @@ fi
_prep_test "loop" "write and verify test"
backfile_0=$(_create_backfile 256M)
_create_backfile 0 256M
dev_id=$(_add_ublk_dev -t loop "$backfile_0")
_check_add_dev $TID $? "${backfile_0}"
dev_id=$(_add_ublk_dev -t loop "${UBLK_BACKFILES[0]}")
_check_add_dev $TID $?
# run fio over the ublk disk
_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M
@ -23,6 +23,4 @@ ERR_CODE=$?
_cleanup_test "loop"
_remove_backfile "$backfile_0"
_show_result $TID $ERR_CODE

View File

@ -8,15 +8,13 @@ ERR_CODE=0
_prep_test "loop" "mkfs & mount & umount"
backfile_0=$(_create_backfile 256M)
dev_id=$(_add_ublk_dev -t loop "$backfile_0")
_check_add_dev $TID $? "$backfile_0"
_create_backfile 0 256M
dev_id=$(_add_ublk_dev -t loop "${UBLK_BACKFILES[0]}")
_check_add_dev $TID $?
_mkfs_mount_test /dev/ublkb"${dev_id}"
ERR_CODE=$?
_cleanup_test "loop"
_remove_backfile "$backfile_0"
_show_result $TID $ERR_CODE

View File

@ -12,9 +12,9 @@ fi
_prep_test "loop" "write and verify over zero copy"
backfile_0=$(_create_backfile 256M)
dev_id=$(_add_ublk_dev -t loop -z "$backfile_0")
_check_add_dev $TID $? "$backfile_0"
_create_backfile 0 256M
dev_id=$(_add_ublk_dev -t loop -z "${UBLK_BACKFILES[0]}")
_check_add_dev $TID $?
# run fio over the ublk disk
_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M
@ -22,6 +22,4 @@ ERR_CODE=$?
_cleanup_test "loop"
_remove_backfile "$backfile_0"
_show_result $TID $ERR_CODE

View File

@ -8,15 +8,14 @@ ERR_CODE=0
_prep_test "loop" "mkfs & mount & umount with zero copy"
backfile_0=$(_create_backfile 256M)
dev_id=$(_add_ublk_dev -t loop -z "$backfile_0")
_check_add_dev $TID $? "$backfile_0"
_create_backfile 0 256M
dev_id=$(_add_ublk_dev -t loop -z "${UBLK_BACKFILES[0]}")
_check_add_dev $TID $?
_mkfs_mount_test /dev/ublkb"${dev_id}"
ERR_CODE=$?
_cleanup_test "loop"
_remove_backfile "$backfile_0"
_show_result $TID $ERR_CODE

View File

@ -12,10 +12,10 @@ fi
_prep_test "loop" "write and verify test"
backfile_0=$(_create_backfile 256M)
_create_backfile 0 256M
dev_id=$(_add_ublk_dev -q 2 -t loop "$backfile_0")
_check_add_dev $TID $? "${backfile_0}"
dev_id=$(_add_ublk_dev -q 2 -t loop "${UBLK_BACKFILES[0]}")
_check_add_dev $TID $?
# run fio over the ublk disk
_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M
@ -23,6 +23,4 @@ ERR_CODE=$?
_cleanup_test "loop"
_remove_backfile "$backfile_0"
_show_result $TID $ERR_CODE

View File

@ -4,44 +4,31 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
TID="stress_01"
ERR_CODE=0
DEV_ID=-1
ublk_io_and_remove()
{
local size=$1
shift 1
local backfile=""
if echo "$@" | grep -q "loop"; then
backfile=${*: -1}
fi
DEV_ID=$(_add_ublk_dev "$@")
_check_add_dev $TID $? "${backfile}"
[ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)"
if ! __run_io_and_remove "${DEV_ID}" "${size}" "no"; then
echo "/dev/ublkc${DEV_ID} isn't removed"
_remove_backfile "${backfile}"
exit 255
run_io_and_remove "$@"
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
echo "$TID failure: $*"
_show_result $TID $ERR_CODE
fi
}
if ! _have_program fio; then
exit "$UBLK_SKIP_CODE"
fi
_prep_test "stress" "run IO and remove device"
ublk_io_and_remove 8G -t null -q 4
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
_show_result $TID $ERR_CODE
fi
_create_backfile 0 256M
_create_backfile 1 128M
_create_backfile 2 128M
BACK_FILE=$(_create_backfile 256M)
ublk_io_and_remove 256M -t loop -q 4 "${BACK_FILE}"
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
_show_result $TID $ERR_CODE
fi
ublk_io_and_remove 8G -t null -q 4 &
ublk_io_and_remove 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" &
ublk_io_and_remove 256M -t stripe -q 4 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
wait
ublk_io_and_remove 256M -t loop -q 4 -z "${BACK_FILE}"
ERR_CODE=$?
_cleanup_test "stress"
_remove_backfile "${BACK_FILE}"
_show_result $TID $ERR_CODE

View File

@ -4,44 +4,31 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
TID="stress_02"
ERR_CODE=0
DEV_ID=-1
if ! _have_program fio; then
exit "$UBLK_SKIP_CODE"
fi
ublk_io_and_kill_daemon()
{
local size=$1
shift 1
local backfile=""
if echo "$@" | grep -q "loop"; then
backfile=${*: -1}
fi
DEV_ID=$(_add_ublk_dev "$@")
_check_add_dev $TID $? "${backfile}"
[ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs kill ublk server(ublk add $*)"
if ! __run_io_and_remove "${DEV_ID}" "${size}" "yes"; then
echo "/dev/ublkc${DEV_ID} isn't removed res ${res}"
_remove_backfile "${backfile}"
exit 255
run_io_and_kill_daemon "$@"
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
echo "$TID failure: $*"
_show_result $TID $ERR_CODE
fi
}
_prep_test "stress" "run IO and kill ublk server"
ublk_io_and_kill_daemon 8G -t null -q 4
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
_show_result $TID $ERR_CODE
fi
_create_backfile 0 256M
_create_backfile 1 128M
_create_backfile 2 128M
BACK_FILE=$(_create_backfile 256M)
ublk_io_and_kill_daemon 256M -t loop -q 4 "${BACK_FILE}"
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
_show_result $TID $ERR_CODE
fi
ublk_io_and_kill_daemon 8G -t null -q 4 &
ublk_io_and_kill_daemon 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" &
ublk_io_and_kill_daemon 256M -t stripe -q 4 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
wait
ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${BACK_FILE}"
ERR_CODE=$?
_cleanup_test "stress"
_remove_backfile "${BACK_FILE}"
_show_result $TID $ERR_CODE

View File

@ -0,0 +1,38 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
TID="stress_03"
ERR_CODE=0
ublk_io_and_remove()
{
run_io_and_remove "$@"
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
echo "$TID failure: $*"
_show_result $TID $ERR_CODE
fi
}
if ! _have_program fio; then
exit "$UBLK_SKIP_CODE"
fi
if ! _have_feature "ZERO_COPY"; then
exit "$UBLK_SKIP_CODE"
fi
_prep_test "stress" "run IO and remove device(zero copy)"
_create_backfile 0 256M
_create_backfile 1 128M
_create_backfile 2 128M
ublk_io_and_remove 8G -t null -q 4 -z &
ublk_io_and_remove 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" &
ublk_io_and_remove 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
wait
_cleanup_test "stress"
_show_result $TID $ERR_CODE

View File

@ -0,0 +1,37 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
TID="stress_04"
ERR_CODE=0
ublk_io_and_kill_daemon()
{
run_io_and_kill_daemon "$@"
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
echo "$TID failure: $*"
_show_result $TID $ERR_CODE
fi
}
if ! _have_program fio; then
exit "$UBLK_SKIP_CODE"
fi
if ! _have_feature "ZERO_COPY"; then
exit "$UBLK_SKIP_CODE"
fi
_prep_test "stress" "run IO and kill ublk server(zero copy)"
_create_backfile 0 256M
_create_backfile 1 128M
_create_backfile 2 128M
ublk_io_and_kill_daemon 8G -t null -q 4 -z &
ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" &
ublk_io_and_kill_daemon 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
wait
_cleanup_test "stress"
_show_result $TID $ERR_CODE

View File

@ -0,0 +1,64 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
TID="stress_05"
ERR_CODE=0
run_io_and_remove()
{
local size=$1
local dev_id
local dev_pid
shift 1
dev_id=$(_add_ublk_dev "$@")
_check_add_dev $TID $?
[ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)"
fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \
--rw=readwrite --iodepth=128 --size="${size}" --numjobs=4 \
--runtime=40 --time_based > /dev/null 2>&1 &
sleep 4
dev_pid=$(_get_ublk_daemon_pid "$dev_id")
kill -9 "$dev_pid"
if ! __remove_ublk_dev_return "${dev_id}"; then
echo "delete dev ${dev_id} failed"
return 255
fi
}
ublk_io_and_remove()
{
run_io_and_remove "$@"
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
echo "$TID failure: $*"
_show_result $TID $ERR_CODE
fi
}
_prep_test "stress" "run IO and remove device with recovery enabled"
_create_backfile 0 256M
_create_backfile 1 256M
for reissue in $(seq 0 1); do
ublk_io_and_remove 8G -t null -q 4 -g 1 -r 1 -i "$reissue" &
ublk_io_and_remove 256M -t loop -q 4 -g 1 -r 1 -i "$reissue" "${UBLK_BACKFILES[0]}" &
wait
done
if _have_feature "ZERO_COPY"; then
for reissue in $(seq 0 1); do
ublk_io_and_remove 8G -t null -q 4 -g 1 -z -r 1 -i "$reissue" &
ublk_io_and_remove 256M -t loop -q 4 -g 1 -z -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" &
wait
done
fi
_cleanup_test "stress"
_show_result $TID $ERR_CODE

View File

@ -12,19 +12,15 @@ fi
_prep_test "stripe" "write and verify test"
backfile_0=$(_create_backfile 256M)
backfile_1=$(_create_backfile 256M)
_create_backfile 0 256M
_create_backfile 1 256M
dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1")
_check_add_dev $TID $? "${backfile_0}"
dev_id=$(_add_ublk_dev -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}")
_check_add_dev $TID $?
# run fio over the ublk disk
_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M
ERR_CODE=$?
_cleanup_test "stripe"
_remove_backfile "$backfile_0"
_remove_backfile "$backfile_1"
_show_result $TID $ERR_CODE

View File

@ -8,17 +8,14 @@ ERR_CODE=0
_prep_test "stripe" "mkfs & mount & umount"
backfile_0=$(_create_backfile 256M)
backfile_1=$(_create_backfile 256M)
dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1")
_check_add_dev $TID $? "$backfile_0" "$backfile_1"
_create_backfile 0 256M
_create_backfile 1 256M
dev_id=$(_add_ublk_dev -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}")
_check_add_dev $TID $?
_mkfs_mount_test /dev/ublkb"${dev_id}"
ERR_CODE=$?
_cleanup_test "stripe"
_remove_backfile "$backfile_0"
_remove_backfile "$backfile_1"
_show_result $TID $ERR_CODE

View File

@ -12,19 +12,15 @@ fi
_prep_test "stripe" "write and verify test"
backfile_0=$(_create_backfile 256M)
backfile_1=$(_create_backfile 256M)
_create_backfile 0 256M
_create_backfile 1 256M
dev_id=$(_add_ublk_dev -q 2 -t stripe "$backfile_0" "$backfile_1")
_check_add_dev $TID $? "${backfile_0}"
dev_id=$(_add_ublk_dev -q 2 -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}")
_check_add_dev $TID $?
# run fio over the ublk disk
_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M
ERR_CODE=$?
_cleanup_test "stripe"
_remove_backfile "$backfile_0"
_remove_backfile "$backfile_1"
_show_result $TID $ERR_CODE

View File

@ -8,17 +8,14 @@ ERR_CODE=0
_prep_test "stripe" "mkfs & mount & umount on zero copy"
backfile_0=$(_create_backfile 256M)
backfile_1=$(_create_backfile 256M)
dev_id=$(_add_ublk_dev -t stripe -z -q 2 "$backfile_0" "$backfile_1")
_check_add_dev $TID $? "$backfile_0" "$backfile_1"
_create_backfile 0 256M
_create_backfile 1 256M
dev_id=$(_add_ublk_dev -t stripe -z -q 2 "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}")
_check_add_dev $TID $?
_mkfs_mount_test /dev/ublkb"${dev_id}"
ERR_CODE=$?
_cleanup_test "stripe"
_remove_backfile "$backfile_0"
_remove_backfile "$backfile_1"
_show_result $TID $ERR_CODE