mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/
synced 2025-04-19 20:58:31 +09:00

Merge tag 'for-6.15/io_uring-rx-zc-20250325' of git://git.kernel.dk/linux

Pull io_uring zero-copy receive support from Jens Axboe:

"This adds support for zero-copy receive with io_uring, enabling fast bulk receive of data directly into application memory, rather than needing to copy the data out of kernel memory. While this version only supports host memory as that was the initial target, other memory types are planned as well, with notably GPU memory coming next.

This work depends on some networking components which were queued up on the networking side, but have now landed in your tree.

This is the work of Pavel Begunkov and David Wei. From the v14 posting:

 'We configure a page pool that a driver uses to fill a hw rx queue to hand out user pages instead of kernel pages. Any data that ends up hitting this hw rx queue will thus be dma'd into userspace memory directly, without needing to be bounced through kernel memory. 'Reading' data out of a socket instead becomes a _notification_ mechanism, where the kernel tells userspace where the data is. The overall approach is similar to the devmem TCP proposal.

 This relies on hw header/data split, flow steering and RSS to ensure packet headers remain in kernel memory and only desired flows hit a hw rx queue configured for zero copy. Configuring this is outside of the scope of this patchset.

 We share netdev core infra with devmem TCP. The main difference is that io_uring is used for the uAPI and the lifetime of all objects are bound to an io_uring instance. Data is 'read' using a new io_uring request type. When done, data is returned via a new shared refill queue. A zero copy page pool refills a hw rx queue from this refill queue directly. Of course, the lifetime of these data buffers are managed by io_uring rather than the networking stack, with different refcounting rules.

 This patchset is the first step adding basic zero copy support. We will extend this iteratively with new features e.g. dynamically allocated zero copy areas, THP support, dmabuf support, improved copy fallback, general optimisations and more'

In a local setup, I was able to saturate a 200G link with a single CPU core, and at netdev conf 0x19 earlier this month, Jamal reported 188Gbit of bandwidth using a single core (no HT, including soft-irq). Safe to say the efficiency is there, as bigger links would be needed to find the per-core limit, and it's considerably more efficient and faster than the existing devmem solution"

* tag 'for-6.15/io_uring-rx-zc-20250325' of git://git.kernel.dk/linux:
  io_uring/zcrx: add selftest case for recvzc with read limit
  io_uring/zcrx: add a read limit to recvzc requests
  io_uring: add missing IORING_MAP_OFF_ZCRX_REGION in io_uring_mmap
  io_uring: Rename KConfig to Kconfig
  io_uring/zcrx: fix leaks on failed registration
  io_uring/zcrx: recheck ifq on shutdown
  io_uring/zcrx: add selftest
  net: add documentation for io_uring zcrx
  io_uring/zcrx: add copy fallback
  io_uring/zcrx: throttle receive requests
  io_uring/zcrx: set pp memory provider for an rx queue
  io_uring/zcrx: add io_recvzc request
  io_uring/zcrx: dma-map area for the device
  io_uring/zcrx: implement zerocopy receive pp memory provider
  io_uring/zcrx: grab a net device
  io_uring/zcrx: add io_zcrx_area
  io_uring/zcrx: add interface queue and refill queue
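For orientation, below is a rough userspace sketch of the registration step described in the pull message, loosely following Documentation/networking/iou-zcrx.rst: a host-memory area and a refill-ring region are registered against a hw rx queue with IORING_REGISTER_ZCRX_IFQ, after which IORING_OP_RECV_ZC requests on a steered socket complete with offsets into that area. This is a minimal sketch rather than the kernel selftest; the struct fields, sizing, and raw-syscall plumbing are assumptions based on the 6.15 uAPI and should be checked against include/uapi/linux/io_uring.h.

/* Hedged sketch: register a zero-copy rx interface queue (assumed 6.15 uAPI). */
#include <linux/io_uring.h>
#include <net/if.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

#define AREA_SIZE	(64UL * 1024 * 1024)	/* user memory the NIC will DMA into */
#define RQ_ENTRIES	4096			/* refill queue entries, assumed power of two */

static int zcrx_register(int ring_fd, const char *ifname, unsigned int rx_queue)
{
	/* Buffer area handed to the page pool in place of kernel pages. */
	void *area = mmap(NULL, AREA_SIZE, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	/* Backing memory for the shared refill ring (header + RQEs), page aligned. */
	size_t rq_size = 4096 + RQ_ENTRIES * sizeof(struct io_uring_zcrx_rqe);
	void *rq_mem = mmap(NULL, rq_size, PROT_READ | PROT_WRITE,
			    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (area == MAP_FAILED || rq_mem == MAP_FAILED)
		return -1;

	struct io_uring_zcrx_area_reg area_reg = {
		.addr		= (__u64)(unsigned long)area,
		.len		= AREA_SIZE,
	};
	struct io_uring_region_desc region_reg = {
		.user_addr	= (__u64)(unsigned long)rq_mem,
		.size		= rq_size,
		.flags		= IORING_MEM_REGION_TYPE_USER,
	};
	struct io_uring_zcrx_ifq_reg reg = {
		.if_idx		= if_nametoindex(ifname),	/* netdev owning the rx queue */
		.if_rxq		= rx_queue,			/* hw queue set up via flow steering */
		.rq_entries	= RQ_ENTRIES,
		.area_ptr	= (__u64)(unsigned long)&area_reg,
		.region_ptr	= (__u64)(unsigned long)&region_reg,
	};

	/* Binds the ifq lifetime to this ring; liburing wraps this registration opcode. */
	return syscall(__NR_io_uring_register, ring_fd,
		       IORING_REGISTER_ZCRX_IFQ, &reg, 1);
}

On the completion side, each IORING_OP_RECV_ZC CQE then points at a chunk inside the registered area, and the application hands the buffer back by posting an entry on the shared refill ring once it is done with the data, matching the refill-queue flow quoted above.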
173 lines
4.7 KiB
C
// SPDX-License-Identifier: GPL-2.0
#ifndef IOU_RSRC_H
#define IOU_RSRC_H

#include <linux/io_uring_types.h>
#include <linux/lockdep.h>

#define IO_VEC_CACHE_SOFT_CAP		256

enum {
	IORING_RSRC_FILE		= 0,
	IORING_RSRC_BUFFER		= 1,
};

struct io_rsrc_node {
	unsigned char			type;
	int				refs;

	u64 tag;
	union {
		unsigned long		file_ptr;
		struct io_mapped_ubuf	*buf;
	};
};

enum {
	IO_IMU_DEST	= 1 << ITER_DEST,
	IO_IMU_SOURCE	= 1 << ITER_SOURCE,
};

struct io_mapped_ubuf {
	u64		ubuf;
	unsigned int	len;
	unsigned int	nr_bvecs;
	unsigned int	folio_shift;
	refcount_t	refs;
	unsigned long	acct_pages;
	void		(*release)(void *);
	void		*priv;
	bool		is_kbuf;
	u8		dir;
	struct bio_vec	bvec[] __counted_by(nr_bvecs);
};

struct io_imu_folio_data {
	/* Head folio can be partially included in the fixed buf */
	unsigned int	nr_pages_head;
	/* For non-head/tail folios, has to be fully included */
	unsigned int	nr_pages_mid;
	unsigned int	folio_shift;
	unsigned int	nr_folios;
};

bool io_rsrc_cache_init(struct io_ring_ctx *ctx);
void io_rsrc_cache_free(struct io_ring_ctx *ctx);
struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type);
void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node);
void io_rsrc_data_free(struct io_ring_ctx *ctx, struct io_rsrc_data *data);
int io_rsrc_data_alloc(struct io_rsrc_data *data, unsigned nr);

struct io_rsrc_node *io_find_buf_node(struct io_kiocb *req,
				      unsigned issue_flags);
int io_import_reg_buf(struct io_kiocb *req, struct iov_iter *iter,
			u64 buf_addr, size_t len, int ddir,
			unsigned issue_flags);
int io_import_reg_vec(int ddir, struct iov_iter *iter,
			struct io_kiocb *req, struct iou_vec *vec,
			unsigned nr_iovs, unsigned issue_flags);
int io_prep_reg_iovec(struct io_kiocb *req, struct iou_vec *iv,
			const struct iovec __user *uvec, size_t uvec_segs);

int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg);
int io_sqe_buffers_unregister(struct io_ring_ctx *ctx);
int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
			    unsigned int nr_args, u64 __user *tags);
int io_sqe_files_unregister(struct io_ring_ctx *ctx);
int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
			  unsigned nr_args, u64 __user *tags);

int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg,
			     unsigned nr_args);
int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg,
			    unsigned size, unsigned type);
int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
			unsigned int size, unsigned int type);
int io_buffer_validate(struct iovec *iov);

bool io_check_coalesce_buffer(struct page **page_array, int nr_pages,
			      struct io_imu_folio_data *data);

static inline struct io_rsrc_node *io_rsrc_node_lookup(struct io_rsrc_data *data,
						       int index)
{
	if (index < data->nr)
		return data->nodes[array_index_nospec(index, data->nr)];
	return NULL;
}

static inline void io_put_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
{
	lockdep_assert_held(&ctx->uring_lock);
	if (!--node->refs)
		io_free_rsrc_node(ctx, node);
}

static inline bool io_reset_rsrc_node(struct io_ring_ctx *ctx,
				      struct io_rsrc_data *data, int index)
{
	struct io_rsrc_node *node = data->nodes[index];

	if (!node)
		return false;
	io_put_rsrc_node(ctx, node);
	data->nodes[index] = NULL;
	return true;
}

static inline void io_req_put_rsrc_nodes(struct io_kiocb *req)
{
	if (req->file_node) {
		io_put_rsrc_node(req->ctx, req->file_node);
		req->file_node = NULL;
	}
	if (req->flags & REQ_F_BUF_NODE) {
		io_put_rsrc_node(req->ctx, req->buf_node);
		req->buf_node = NULL;
	}
}

static inline void io_req_assign_rsrc_node(struct io_rsrc_node **dst_node,
					   struct io_rsrc_node *node)
{
	node->refs++;
	*dst_node = node;
}

static inline void io_req_assign_buf_node(struct io_kiocb *req,
					  struct io_rsrc_node *node)
{
	io_req_assign_rsrc_node(&req->buf_node, node);
	req->flags |= REQ_F_BUF_NODE;
}

int io_files_update(struct io_kiocb *req, unsigned int issue_flags);
int io_files_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);

int __io_account_mem(struct user_struct *user, unsigned long nr_pages);

static inline void __io_unaccount_mem(struct user_struct *user,
				      unsigned long nr_pages)
{
	atomic_long_sub(nr_pages, &user->locked_vm);
}

void io_vec_free(struct iou_vec *iv);
int io_vec_realloc(struct iou_vec *iv, unsigned nr_entries);

static inline void io_vec_reset_iovec(struct iou_vec *iv,
				      struct iovec *iovec, unsigned nr)
{
	io_vec_free(iv);
	iv->iovec = iovec;
	iv->nr = nr;
}

static inline void io_alloc_cache_vec_kasan(struct iou_vec *iv)
{
	if (IS_ENABLED(CONFIG_KASAN))
		io_vec_free(iv);
}

#endif