Mirror of https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ (synced 2025-04-19 20:58:31 +09:00)

RDMA v6.15 merge window pull request
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma updates from Jason Gunthorpe:

 - Usual minor updates and fixes for bnxt_re, hfi1, rxe, mana, iser,
   mlx5, vmw_pvrdma, hns

 - Make rxe work on tun devices

 - mana gains more standard verbs as it moves toward supporting
   in-kernel verbs

 - DMABUF support for mana

 - Fix page size calculations when memory registration exceeds 4G

 - On Demand Paging support for rxe

 - mlx5 support for RDMA TRANSPORT flow tables and a new ucap mechanism
   to access control use of them

 - Optional RDMA_TX/RX counters per QP in mlx5

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (73 commits)
  IB/mad: Check available slots before posting receive WRs
  RDMA/mana_ib: Fix integer overflow during queue creation
  RDMA/mlx5: Fix calculation of total invalidated pages
  RDMA/mlx5: Fix mlx5_poll_one() cur_qp update flow
  RDMA/mlx5: Fix page_size variable overflow
  RDMA/mlx5: Drop access_flags from _mlx5_mr_cache_alloc()
  RDMA/mlx5: Fix cache entry update on dereg error
  RDMA/mlx5: Fix MR cache initialization error flow
  RDMA/mlx5: Support optional-counters binding for QPs
  RDMA/mlx5: Compile fs.c regardless of INFINIBAND_USER_ACCESS config
  RDMA/core: Pass port to counter bind/unbind operations
  RDMA/core: Add support to optional-counters binding configuration
  RDMA/core: Create and destroy rdma_counter using rdma_zalloc_drv_obj()
  RDMA/mlx5: Add optional counters for RDMA_TX/RX_packets/bytes
  RDMA/core: Fix use-after-free when rename device name
  RDMA/bnxt_re: Support perf management counters
  RDMA/rxe: Fix incorrect return value of rxe_odp_atomic_op()
  RDMA/uverbs: Propagate errors from rdma_lookup_get_uobject()
  RDMA/mana_ib: Handle net event for pointing to the current netdev
  net: mana: Change the function signature of mana_get_primary_netdev_rcu
  ...

commit 092e335082

Documentation/infiniband/index.rst
@@ -12,6 +12,7 @@ InfiniBand
    opa_vnic
    sysfs
    tag_matching
+   ucaps
    user_mad
    user_verbs

Documentation/infiniband/ucaps.rst (new file, 71 lines)
@@ -0,0 +1,71 @@

=================================
Infiniband Userspace Capabilities
=================================

User CAPabilities (UCAPs) provide fine-grained control over specific
firmware features in Infiniband (IB) devices. This approach offers
more granular capabilities than the existing Linux capabilities,
which may be too generic for certain FW features.

Each user capability is represented as a character device with root
read-write access. Root processes can grant users special privileges
by allowing access to these character devices (e.g., using chown).

Usage
=====

UCAPs allow control over specific features of an IB device using file
descriptors of UCAP character devices. Here is how a user enables
specific features of an IB device:

* A root process grants the user access to the UCAP files that
  represent the capabilities (e.g., using chown).
* The user opens the UCAP files, obtaining file descriptors.
* When opening an IB device, include an array of the UCAP file
  descriptors as an attribute.
* The ib_uverbs driver recognizes the UCAP file descriptors and enables
  the corresponding capabilities for the IB device.
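
The exact userspace plumbing is outside the scope of this file; the sketch
below is illustrative only and not part of the patch. It assumes a root
process has already run chown on /dev/infiniband/mlx5_perm_ctrl_local, and
the hypothetical helper open_ucap() merely obtains the file descriptor that
is later placed in the optional fd-array attribute
(UVERBS_ATTR_GET_CONTEXT_FD_ARR) when the device context is created::

    /* Hypothetical userspace helper: open a UCAP character device so its
     * file descriptor can be handed to ib_uverbs at context creation.
     */
    #include <fcntl.h>

    static int open_ucap(const char *path)
    {
            /* e.g. path = "/dev/infiniband/mlx5_perm_ctrl_local" */
            return open(path, O_RDWR);
    }
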
Creating UCAPs
==============

To create a new UCAP, drivers must first define a type in the
rdma_user_cap enum in rdma/ib_ucaps.h. The name of the UCAP character
device should be added to the ucap_names array in
drivers/infiniband/core/ucaps.c. Then, the driver can create the UCAP
character device by calling the ib_create_ucap API with the UCAP
type.

A reference count is stored for each UCAP to track creations and
removals of the UCAP device. If multiple creation calls are made with
the same type (e.g., for two IB devices), the UCAP character device
is created during the first call and subsequent calls increment the
reference count.

The UCAP character device is created under /dev/infiniband, and its
permissions are set to allow root read and write access only.
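
A minimal driver-side sketch (illustrative only; the wrapper names here are
hypothetical and error handling is trimmed) of creating and removing a UCAP
with the ib_create_ucap()/ib_remove_ucap() APIs::

    #include <rdma/ib_ucaps.h>

    /* On the first call this creates /dev/infiniband/mlx5_perm_ctrl_local;
     * later calls for the same type only increment its reference count.
     */
    static int example_enable_local_ctrl_ucap(void)
    {
            return ib_create_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL);
    }

    /* Drops one reference; the character device is removed from the
     * filesystem once the reference count reaches zero.
     */
    static void example_disable_local_ctrl_ucap(void)
    {
            ib_remove_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL);
    }
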
Removing UCAPs
==============

Each removal decrements the reference count of the UCAP. The UCAP
character device is removed from the filesystem only when the
reference count is decreased to 0.

/dev and /sys/class files
=========================

The class::

  /sys/class/infiniband_ucaps

is created when the first UCAP character device is created.

The UCAP character device is created under /dev/infiniband.

For example, if mlx5_ib adds the rdma_user_cap
RDMA_UCAP_MLX5_CTRL_LOCAL with name "mlx5_perm_ctrl_local", this will
create the device node::

  /dev/infiniband/mlx5_perm_ctrl_local

drivers/infiniband/core/Makefile
@@ -39,6 +39,7 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
                                 uverbs_std_types_async_fd.o \
                                 uverbs_std_types_srq.o \
                                 uverbs_std_types_wq.o \
-                                uverbs_std_types_qp.o
+                                uverbs_std_types_qp.o \
+                                ucaps.o
 ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o umem_dmabuf.o
 ib_uverbs-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o

drivers/infiniband/core/cache.c
@@ -1501,6 +1501,12 @@ ib_cache_update(struct ib_device *device, u32 port, bool update_gids,
                 device->port_data[port].cache.pkey = pkey_cache;
         }
         device->port_data[port].cache.lmc = tprops->lmc;
+
+        if (device->port_data[port].cache.port_state != IB_PORT_NOP &&
+            device->port_data[port].cache.port_state != tprops->state)
+                ibdev_info(device, "Port: %d Link %s\n", port,
+                           ib_port_state_to_str(tprops->state));
+
         device->port_data[port].cache.port_state = tprops->state;
 
         device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix;

drivers/infiniband/core/cma.c
@@ -739,12 +739,26 @@ cma_validate_port(struct ib_device *device, u32 port,
                         goto out;
         }
 
-        if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
-                ndev = dev_get_by_index(dev_addr->net, bound_if_index);
-                if (!ndev)
-                        goto out;
+        /*
+         * For a RXE device, it should work with TUN device and normal ethernet
+         * devices. Use driver_id to check if a device is a RXE device or not.
+         * ARPHDR_NONE means a TUN device.
+         */
+        if (device->ops.driver_id == RDMA_DRIVER_RXE) {
+                if ((dev_type == ARPHRD_NONE || dev_type == ARPHRD_ETHER)
+                    && rdma_protocol_roce(device, port)) {
+                        ndev = dev_get_by_index(dev_addr->net, bound_if_index);
+                        if (!ndev)
+                                goto out;
+                }
         } else {
-                gid_type = IB_GID_TYPE_IB;
+                if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
+                        ndev = dev_get_by_index(dev_addr->net, bound_if_index);
+                        if (!ndev)
+                                goto out;
+                } else {
+                        gid_type = IB_GID_TYPE_IB;
+                }
         }
 
         sgid_attr = rdma_find_gid_by_port(device, gid, gid_type, port, ndev);

@ -12,7 +12,8 @@
|
||||
|
||||
static int __counter_set_mode(struct rdma_port_counter *port_counter,
|
||||
enum rdma_nl_counter_mode new_mode,
|
||||
enum rdma_nl_counter_mask new_mask)
|
||||
enum rdma_nl_counter_mask new_mask,
|
||||
bool bind_opcnt)
|
||||
{
|
||||
if (new_mode == RDMA_COUNTER_MODE_AUTO) {
|
||||
if (new_mask & (~ALL_AUTO_MODE_MASKS))
|
||||
@ -23,6 +24,7 @@ static int __counter_set_mode(struct rdma_port_counter *port_counter,
|
||||
|
||||
port_counter->mode.mode = new_mode;
|
||||
port_counter->mode.mask = new_mask;
|
||||
port_counter->mode.bind_opcnt = bind_opcnt;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -41,6 +43,7 @@ static int __counter_set_mode(struct rdma_port_counter *port_counter,
|
||||
*/
|
||||
int rdma_counter_set_auto_mode(struct ib_device *dev, u32 port,
|
||||
enum rdma_nl_counter_mask mask,
|
||||
bool bind_opcnt,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
struct rdma_port_counter *port_counter;
|
||||
@ -59,12 +62,13 @@ int rdma_counter_set_auto_mode(struct ib_device *dev, u32 port,
|
||||
RDMA_COUNTER_MODE_NONE;
|
||||
|
||||
if (port_counter->mode.mode == mode &&
|
||||
port_counter->mode.mask == mask) {
|
||||
port_counter->mode.mask == mask &&
|
||||
port_counter->mode.bind_opcnt == bind_opcnt) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = __counter_set_mode(port_counter, mode, mask);
|
||||
ret = __counter_set_mode(port_counter, mode, mask, bind_opcnt);
|
||||
|
||||
out:
|
||||
mutex_unlock(&port_counter->lock);
|
||||
@ -89,7 +93,7 @@ static void auto_mode_init_counter(struct rdma_counter *counter,
|
||||
}
|
||||
|
||||
static int __rdma_counter_bind_qp(struct rdma_counter *counter,
|
||||
struct ib_qp *qp)
|
||||
struct ib_qp *qp, u32 port)
|
||||
{
|
||||
int ret;
|
||||
|
||||
@ -100,7 +104,7 @@ static int __rdma_counter_bind_qp(struct rdma_counter *counter,
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
mutex_lock(&counter->lock);
|
||||
ret = qp->device->ops.counter_bind_qp(counter, qp);
|
||||
ret = qp->device->ops.counter_bind_qp(counter, qp, port);
|
||||
mutex_unlock(&counter->lock);
|
||||
|
||||
return ret;
|
||||
@ -140,7 +144,8 @@ out:
|
||||
|
||||
static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port,
|
||||
struct ib_qp *qp,
|
||||
enum rdma_nl_counter_mode mode)
|
||||
enum rdma_nl_counter_mode mode,
|
||||
bool bind_opcnt)
|
||||
{
|
||||
struct rdma_port_counter *port_counter;
|
||||
struct rdma_counter *counter;
|
||||
@ -149,13 +154,15 @@ static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port,
|
||||
if (!dev->ops.counter_dealloc || !dev->ops.counter_alloc_stats)
|
||||
return NULL;
|
||||
|
||||
counter = kzalloc(sizeof(*counter), GFP_KERNEL);
|
||||
counter = rdma_zalloc_drv_obj(dev, rdma_counter);
|
||||
if (!counter)
|
||||
return NULL;
|
||||
|
||||
counter->device = dev;
|
||||
counter->port = port;
|
||||
|
||||
dev->ops.counter_init(counter);
|
||||
|
||||
rdma_restrack_new(&counter->res, RDMA_RESTRACK_COUNTER);
|
||||
counter->stats = dev->ops.counter_alloc_stats(counter);
|
||||
if (!counter->stats)
|
||||
@ -166,7 +173,7 @@ static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port,
|
||||
switch (mode) {
|
||||
case RDMA_COUNTER_MODE_MANUAL:
|
||||
ret = __counter_set_mode(port_counter, RDMA_COUNTER_MODE_MANUAL,
|
||||
0);
|
||||
0, bind_opcnt);
|
||||
if (ret) {
|
||||
mutex_unlock(&port_counter->lock);
|
||||
goto err_mode;
|
||||
@ -185,10 +192,11 @@ static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port,
|
||||
mutex_unlock(&port_counter->lock);
|
||||
|
||||
counter->mode.mode = mode;
|
||||
counter->mode.bind_opcnt = bind_opcnt;
|
||||
kref_init(&counter->kref);
|
||||
mutex_init(&counter->lock);
|
||||
|
||||
ret = __rdma_counter_bind_qp(counter, qp);
|
||||
ret = __rdma_counter_bind_qp(counter, qp, port);
|
||||
if (ret)
|
||||
goto err_mode;
|
||||
|
||||
@ -213,7 +221,8 @@ static void rdma_counter_free(struct rdma_counter *counter)
|
||||
port_counter->num_counters--;
|
||||
if (!port_counter->num_counters &&
|
||||
(port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL))
|
||||
__counter_set_mode(port_counter, RDMA_COUNTER_MODE_NONE, 0);
|
||||
__counter_set_mode(port_counter, RDMA_COUNTER_MODE_NONE, 0,
|
||||
false);
|
||||
|
||||
mutex_unlock(&port_counter->lock);
|
||||
|
||||
@ -238,7 +247,7 @@ static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter,
|
||||
return match;
|
||||
}
|
||||
|
||||
static int __rdma_counter_unbind_qp(struct ib_qp *qp)
|
||||
static int __rdma_counter_unbind_qp(struct ib_qp *qp, u32 port)
|
||||
{
|
||||
struct rdma_counter *counter = qp->counter;
|
||||
int ret;
|
||||
@ -247,7 +256,7 @@ static int __rdma_counter_unbind_qp(struct ib_qp *qp)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
mutex_lock(&counter->lock);
|
||||
ret = qp->device->ops.counter_unbind_qp(qp);
|
||||
ret = qp->device->ops.counter_unbind_qp(qp, port);
|
||||
mutex_unlock(&counter->lock);
|
||||
|
||||
return ret;
|
||||
@ -339,13 +348,14 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u32 port)
|
||||
|
||||
counter = rdma_get_counter_auto_mode(qp, port);
|
||||
if (counter) {
|
||||
ret = __rdma_counter_bind_qp(counter, qp);
|
||||
ret = __rdma_counter_bind_qp(counter, qp, port);
|
||||
if (ret) {
|
||||
kref_put(&counter->kref, counter_release);
|
||||
return ret;
|
||||
}
|
||||
} else {
|
||||
counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_AUTO);
|
||||
counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_AUTO,
|
||||
port_counter->mode.bind_opcnt);
|
||||
if (!counter)
|
||||
return -ENOMEM;
|
||||
}
|
||||
@ -358,7 +368,7 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u32 port)
|
||||
* @force:
|
||||
* true - Decrease the counter ref-count anyway (e.g., qp destroy)
|
||||
*/
|
||||
int rdma_counter_unbind_qp(struct ib_qp *qp, bool force)
|
||||
int rdma_counter_unbind_qp(struct ib_qp *qp, u32 port, bool force)
|
||||
{
|
||||
struct rdma_counter *counter = qp->counter;
|
||||
int ret;
|
||||
@ -366,7 +376,7 @@ int rdma_counter_unbind_qp(struct ib_qp *qp, bool force)
|
||||
if (!counter)
|
||||
return -EINVAL;
|
||||
|
||||
ret = __rdma_counter_unbind_qp(qp);
|
||||
ret = __rdma_counter_unbind_qp(qp, port);
|
||||
if (ret && !force)
|
||||
return ret;
|
||||
|
||||
@ -513,7 +523,7 @@ int rdma_counter_bind_qpn(struct ib_device *dev, u32 port,
|
||||
goto err_task;
|
||||
}
|
||||
|
||||
ret = __rdma_counter_bind_qp(counter, qp);
|
||||
ret = __rdma_counter_bind_qp(counter, qp, port);
|
||||
if (ret)
|
||||
goto err_task;
|
||||
|
||||
@ -558,7 +568,7 @@ int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u32 port,
|
||||
goto err;
|
||||
}
|
||||
|
||||
counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_MANUAL);
|
||||
counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_MANUAL, true);
|
||||
if (!counter) {
|
||||
ret = -ENOMEM;
|
||||
goto err;
|
||||
@ -604,7 +614,7 @@ int rdma_counter_unbind_qpn(struct ib_device *dev, u32 port,
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = rdma_counter_unbind_qp(qp, false);
|
||||
ret = rdma_counter_unbind_qp(qp, port, false);
|
||||
|
||||
out:
|
||||
rdma_restrack_put(&qp->res);
|
||||
@ -613,13 +623,15 @@ out:
|
||||
|
||||
int rdma_counter_get_mode(struct ib_device *dev, u32 port,
|
||||
enum rdma_nl_counter_mode *mode,
|
||||
enum rdma_nl_counter_mask *mask)
|
||||
enum rdma_nl_counter_mask *mask,
|
||||
bool *opcnt)
|
||||
{
|
||||
struct rdma_port_counter *port_counter;
|
||||
|
||||
port_counter = &dev->port_data[port].port_counter;
|
||||
*mode = port_counter->mode.mode;
|
||||
*mask = port_counter->mode.mask;
|
||||
*opcnt = port_counter->mode.bind_opcnt;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -528,6 +528,8 @@ static struct class ib_class = {
|
||||
static void rdma_init_coredev(struct ib_core_device *coredev,
|
||||
struct ib_device *dev, struct net *net)
|
||||
{
|
||||
bool is_full_dev = &dev->coredev == coredev;
|
||||
|
||||
/* This BUILD_BUG_ON is intended to catch layout change
|
||||
* of union of ib_core_device and device.
|
||||
* dev must be the first element as ib_core and providers
|
||||
@ -539,6 +541,13 @@ static void rdma_init_coredev(struct ib_core_device *coredev,
|
||||
|
||||
coredev->dev.class = &ib_class;
|
||||
coredev->dev.groups = dev->groups;
|
||||
|
||||
/*
|
||||
* Don't expose hw counters outside of the init namespace.
|
||||
*/
|
||||
if (!is_full_dev && dev->hw_stats_attr_index)
|
||||
coredev->dev.groups[dev->hw_stats_attr_index] = NULL;
|
||||
|
||||
device_initialize(&coredev->dev);
|
||||
coredev->owner = dev;
|
||||
INIT_LIST_HEAD(&coredev->port_list);
|
||||
@ -1341,9 +1350,11 @@ static void ib_device_notify_register(struct ib_device *device)
|
||||
u32 port;
|
||||
int ret;
|
||||
|
||||
down_read(&devices_rwsem);
|
||||
|
||||
ret = rdma_nl_notify_event(device, 0, RDMA_REGISTER_EVENT);
|
||||
if (ret)
|
||||
return;
|
||||
goto out;
|
||||
|
||||
rdma_for_each_port(device, port) {
|
||||
netdev = ib_device_get_netdev(device, port);
|
||||
@ -1354,8 +1365,11 @@ static void ib_device_notify_register(struct ib_device *device)
|
||||
RDMA_NETDEV_ATTACH_EVENT);
|
||||
dev_put(netdev);
|
||||
if (ret)
|
||||
return;
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
up_read(&devices_rwsem);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -2669,6 +2683,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
|
||||
SET_DEVICE_OP(dev_ops, counter_alloc_stats);
|
||||
SET_DEVICE_OP(dev_ops, counter_bind_qp);
|
||||
SET_DEVICE_OP(dev_ops, counter_dealloc);
|
||||
SET_DEVICE_OP(dev_ops, counter_init);
|
||||
SET_DEVICE_OP(dev_ops, counter_unbind_qp);
|
||||
SET_DEVICE_OP(dev_ops, counter_update_stats);
|
||||
SET_DEVICE_OP(dev_ops, create_ah);
|
||||
@ -2783,6 +2798,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
|
||||
SET_OBJ_SIZE(dev_ops, ib_srq);
|
||||
SET_OBJ_SIZE(dev_ops, ib_ucontext);
|
||||
SET_OBJ_SIZE(dev_ops, ib_xrcd);
|
||||
SET_OBJ_SIZE(dev_ops, rdma_counter);
|
||||
}
|
||||
EXPORT_SYMBOL(ib_set_device_ops);
|
||||
|
||||
|
@ -109,7 +109,9 @@ static struct ctl_table iwcm_ctl_table[] = {
|
||||
.data = &default_backlog,
|
||||
.maxlen = sizeof(default_backlog),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = SYSCTL_ZERO,
|
||||
.extra2 = SYSCTL_INT_MAX,
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -2671,11 +2671,11 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
|
||||
struct ib_mad_private *mad)
|
||||
{
|
||||
unsigned long flags;
|
||||
int post, ret;
|
||||
struct ib_mad_private *mad_priv;
|
||||
struct ib_sge sg_list;
|
||||
struct ib_recv_wr recv_wr;
|
||||
struct ib_mad_queue *recv_queue = &qp_info->recv_queue;
|
||||
int ret = 0;
|
||||
|
||||
/* Initialize common scatter list fields */
|
||||
sg_list.lkey = qp_info->port_priv->pd->local_dma_lkey;
|
||||
@ -2685,7 +2685,7 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
|
||||
recv_wr.sg_list = &sg_list;
|
||||
recv_wr.num_sge = 1;
|
||||
|
||||
do {
|
||||
while (true) {
|
||||
/* Allocate and map receive buffer */
|
||||
if (mad) {
|
||||
mad_priv = mad;
|
||||
@ -2693,10 +2693,8 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
|
||||
} else {
|
||||
mad_priv = alloc_mad_private(port_mad_size(qp_info->port_priv),
|
||||
GFP_ATOMIC);
|
||||
if (!mad_priv) {
|
||||
ret = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
if (!mad_priv)
|
||||
return -ENOMEM;
|
||||
}
|
||||
sg_list.length = mad_priv_dma_size(mad_priv);
|
||||
sg_list.addr = ib_dma_map_single(qp_info->port_priv->device,
|
||||
@ -2705,37 +2703,41 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
|
||||
DMA_FROM_DEVICE);
|
||||
if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device,
|
||||
sg_list.addr))) {
|
||||
kfree(mad_priv);
|
||||
ret = -ENOMEM;
|
||||
break;
|
||||
goto free_mad_priv;
|
||||
}
|
||||
mad_priv->header.mapping = sg_list.addr;
|
||||
mad_priv->header.mad_list.mad_queue = recv_queue;
|
||||
mad_priv->header.mad_list.cqe.done = ib_mad_recv_done;
|
||||
recv_wr.wr_cqe = &mad_priv->header.mad_list.cqe;
|
||||
|
||||
/* Post receive WR */
|
||||
spin_lock_irqsave(&recv_queue->lock, flags);
|
||||
post = (++recv_queue->count < recv_queue->max_active);
|
||||
list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list);
|
||||
if (recv_queue->count >= recv_queue->max_active) {
|
||||
/* Fully populated the receive queue */
|
||||
spin_unlock_irqrestore(&recv_queue->lock, flags);
|
||||
break;
|
||||
}
|
||||
recv_queue->count++;
|
||||
list_add_tail(&mad_priv->header.mad_list.list,
|
||||
&recv_queue->list);
|
||||
spin_unlock_irqrestore(&recv_queue->lock, flags);
|
||||
|
||||
ret = ib_post_recv(qp_info->qp, &recv_wr, NULL);
|
||||
if (ret) {
|
||||
spin_lock_irqsave(&recv_queue->lock, flags);
|
||||
list_del(&mad_priv->header.mad_list.list);
|
||||
recv_queue->count--;
|
||||
spin_unlock_irqrestore(&recv_queue->lock, flags);
|
||||
ib_dma_unmap_single(qp_info->port_priv->device,
|
||||
mad_priv->header.mapping,
|
||||
mad_priv_dma_size(mad_priv),
|
||||
DMA_FROM_DEVICE);
|
||||
kfree(mad_priv);
|
||||
dev_err(&qp_info->port_priv->device->dev,
|
||||
"ib_post_recv failed: %d\n", ret);
|
||||
break;
|
||||
}
|
||||
} while (post);
|
||||
}
|
||||
|
||||
ib_dma_unmap_single(qp_info->port_priv->device,
|
||||
mad_priv->header.mapping,
|
||||
mad_priv_dma_size(mad_priv), DMA_FROM_DEVICE);
|
||||
free_mad_priv:
|
||||
kfree(mad_priv);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -171,6 +171,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
|
||||
[RDMA_NLDEV_ATTR_PARENT_NAME] = { .type = NLA_NUL_STRING },
|
||||
[RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE] = { .type = NLA_U8 },
|
||||
[RDMA_NLDEV_ATTR_EVENT_TYPE] = { .type = NLA_U8 },
|
||||
[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED] = { .type = NLA_U8 },
|
||||
};
|
||||
|
||||
static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
|
||||
@ -2028,6 +2029,7 @@ static int nldev_stat_set_mode_doit(struct sk_buff *msg,
|
||||
struct ib_device *device, u32 port)
|
||||
{
|
||||
u32 mode, mask = 0, qpn, cntn = 0;
|
||||
bool opcnt = false;
|
||||
int ret;
|
||||
|
||||
/* Currently only counter for QP is supported */
|
||||
@ -2035,12 +2037,17 @@ static int nldev_stat_set_mode_doit(struct sk_buff *msg,
|
||||
nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
|
||||
return -EINVAL;
|
||||
|
||||
if (tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED])
|
||||
opcnt = !!nla_get_u8(
|
||||
tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED]);
|
||||
|
||||
mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]);
|
||||
if (mode == RDMA_COUNTER_MODE_AUTO) {
|
||||
if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
|
||||
mask = nla_get_u32(
|
||||
tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);
|
||||
return rdma_counter_set_auto_mode(device, port, mask, extack);
|
||||
return rdma_counter_set_auto_mode(device, port, mask, opcnt,
|
||||
extack);
|
||||
}
|
||||
|
||||
if (!tb[RDMA_NLDEV_ATTR_RES_LQPN])
|
||||
@ -2358,6 +2365,7 @@ static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
|
||||
struct ib_device *device;
|
||||
struct sk_buff *msg;
|
||||
u32 index, port;
|
||||
bool opcnt;
|
||||
int ret;
|
||||
|
||||
if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID])
|
||||
@ -2393,7 +2401,7 @@ static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
|
||||
goto err_msg;
|
||||
}
|
||||
|
||||
ret = rdma_counter_get_mode(device, port, &mode, &mask);
|
||||
ret = rdma_counter_get_mode(device, port, &mode, &mask, &opcnt);
|
||||
if (ret)
|
||||
goto err_msg;
|
||||
|
||||
@ -2410,6 +2418,12 @@ static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
|
||||
goto err_msg;
|
||||
}
|
||||
|
||||
if ((mode == RDMA_COUNTER_MODE_AUTO) &&
|
||||
nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED, opcnt)) {
|
||||
ret = -EMSGSIZE;
|
||||
goto err_msg;
|
||||
}
|
||||
|
||||
nlmsg_end(msg, nlh);
|
||||
ib_device_put(device);
|
||||
return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
|
||||
|
@ -216,24 +216,12 @@ static ssize_t state_show(struct ib_device *ibdev, u32 port_num,
|
||||
struct ib_port_attr attr;
|
||||
ssize_t ret;
|
||||
|
||||
static const char *state_name[] = {
|
||||
[IB_PORT_NOP] = "NOP",
|
||||
[IB_PORT_DOWN] = "DOWN",
|
||||
[IB_PORT_INIT] = "INIT",
|
||||
[IB_PORT_ARMED] = "ARMED",
|
||||
[IB_PORT_ACTIVE] = "ACTIVE",
|
||||
[IB_PORT_ACTIVE_DEFER] = "ACTIVE_DEFER"
|
||||
};
|
||||
|
||||
ret = ib_query_port(ibdev, port_num, &attr);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return sysfs_emit(buf, "%d: %s\n", attr.state,
|
||||
attr.state >= 0 &&
|
||||
attr.state < ARRAY_SIZE(state_name) ?
|
||||
state_name[attr.state] :
|
||||
"UNKNOWN");
|
||||
ib_port_state_to_str(attr.state));
|
||||
}
|
||||
|
||||
static ssize_t lid_show(struct ib_device *ibdev, u32 port_num,
|
||||
@ -988,6 +976,7 @@ int ib_setup_device_attrs(struct ib_device *ibdev)
|
||||
for (i = 0; i != ARRAY_SIZE(ibdev->groups); i++)
|
||||
if (!ibdev->groups[i]) {
|
||||
ibdev->groups[i] = &data->group;
|
||||
ibdev->hw_stats_attr_index = i;
|
||||
return 0;
|
||||
}
|
||||
WARN(true, "struct ib_device->groups is too small");
|
||||
|
267
drivers/infiniband/core/ucaps.c
Normal file
267
drivers/infiniband/core/ucaps.c
Normal file
@ -0,0 +1,267 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
|
||||
/*
|
||||
* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved
|
||||
*/
|
||||
|
||||
#include <linux/kref.h>
|
||||
#include <linux/cdev.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/fs.h>
|
||||
#include <rdma/ib_ucaps.h>
|
||||
|
||||
#define RDMA_UCAP_FIRST RDMA_UCAP_MLX5_CTRL_LOCAL
|
||||
|
||||
static DEFINE_MUTEX(ucaps_mutex);
|
||||
static struct ib_ucap *ucaps_list[RDMA_UCAP_MAX];
|
||||
static bool ucaps_class_is_registered;
|
||||
static dev_t ucaps_base_dev;
|
||||
|
||||
struct ib_ucap {
|
||||
struct cdev cdev;
|
||||
struct device dev;
|
||||
struct kref ref;
|
||||
};
|
||||
|
||||
static const char *ucap_names[RDMA_UCAP_MAX] = {
|
||||
[RDMA_UCAP_MLX5_CTRL_LOCAL] = "mlx5_perm_ctrl_local",
|
||||
[RDMA_UCAP_MLX5_CTRL_OTHER_VHCA] = "mlx5_perm_ctrl_other_vhca"
|
||||
};
|
||||
|
||||
static char *ucaps_devnode(const struct device *dev, umode_t *mode)
|
||||
{
|
||||
if (mode)
|
||||
*mode = 0600;
|
||||
|
||||
return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
|
||||
}
|
||||
|
||||
static const struct class ucaps_class = {
|
||||
.name = "infiniband_ucaps",
|
||||
.devnode = ucaps_devnode,
|
||||
};
|
||||
|
||||
static const struct file_operations ucaps_cdev_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = simple_open,
|
||||
};
|
||||
|
||||
/**
|
||||
* ib_cleanup_ucaps - cleanup all API resources and class.
|
||||
*
|
||||
* This is called once, when removing the ib_uverbs module.
|
||||
*/
|
||||
void ib_cleanup_ucaps(void)
|
||||
{
|
||||
mutex_lock(&ucaps_mutex);
|
||||
if (!ucaps_class_is_registered) {
|
||||
mutex_unlock(&ucaps_mutex);
|
||||
return;
|
||||
}
|
||||
|
||||
for (int i = RDMA_UCAP_FIRST; i < RDMA_UCAP_MAX; i++)
|
||||
WARN_ON(ucaps_list[i]);
|
||||
|
||||
class_unregister(&ucaps_class);
|
||||
ucaps_class_is_registered = false;
|
||||
unregister_chrdev_region(ucaps_base_dev, RDMA_UCAP_MAX);
|
||||
mutex_unlock(&ucaps_mutex);
|
||||
}
|
||||
|
||||
static int get_ucap_from_devt(dev_t devt, u64 *idx_mask)
|
||||
{
|
||||
for (int type = RDMA_UCAP_FIRST; type < RDMA_UCAP_MAX; type++) {
|
||||
if (ucaps_list[type] && ucaps_list[type]->dev.devt == devt) {
|
||||
*idx_mask |= 1 << type;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int get_devt_from_fd(unsigned int fd, dev_t *ret_dev)
|
||||
{
|
||||
struct file *file;
|
||||
|
||||
file = fget(fd);
|
||||
if (!file)
|
||||
return -EBADF;
|
||||
|
||||
*ret_dev = file_inode(file)->i_rdev;
|
||||
fput(file);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* ib_ucaps_init - Initialization required before ucap creation.
|
||||
*
|
||||
* Return: 0 on success, or a negative errno value on failure
|
||||
*/
|
||||
static int ib_ucaps_init(void)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
if (ucaps_class_is_registered)
|
||||
return ret;
|
||||
|
||||
ret = class_register(&ucaps_class);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = alloc_chrdev_region(&ucaps_base_dev, 0, RDMA_UCAP_MAX,
|
||||
ucaps_class.name);
|
||||
if (ret < 0) {
|
||||
class_unregister(&ucaps_class);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ucaps_class_is_registered = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ucap_dev_release(struct device *device)
|
||||
{
|
||||
struct ib_ucap *ucap = container_of(device, struct ib_ucap, dev);
|
||||
|
||||
kfree(ucap);
|
||||
}
|
||||
|
||||
/**
|
||||
* ib_create_ucap - Add a ucap character device
|
||||
* @type: UCAP type
|
||||
*
|
||||
* Creates a ucap character device in the /dev/infiniband directory. By default,
|
||||
* the device has root-only read-write access.
|
||||
*
|
||||
* A driver may call this multiple times with the same UCAP type. A reference
|
||||
* count tracks creations and deletions.
|
||||
*
|
||||
* Return: 0 on success, or a negative errno value on failure
|
||||
*/
|
||||
int ib_create_ucap(enum rdma_user_cap type)
|
||||
{
|
||||
struct ib_ucap *ucap;
|
||||
int ret;
|
||||
|
||||
if (type >= RDMA_UCAP_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&ucaps_mutex);
|
||||
ret = ib_ucaps_init();
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
ucap = ucaps_list[type];
|
||||
if (ucap) {
|
||||
kref_get(&ucap->ref);
|
||||
mutex_unlock(&ucaps_mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
ucap = kzalloc(sizeof(*ucap), GFP_KERNEL);
|
||||
if (!ucap) {
|
||||
ret = -ENOMEM;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
device_initialize(&ucap->dev);
|
||||
ucap->dev.class = &ucaps_class;
|
||||
ucap->dev.devt = MKDEV(MAJOR(ucaps_base_dev), type);
|
||||
ucap->dev.release = ucap_dev_release;
|
||||
ret = dev_set_name(&ucap->dev, ucap_names[type]);
|
||||
if (ret)
|
||||
goto err_device;
|
||||
|
||||
cdev_init(&ucap->cdev, &ucaps_cdev_fops);
|
||||
ucap->cdev.owner = THIS_MODULE;
|
||||
|
||||
ret = cdev_device_add(&ucap->cdev, &ucap->dev);
|
||||
if (ret)
|
||||
goto err_device;
|
||||
|
||||
kref_init(&ucap->ref);
|
||||
ucaps_list[type] = ucap;
|
||||
mutex_unlock(&ucaps_mutex);
|
||||
|
||||
return 0;
|
||||
|
||||
err_device:
|
||||
put_device(&ucap->dev);
|
||||
unlock:
|
||||
mutex_unlock(&ucaps_mutex);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(ib_create_ucap);
|
||||
|
||||
static void ib_release_ucap(struct kref *ref)
|
||||
{
|
||||
struct ib_ucap *ucap = container_of(ref, struct ib_ucap, ref);
|
||||
enum rdma_user_cap type;
|
||||
|
||||
for (type = RDMA_UCAP_FIRST; type < RDMA_UCAP_MAX; type++) {
|
||||
if (ucaps_list[type] == ucap)
|
||||
break;
|
||||
}
|
||||
WARN_ON(type == RDMA_UCAP_MAX);
|
||||
|
||||
ucaps_list[type] = NULL;
|
||||
cdev_device_del(&ucap->cdev, &ucap->dev);
|
||||
put_device(&ucap->dev);
|
||||
}
|
||||
|
||||
/**
|
||||
* ib_remove_ucap - Remove a ucap character device
|
||||
* @type: User cap type
|
||||
*
|
||||
* Removes the ucap character device according to type. The device is completely
|
||||
* removed from the filesystem when its reference count reaches 0.
|
||||
*/
|
||||
void ib_remove_ucap(enum rdma_user_cap type)
|
||||
{
|
||||
struct ib_ucap *ucap;
|
||||
|
||||
mutex_lock(&ucaps_mutex);
|
||||
ucap = ucaps_list[type];
|
||||
if (WARN_ON(!ucap))
|
||||
goto end;
|
||||
|
||||
kref_put(&ucap->ref, ib_release_ucap);
|
||||
end:
|
||||
mutex_unlock(&ucaps_mutex);
|
||||
}
|
||||
EXPORT_SYMBOL(ib_remove_ucap);
|
||||
|
||||
/**
|
||||
* ib_get_ucaps - Get bitmask of ucap types from file descriptors
|
||||
* @fds: Array of file descriptors
|
||||
* @fd_count: Number of file descriptors in the array
|
||||
* @idx_mask: Bitmask to be updated based on the ucaps in the fd list
|
||||
*
|
||||
* Given an array of file descriptors, this function returns a bitmask of
|
||||
* the ucaps where a bit is set if an FD for that ucap type was in the array.
|
||||
*
|
||||
* Return: 0 on success, or a negative errno value on failure
|
||||
*/
|
||||
int ib_get_ucaps(int *fds, int fd_count, uint64_t *idx_mask)
|
||||
{
|
||||
int ret = 0;
|
||||
dev_t dev;
|
||||
|
||||
*idx_mask = 0;
|
||||
mutex_lock(&ucaps_mutex);
|
||||
for (int i = 0; i < fd_count; i++) {
|
||||
ret = get_devt_from_fd(fds[i], &dev);
|
||||
if (ret)
|
||||
goto end;
|
||||
|
||||
ret = get_ucap_from_devt(dev, idx_mask);
|
||||
if (ret)
|
||||
goto end;
|
||||
}
|
||||
|
||||
end:
|
||||
mutex_unlock(&ucaps_mutex);
|
||||
return ret;
|
||||
}
|
@ -69,7 +69,9 @@ static struct ctl_table ucma_ctl_table[] = {
|
||||
.data = &max_backlog,
|
||||
.maxlen = sizeof max_backlog,
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = SYSCTL_ZERO,
|
||||
.extra2 = SYSCTL_INT_MAX,
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -80,9 +80,12 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
|
||||
unsigned long pgsz_bitmap,
|
||||
unsigned long virt)
|
||||
{
|
||||
struct scatterlist *sg;
|
||||
unsigned long curr_len = 0;
|
||||
dma_addr_t curr_base = ~0;
|
||||
unsigned long va, pgoff;
|
||||
struct scatterlist *sg;
|
||||
dma_addr_t mask;
|
||||
dma_addr_t end;
|
||||
int i;
|
||||
|
||||
umem->iova = va = virt;
|
||||
@ -107,17 +110,30 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
|
||||
pgoff = umem->address & ~PAGE_MASK;
|
||||
|
||||
for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) {
|
||||
/* Walk SGL and reduce max page size if VA/PA bits differ
|
||||
* for any address.
|
||||
/* If the current entry is physically contiguous with the previous
|
||||
* one, no need to take its start addresses into consideration.
|
||||
*/
|
||||
mask |= (sg_dma_address(sg) + pgoff) ^ va;
|
||||
if (check_add_overflow(curr_base, curr_len, &end) ||
|
||||
end != sg_dma_address(sg)) {
|
||||
|
||||
curr_base = sg_dma_address(sg);
|
||||
curr_len = 0;
|
||||
|
||||
/* Reduce max page size if VA/PA bits differ */
|
||||
mask |= (curr_base + pgoff) ^ va;
|
||||
|
||||
/* The alignment of any VA matching a discontinuity point
|
||||
* in the physical memory sets the maximum possible page
|
||||
* size as this must be a starting point of a new page that
|
||||
* needs to be aligned.
|
||||
*/
|
||||
if (i != 0)
|
||||
mask |= va;
|
||||
}
|
||||
|
||||
curr_len += sg_dma_len(sg);
|
||||
va += sg_dma_len(sg) - pgoff;
|
||||
/* Except for the last entry, the ending iova alignment sets
|
||||
* the maximum possible page size as the low bits of the iova
|
||||
* must be zero when starting the next chunk.
|
||||
*/
|
||||
if (i != (umem->sgt_append.sgt.nents - 1))
|
||||
mask |= va;
|
||||
|
||||
pgoff = 0;
|
||||
}
|
||||
|
||||
|
@ -42,6 +42,7 @@
|
||||
|
||||
#include <rdma/uverbs_types.h>
|
||||
#include <rdma/uverbs_std_types.h>
|
||||
#include <rdma/ib_ucaps.h>
|
||||
#include "rdma_core.h"
|
||||
|
||||
#include "uverbs.h"
|
||||
@ -232,6 +233,8 @@ int ib_init_ucontext(struct uverbs_attr_bundle *attrs)
|
||||
{
|
||||
struct ib_ucontext *ucontext = attrs->context;
|
||||
struct ib_uverbs_file *file = attrs->ufile;
|
||||
int *fd_array;
|
||||
int fd_count;
|
||||
int ret;
|
||||
|
||||
if (!down_read_trylock(&file->hw_destroy_rwsem))
|
||||
@ -247,6 +250,22 @@ int ib_init_ucontext(struct uverbs_attr_bundle *attrs)
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_GET_CONTEXT_FD_ARR)) {
|
||||
fd_count = uverbs_attr_ptr_get_array_size(attrs,
|
||||
UVERBS_ATTR_GET_CONTEXT_FD_ARR,
|
||||
sizeof(int));
|
||||
if (fd_count < 0) {
|
||||
ret = fd_count;
|
||||
goto err_uncharge;
|
||||
}
|
||||
|
||||
fd_array = uverbs_attr_get_alloced_ptr(attrs,
|
||||
UVERBS_ATTR_GET_CONTEXT_FD_ARR);
|
||||
ret = ib_get_ucaps(fd_array, fd_count, &ucontext->enabled_caps);
|
||||
if (ret)
|
||||
goto err_uncharge;
|
||||
}
|
||||
|
||||
ret = ucontext->device->ops.alloc_ucontext(ucontext,
|
||||
&attrs->driver_udata);
|
||||
if (ret)
|
||||
@ -716,8 +735,8 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
|
||||
goto err_free;
|
||||
|
||||
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
|
||||
if (!pd) {
|
||||
ret = -EINVAL;
|
||||
if (IS_ERR(pd)) {
|
||||
ret = PTR_ERR(pd);
|
||||
goto err_free;
|
||||
}
|
||||
|
||||
@ -807,8 +826,8 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
|
||||
if (cmd.flags & IB_MR_REREG_PD) {
|
||||
new_pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle,
|
||||
attrs);
|
||||
if (!new_pd) {
|
||||
ret = -EINVAL;
|
||||
if (IS_ERR(new_pd)) {
|
||||
ret = PTR_ERR(new_pd);
|
||||
goto put_uobjs;
|
||||
}
|
||||
} else {
|
||||
@ -917,8 +936,8 @@ static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs)
|
||||
return PTR_ERR(uobj);
|
||||
|
||||
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
|
||||
if (!pd) {
|
||||
ret = -EINVAL;
|
||||
if (IS_ERR(pd)) {
|
||||
ret = PTR_ERR(pd);
|
||||
goto err_free;
|
||||
}
|
||||
|
||||
@ -1125,8 +1144,8 @@ static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs)
|
||||
return ret;
|
||||
|
||||
cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
|
||||
if (!cq)
|
||||
return -EINVAL;
|
||||
if (IS_ERR(cq))
|
||||
return PTR_ERR(cq);
|
||||
|
||||
ret = cq->device->ops.resize_cq(cq, cmd.cqe, &attrs->driver_udata);
|
||||
if (ret)
|
||||
@ -1187,8 +1206,8 @@ static int ib_uverbs_poll_cq(struct uverbs_attr_bundle *attrs)
|
||||
return ret;
|
||||
|
||||
cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
|
||||
if (!cq)
|
||||
return -EINVAL;
|
||||
if (IS_ERR(cq))
|
||||
return PTR_ERR(cq);
|
||||
|
||||
/* we copy a struct ib_uverbs_poll_cq_resp to user space */
|
||||
header_ptr = attrs->ucore.outbuf;
|
||||
@ -1236,8 +1255,8 @@ static int ib_uverbs_req_notify_cq(struct uverbs_attr_bundle *attrs)
|
||||
return ret;
|
||||
|
||||
cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
|
||||
if (!cq)
|
||||
return -EINVAL;
|
||||
if (IS_ERR(cq))
|
||||
return PTR_ERR(cq);
|
||||
|
||||
ib_req_notify_cq(cq, cmd.solicited_only ?
|
||||
IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
|
||||
@ -1319,8 +1338,8 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
|
||||
ind_tbl = uobj_get_obj_read(rwq_ind_table,
|
||||
UVERBS_OBJECT_RWQ_IND_TBL,
|
||||
cmd->rwq_ind_tbl_handle, attrs);
|
||||
if (!ind_tbl) {
|
||||
ret = -EINVAL;
|
||||
if (IS_ERR(ind_tbl)) {
|
||||
ret = PTR_ERR(ind_tbl);
|
||||
goto err_put;
|
||||
}
|
||||
|
||||
@ -1358,8 +1377,10 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
|
||||
if (cmd->is_srq) {
|
||||
srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ,
|
||||
cmd->srq_handle, attrs);
|
||||
if (!srq || srq->srq_type == IB_SRQT_XRC) {
|
||||
ret = -EINVAL;
|
||||
if (IS_ERR(srq) ||
|
||||
srq->srq_type == IB_SRQT_XRC) {
|
||||
ret = IS_ERR(srq) ? PTR_ERR(srq) :
|
||||
-EINVAL;
|
||||
goto err_put;
|
||||
}
|
||||
}
|
||||
@ -1369,23 +1390,29 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
|
||||
rcq = uobj_get_obj_read(
|
||||
cq, UVERBS_OBJECT_CQ,
|
||||
cmd->recv_cq_handle, attrs);
|
||||
if (!rcq) {
|
||||
ret = -EINVAL;
|
||||
if (IS_ERR(rcq)) {
|
||||
ret = PTR_ERR(rcq);
|
||||
goto err_put;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (has_sq)
|
||||
if (has_sq) {
|
||||
scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
|
||||
cmd->send_cq_handle, attrs);
|
||||
if (IS_ERR(scq)) {
|
||||
ret = PTR_ERR(scq);
|
||||
goto err_put;
|
||||
}
|
||||
}
|
||||
|
||||
if (!ind_tbl && cmd->qp_type != IB_QPT_XRC_INI)
|
||||
rcq = rcq ?: scq;
|
||||
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle,
|
||||
attrs);
|
||||
if (!pd || (!scq && has_sq)) {
|
||||
ret = -EINVAL;
|
||||
if (IS_ERR(pd)) {
|
||||
ret = PTR_ERR(pd);
|
||||
goto err_put;
|
||||
}
|
||||
|
||||
@ -1480,18 +1507,18 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
|
||||
err_put:
|
||||
if (!IS_ERR(xrcd_uobj))
|
||||
uobj_put_read(xrcd_uobj);
|
||||
if (pd)
|
||||
if (!IS_ERR_OR_NULL(pd))
|
||||
uobj_put_obj_read(pd);
|
||||
if (scq)
|
||||
if (!IS_ERR_OR_NULL(scq))
|
||||
rdma_lookup_put_uobject(&scq->uobject->uevent.uobject,
|
||||
UVERBS_LOOKUP_READ);
|
||||
if (rcq && rcq != scq)
|
||||
if (!IS_ERR_OR_NULL(rcq) && rcq != scq)
|
||||
rdma_lookup_put_uobject(&rcq->uobject->uevent.uobject,
|
||||
UVERBS_LOOKUP_READ);
|
||||
if (srq)
|
||||
if (!IS_ERR_OR_NULL(srq))
|
||||
rdma_lookup_put_uobject(&srq->uobject->uevent.uobject,
|
||||
UVERBS_LOOKUP_READ);
|
||||
if (ind_tbl)
|
||||
if (!IS_ERR_OR_NULL(ind_tbl))
|
||||
uobj_put_obj_read(ind_tbl);
|
||||
|
||||
uobj_alloc_abort(&obj->uevent.uobject, attrs);
|
||||
@ -1653,8 +1680,8 @@ static int ib_uverbs_query_qp(struct uverbs_attr_bundle *attrs)
|
||||
}
|
||||
|
||||
qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
|
||||
if (!qp) {
|
||||
ret = -EINVAL;
|
||||
if (IS_ERR(qp)) {
|
||||
ret = PTR_ERR(qp);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -1759,8 +1786,8 @@ static int modify_qp(struct uverbs_attr_bundle *attrs,
|
||||
|
||||
qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle,
|
||||
attrs);
|
||||
if (!qp) {
|
||||
ret = -EINVAL;
|
||||
if (IS_ERR(qp)) {
|
||||
ret = PTR_ERR(qp);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -2026,8 +2053,8 @@ static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs)
|
||||
return -ENOMEM;
|
||||
|
||||
qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
|
||||
if (!qp) {
|
||||
ret = -EINVAL;
|
||||
if (IS_ERR(qp)) {
|
||||
ret = PTR_ERR(qp);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -2064,9 +2091,9 @@ static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs)
|
||||
|
||||
ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH,
|
||||
user_wr->wr.ud.ah, attrs);
|
||||
if (!ud->ah) {
|
||||
if (IS_ERR(ud->ah)) {
|
||||
ret = PTR_ERR(ud->ah);
|
||||
kfree(ud);
|
||||
ret = -EINVAL;
|
||||
goto out_put;
|
||||
}
|
||||
ud->remote_qpn = user_wr->wr.ud.remote_qpn;
|
||||
@ -2303,8 +2330,8 @@ static int ib_uverbs_post_recv(struct uverbs_attr_bundle *attrs)
|
||||
return PTR_ERR(wr);
|
||||
|
||||
qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
|
||||
if (!qp) {
|
||||
ret = -EINVAL;
|
||||
if (IS_ERR(qp)) {
|
||||
ret = PTR_ERR(qp);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -2354,8 +2381,8 @@ static int ib_uverbs_post_srq_recv(struct uverbs_attr_bundle *attrs)
|
||||
return PTR_ERR(wr);
|
||||
|
||||
srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
|
||||
if (!srq) {
|
||||
ret = -EINVAL;
|
||||
if (IS_ERR(srq)) {
|
||||
ret = PTR_ERR(srq);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -2411,8 +2438,8 @@ static int ib_uverbs_create_ah(struct uverbs_attr_bundle *attrs)
|
||||
}
|
||||
|
||||
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
|
||||
if (!pd) {
|
||||
ret = -EINVAL;
|
||||
if (IS_ERR(pd)) {
|
||||
ret = PTR_ERR(pd);
|
||||
goto err;
|
||||
}
|
||||
|
||||
@ -2481,8 +2508,8 @@ static int ib_uverbs_attach_mcast(struct uverbs_attr_bundle *attrs)
|
||||
return ret;
|
||||
|
||||
qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
|
||||
if (!qp)
|
||||
return -EINVAL;
|
||||
if (IS_ERR(qp))
|
||||
return PTR_ERR(qp);
|
||||
|
||||
obj = qp->uobject;
|
||||
|
||||
@ -2531,8 +2558,8 @@ static int ib_uverbs_detach_mcast(struct uverbs_attr_bundle *attrs)
|
||||
return ret;
|
||||
|
||||
qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
|
||||
if (!qp)
|
||||
return -EINVAL;
|
||||
if (IS_ERR(qp))
|
||||
return PTR_ERR(qp);
|
||||
|
||||
obj = qp->uobject;
|
||||
mutex_lock(&obj->mcast_lock);
|
||||
@ -2666,8 +2693,8 @@ static int kern_spec_to_ib_spec_action(struct uverbs_attr_bundle *attrs,
|
||||
UVERBS_OBJECT_FLOW_ACTION,
|
||||
kern_spec->action.handle,
|
||||
attrs);
|
||||
if (!ib_spec->action.act)
|
||||
return -EINVAL;
|
||||
if (IS_ERR(ib_spec->action.act))
|
||||
return PTR_ERR(ib_spec->action.act);
|
||||
ib_spec->action.size =
|
||||
sizeof(struct ib_flow_spec_action_handle);
|
||||
flow_resources_add(uflow_res,
|
||||
@ -2684,8 +2711,8 @@ static int kern_spec_to_ib_spec_action(struct uverbs_attr_bundle *attrs,
|
||||
UVERBS_OBJECT_COUNTERS,
|
||||
kern_spec->flow_count.handle,
|
||||
attrs);
|
||||
if (!ib_spec->flow_count.counters)
|
||||
return -EINVAL;
|
||||
if (IS_ERR(ib_spec->flow_count.counters))
|
||||
return PTR_ERR(ib_spec->flow_count.counters);
|
||||
ib_spec->flow_count.size =
|
||||
sizeof(struct ib_flow_spec_action_count);
|
||||
flow_resources_add(uflow_res,
|
||||
@ -2903,14 +2930,14 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs)
|
||||
return PTR_ERR(obj);
|
||||
|
||||
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
|
||||
if (!pd) {
|
||||
err = -EINVAL;
|
||||
if (IS_ERR(pd)) {
|
||||
err = PTR_ERR(pd);
|
||||
goto err_uobj;
|
||||
}
|
||||
|
||||
cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
|
||||
if (!cq) {
|
||||
err = -EINVAL;
|
||||
if (IS_ERR(cq)) {
|
||||
err = PTR_ERR(cq);
|
||||
goto err_put_pd;
|
||||
}
|
||||
|
||||
@ -3011,8 +3038,8 @@ static int ib_uverbs_ex_modify_wq(struct uverbs_attr_bundle *attrs)
|
||||
return -EINVAL;
|
||||
|
||||
wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, attrs);
|
||||
if (!wq)
|
||||
return -EINVAL;
|
||||
if (IS_ERR(wq))
|
||||
return PTR_ERR(wq);
|
||||
|
||||
if (cmd.attr_mask & IB_WQ_FLAGS) {
|
||||
wq_attr.flags = cmd.flags;
|
||||
@ -3095,8 +3122,8 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
|
||||
num_read_wqs++) {
|
||||
wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ,
|
||||
wqs_handles[num_read_wqs], attrs);
|
||||
if (!wq) {
|
||||
err = -EINVAL;
|
||||
if (IS_ERR(wq)) {
|
||||
err = PTR_ERR(wq);
|
||||
goto put_wqs;
|
||||
}
|
||||
|
||||
@ -3251,8 +3278,8 @@ static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs)
|
||||
}
|
||||
|
||||
qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
|
||||
if (!qp) {
|
||||
err = -EINVAL;
|
||||
if (IS_ERR(qp)) {
|
||||
err = PTR_ERR(qp);
|
||||
goto err_uobj;
|
||||
}
|
||||
|
||||
@ -3398,15 +3425,15 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs,
|
||||
if (ib_srq_has_cq(cmd->srq_type)) {
|
||||
attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
|
||||
cmd->cq_handle, attrs);
|
||||
if (!attr.ext.cq) {
|
||||
ret = -EINVAL;
|
||||
if (IS_ERR(attr.ext.cq)) {
|
||||
ret = PTR_ERR(attr.ext.cq);
|
||||
goto err_put_xrcd;
|
||||
}
|
||||
}
|
||||
|
||||
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, attrs);
|
||||
if (!pd) {
|
||||
ret = -EINVAL;
|
||||
if (IS_ERR(pd)) {
|
||||
ret = PTR_ERR(pd);
|
||||
goto err_put_cq;
|
||||
}
|
||||
|
||||
@ -3513,8 +3540,8 @@ static int ib_uverbs_modify_srq(struct uverbs_attr_bundle *attrs)
|
||||
return ret;
|
||||
|
||||
srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
|
||||
if (!srq)
|
||||
return -EINVAL;
|
||||
if (IS_ERR(srq))
|
||||
return PTR_ERR(srq);
|
||||
|
||||
attr.max_wr = cmd.max_wr;
|
||||
attr.srq_limit = cmd.srq_limit;
|
||||
@ -3541,8 +3568,8 @@ static int ib_uverbs_query_srq(struct uverbs_attr_bundle *attrs)
|
||||
return ret;
|
||||
|
||||
srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
|
||||
if (!srq)
|
||||
return -EINVAL;
|
||||
if (IS_ERR(srq))
|
||||
return PTR_ERR(srq);
|
||||
|
||||
ret = ib_query_srq(srq, &attr);
|
||||
|
||||
@ -3667,8 +3694,8 @@ static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
|
||||
if (!cq)
|
||||
return -EINVAL;
|
||||
if (IS_ERR(cq))
|
||||
return PTR_ERR(cq);
|
||||
|
||||
ret = rdma_set_cq_moderation(cq, cmd.attr.cq_count, cmd.attr.cq_period);
|
||||
|
||||
|
@ -52,6 +52,7 @@
|
||||
#include <rdma/ib.h>
|
||||
#include <rdma/uverbs_std_types.h>
|
||||
#include <rdma/rdma_netlink.h>
|
||||
#include <rdma/ib_ucaps.h>
|
||||
|
||||
#include "uverbs.h"
|
||||
#include "core_priv.h"
|
||||
@ -1345,6 +1346,7 @@ static void __exit ib_uverbs_cleanup(void)
|
||||
IB_UVERBS_NUM_FIXED_MINOR);
|
||||
unregister_chrdev_region(dynamic_uverbs_dev,
|
||||
IB_UVERBS_NUM_DYNAMIC_MINOR);
|
||||
ib_cleanup_ucaps();
|
||||
mmu_notifier_synchronize();
|
||||
}
|
||||
|
||||
|
@ -437,6 +437,10 @@ DECLARE_UVERBS_NAMED_METHOD(
|
||||
UVERBS_ATTR_TYPE(u32), UA_OPTIONAL),
|
||||
UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_GET_CONTEXT_CORE_SUPPORT,
|
||||
UVERBS_ATTR_TYPE(u64), UA_OPTIONAL),
|
||||
UVERBS_ATTR_PTR_IN(UVERBS_ATTR_GET_CONTEXT_FD_ARR,
|
||||
UVERBS_ATTR_MIN_SIZE(sizeof(int)),
|
||||
UA_OPTIONAL,
|
||||
UA_ALLOC_AND_COPY),
|
||||
UVERBS_ATTR_UHW());
|
||||
|
||||
DECLARE_UVERBS_NAMED_METHOD(
|
||||
|
@ -2105,7 +2105,7 @@ int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata)
|
||||
if (!qp->uobject)
|
||||
rdma_rw_cleanup_mrs(qp);
|
||||
|
||||
rdma_counter_unbind_qp(qp, true);
|
||||
rdma_counter_unbind_qp(qp, qp->port, true);
|
||||
ret = qp->device->ops.destroy_qp(qp, udata);
|
||||
if (ret) {
|
||||
if (sec)
|
||||
@ -3109,22 +3109,23 @@ EXPORT_SYMBOL(__rdma_block_iter_start);
|
||||
bool __rdma_block_iter_next(struct ib_block_iter *biter)
|
||||
{
|
||||
unsigned int block_offset;
|
||||
unsigned int sg_delta;
|
||||
unsigned int delta;
|
||||
|
||||
if (!biter->__sg_nents || !biter->__sg)
|
||||
return false;
|
||||
|
||||
biter->__dma_addr = sg_dma_address(biter->__sg) + biter->__sg_advance;
|
||||
block_offset = biter->__dma_addr & (BIT_ULL(biter->__pg_bit) - 1);
|
||||
sg_delta = BIT_ULL(biter->__pg_bit) - block_offset;
|
||||
delta = BIT_ULL(biter->__pg_bit) - block_offset;
|
||||
|
||||
if (sg_dma_len(biter->__sg) - biter->__sg_advance > sg_delta) {
|
||||
biter->__sg_advance += sg_delta;
|
||||
} else {
|
||||
while (biter->__sg_nents && biter->__sg &&
|
||||
sg_dma_len(biter->__sg) - biter->__sg_advance <= delta) {
|
||||
delta -= sg_dma_len(biter->__sg) - biter->__sg_advance;
|
||||
biter->__sg_advance = 0;
|
||||
biter->__sg = sg_next(biter->__sg);
|
||||
biter->__sg_nents--;
|
||||
}
|
||||
biter->__sg_advance += delta;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -225,6 +225,8 @@ struct bnxt_re_dev {
|
||||
unsigned long event_bitmap;
|
||||
struct bnxt_qplib_cc_param cc_param;
|
||||
struct workqueue_struct *dcb_wq;
|
||||
struct dentry *cc_config;
|
||||
struct bnxt_re_dbg_cc_config_params *cc_config_params;
|
||||
};
|
||||
|
||||
#define to_bnxt_re_dev(ptr, member) \
|
||||
@ -237,6 +239,10 @@ struct bnxt_re_dev {
|
||||
#define BNXT_RE_CHECK_RC(x) ((x) && ((x) != -ETIMEDOUT))
|
||||
void bnxt_re_pacing_alert(struct bnxt_re_dev *rdev);
|
||||
|
||||
int bnxt_re_assign_pma_port_counters(struct bnxt_re_dev *rdev, struct ib_mad *out_mad);
|
||||
int bnxt_re_assign_pma_port_ext_counters(struct bnxt_re_dev *rdev,
|
||||
struct ib_mad *out_mad);
|
||||
|
||||
static inline struct device *rdev_to_dev(struct bnxt_re_dev *rdev)
|
||||
{
|
||||
if (rdev)
|
||||
|
@ -22,6 +22,23 @@
|
||||
|
||||
static struct dentry *bnxt_re_debugfs_root;
|
||||
|
||||
static const char * const bnxt_re_cc_gen0_name[] = {
|
||||
"enable_cc",
|
||||
"run_avg_weight_g",
|
||||
"num_phase_per_state",
|
||||
"init_cr",
|
||||
"init_tr",
|
||||
"tos_ecn",
|
||||
"tos_dscp",
|
||||
"alt_vlan_pcp",
|
||||
"alt_vlan_dscp",
|
||||
"rtt",
|
||||
"cc_mode",
|
||||
"tcp_cp",
|
||||
"tx_queue",
|
||||
"inactivity_cp",
|
||||
};
|
||||
|
||||
static inline const char *bnxt_re_qp_state_str(u8 state)
|
||||
{
|
||||
switch (state) {
|
||||
@ -110,19 +127,215 @@ void bnxt_re_debug_rem_qpinfo(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp)
|
||||
debugfs_remove(qp->dentry);
|
||||
}
|
||||
|
||||
static int map_cc_config_offset_gen0_ext0(u32 offset, struct bnxt_qplib_cc_param *ccparam, u32 *val)
|
||||
{
|
||||
u64 map_offset;
|
||||
|
||||
map_offset = BIT(offset);
|
||||
|
||||
switch (map_offset) {
|
||||
case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC:
|
||||
*val = ccparam->enable;
|
||||
break;
|
||||
case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_G:
|
||||
*val = ccparam->g;
|
||||
break;
|
||||
case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_NUMPHASEPERSTATE:
|
||||
*val = ccparam->nph_per_state;
|
||||
break;
|
||||
case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_CR:
|
||||
*val = ccparam->init_cr;
|
||||
break;
|
||||
case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_TR:
|
||||
*val = ccparam->init_tr;
|
||||
break;
|
||||
case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN:
|
||||
*val = ccparam->tos_ecn;
|
||||
break;
|
||||
case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP:
|
||||
*val = ccparam->tos_dscp;
|
||||
break;
|
||||
case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_VLAN_PCP:
|
||||
*val = ccparam->alt_vlan_pcp;
|
||||
break;
|
||||
case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_TOS_DSCP:
|
||||
*val = ccparam->alt_tos_dscp;
|
||||
break;
|
	case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_RTT:
		*val = ccparam->rtt;
		break;
	case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_CC_MODE:
		*val = ccparam->cc_mode;
		break;
	case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TCP_CP:
		*val = ccparam->tcp_cp;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static ssize_t bnxt_re_cc_config_get(struct file *filp, char __user *buffer,
				     size_t usr_buf_len, loff_t *ppos)
{
	struct bnxt_re_cc_param *dbg_cc_param = filp->private_data;
	struct bnxt_re_dev *rdev = dbg_cc_param->rdev;
	struct bnxt_qplib_cc_param ccparam = {};
	u32 offset = dbg_cc_param->offset;
	char buf[16];
	u32 val;
	int rc;

	rc = bnxt_qplib_query_cc_param(&rdev->qplib_res, &ccparam);
	if (rc)
		return rc;

	rc = map_cc_config_offset_gen0_ext0(offset, &ccparam, &val);
	if (rc)
		return rc;

	rc = snprintf(buf, sizeof(buf), "%d\n", val);
	if (rc < 0)
		return rc;

	return simple_read_from_buffer(buffer, usr_buf_len, ppos, (u8 *)(buf), rc);
}

static void bnxt_re_fill_gen0_ext0(struct bnxt_qplib_cc_param *ccparam, u32 offset, u32 val)
{
	u32 modify_mask;

	modify_mask = BIT(offset);

	switch (modify_mask) {
	case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC:
		ccparam->enable = val;
		break;
	case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_G:
		ccparam->g = val;
		break;
	case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_NUMPHASEPERSTATE:
		ccparam->nph_per_state = val;
		break;
	case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_CR:
		ccparam->init_cr = val;
		break;
	case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_TR:
		ccparam->init_tr = val;
		break;
	case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN:
		ccparam->tos_ecn = val;
		break;
	case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP:
		ccparam->tos_dscp = val;
		break;
	case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_VLAN_PCP:
		ccparam->alt_vlan_pcp = val;
		break;
	case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_TOS_DSCP:
		ccparam->alt_tos_dscp = val;
		break;
	case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_RTT:
		ccparam->rtt = val;
		break;
	case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_CC_MODE:
		ccparam->cc_mode = val;
		break;
	case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TCP_CP:
		ccparam->tcp_cp = val;
		break;
	case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TX_QUEUE:
	case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INACTIVITY_CP:
		break;
	case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TIME_PER_PHASE:
		ccparam->time_pph = val;
		break;
	case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_PKTS_PER_PHASE:
		ccparam->pkts_pph = val;
		break;
	}

	ccparam->mask = modify_mask;
}

static int bnxt_re_configure_cc(struct bnxt_re_dev *rdev, u32 gen_ext, u32 offset, u32 val)
{
	struct bnxt_qplib_cc_param ccparam = { };

	/* Supporting only Gen 0 now */
	if (gen_ext == CC_CONFIG_GEN0_EXT0)
		bnxt_re_fill_gen0_ext0(&ccparam, offset, val);
	else
		return -EINVAL;

	bnxt_qplib_modify_cc(&rdev->qplib_res, &ccparam);
	return 0;
}

static ssize_t bnxt_re_cc_config_set(struct file *filp, const char __user *buffer,
				     size_t count, loff_t *ppos)
{
	struct bnxt_re_cc_param *dbg_cc_param = filp->private_data;
	struct bnxt_re_dev *rdev = dbg_cc_param->rdev;
	u32 offset = dbg_cc_param->offset;
	u8 cc_gen = dbg_cc_param->cc_gen;
	char buf[16];
	u32 val;
	int rc;

	if (count >= sizeof(buf))
		return -EINVAL;

	if (copy_from_user(buf, buffer, count))
		return -EFAULT;

	buf[count] = '\0';
	if (kstrtou32(buf, 0, &val))
		return -EINVAL;

	rc = bnxt_re_configure_cc(rdev, cc_gen, offset, val);
	return rc ? rc : count;
}

static const struct file_operations bnxt_re_cc_config_ops = {
	.owner = THIS_MODULE,
	.open = simple_open,
	.read = bnxt_re_cc_config_get,
	.write = bnxt_re_cc_config_set,
};

void bnxt_re_debugfs_add_pdev(struct bnxt_re_dev *rdev)
{
	struct pci_dev *pdev = rdev->en_dev->pdev;
	struct bnxt_re_dbg_cc_config_params *cc_params;
	int i;

	rdev->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), bnxt_re_debugfs_root);

	rdev->qp_debugfs = debugfs_create_dir("QPs", rdev->dbg_root);
	rdev->cc_config = debugfs_create_dir("cc_config", rdev->dbg_root);

	rdev->cc_config_params = kzalloc(sizeof(*cc_params), GFP_KERNEL);

	for (i = 0; i < BNXT_RE_CC_PARAM_GEN0; i++) {
		struct bnxt_re_cc_param *tmp_params = &rdev->cc_config_params->gen0_parms[i];

		tmp_params->rdev = rdev;
		tmp_params->offset = i;
		tmp_params->cc_gen = CC_CONFIG_GEN0_EXT0;
		tmp_params->dentry = debugfs_create_file(bnxt_re_cc_gen0_name[i], 0400,
							 rdev->cc_config, tmp_params,
							 &bnxt_re_cc_config_ops);
	}
}

void bnxt_re_debugfs_rem_pdev(struct bnxt_re_dev *rdev)
{
	debugfs_remove_recursive(rdev->qp_debugfs);

	debugfs_remove_recursive(rdev->cc_config);
	kfree(rdev->cc_config_params);
	debugfs_remove_recursive(rdev->dbg_root);
	rdev->dbg_root = NULL;
}
@@ -18,4 +18,19 @@ void bnxt_re_debugfs_rem_pdev(struct bnxt_re_dev *rdev);
void bnxt_re_register_debugfs(void);
void bnxt_re_unregister_debugfs(void);

#define CC_CONFIG_GEN_EXT(x, y) (((x) << 16) | (y))
#define CC_CONFIG_GEN0_EXT0 CC_CONFIG_GEN_EXT(0, 0)

#define BNXT_RE_CC_PARAM_GEN0 14

struct bnxt_re_cc_param {
	struct bnxt_re_dev *rdev;
	struct dentry *dentry;
	u32 offset;
	u8 cc_gen;
};

struct bnxt_re_dbg_cc_config_params {
	struct bnxt_re_cc_param gen0_parms[BNXT_RE_CC_PARAM_GEN0];
};
#endif
@@ -39,6 +39,8 @@

#include <linux/types.h>
#include <linux/pci.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_pma.h>

#include "roce_hsi.h"
#include "qplib_res.h"
@@ -285,6 +287,96 @@ static void bnxt_re_copy_db_pacing_stats(struct bnxt_re_dev *rdev,
		readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off);
}

int bnxt_re_assign_pma_port_ext_counters(struct bnxt_re_dev *rdev, struct ib_mad *out_mad)
{
	struct ib_pma_portcounters_ext *pma_cnt_ext;
	struct bnxt_qplib_ext_stat *estat = &rdev->stats.rstat.ext_stat;
	struct ctx_hw_stats *hw_stats = NULL;
	int rc;

	hw_stats = rdev->qplib_ctx.stats.dma;

	pma_cnt_ext = (struct ib_pma_portcounters_ext *)(out_mad->data + 40);
	if (_is_ext_stats_supported(rdev->dev_attr->dev_cap_flags)) {
		u32 fid = PCI_FUNC(rdev->en_dev->pdev->devfn);

		rc = bnxt_qplib_qext_stat(&rdev->rcfw, fid, estat);
		if (rc)
			return rc;
	}

	pma_cnt_ext = (struct ib_pma_portcounters_ext *)(out_mad->data + 40);
	if ((bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) && rdev->is_virtfn) ||
	    !bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) {
		pma_cnt_ext->port_xmit_data =
			cpu_to_be64(le64_to_cpu(hw_stats->tx_ucast_bytes) / 4);
		pma_cnt_ext->port_rcv_data =
			cpu_to_be64(le64_to_cpu(hw_stats->rx_ucast_bytes) / 4);
		pma_cnt_ext->port_xmit_packets =
			cpu_to_be64(le64_to_cpu(hw_stats->tx_ucast_pkts));
		pma_cnt_ext->port_rcv_packets =
			cpu_to_be64(le64_to_cpu(hw_stats->rx_ucast_pkts));
		pma_cnt_ext->port_unicast_rcv_packets =
			cpu_to_be64(le64_to_cpu(hw_stats->rx_ucast_pkts));
		pma_cnt_ext->port_unicast_xmit_packets =
			cpu_to_be64(le64_to_cpu(hw_stats->tx_ucast_pkts));

	} else {
		pma_cnt_ext->port_rcv_packets = cpu_to_be64(estat->rx_roce_good_pkts);
		pma_cnt_ext->port_rcv_data = cpu_to_be64(estat->rx_roce_good_bytes / 4);
		pma_cnt_ext->port_xmit_packets = cpu_to_be64(estat->tx_roce_pkts);
		pma_cnt_ext->port_xmit_data = cpu_to_be64(estat->tx_roce_bytes / 4);
		pma_cnt_ext->port_unicast_rcv_packets = cpu_to_be64(estat->rx_roce_good_pkts);
		pma_cnt_ext->port_unicast_xmit_packets = cpu_to_be64(estat->tx_roce_pkts);
	}
	return 0;
}

int bnxt_re_assign_pma_port_counters(struct bnxt_re_dev *rdev, struct ib_mad *out_mad)
{
	struct bnxt_qplib_ext_stat *estat = &rdev->stats.rstat.ext_stat;
	struct ib_pma_portcounters *pma_cnt;
	struct ctx_hw_stats *hw_stats = NULL;
	int rc;

	hw_stats = rdev->qplib_ctx.stats.dma;

	pma_cnt = (struct ib_pma_portcounters *)(out_mad->data + 40);
	if (_is_ext_stats_supported(rdev->dev_attr->dev_cap_flags)) {
		u32 fid = PCI_FUNC(rdev->en_dev->pdev->devfn);

		rc = bnxt_qplib_qext_stat(&rdev->rcfw, fid, estat);
		if (rc)
			return rc;
	}
	if ((bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) && rdev->is_virtfn) ||
	    !bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) {
		pma_cnt->port_rcv_packets =
			cpu_to_be32((u32)(le64_to_cpu(hw_stats->rx_ucast_pkts)) & 0xFFFFFFFF);
		pma_cnt->port_rcv_data =
			cpu_to_be32((u32)((le64_to_cpu(hw_stats->rx_ucast_bytes) &
					   0xFFFFFFFF) / 4));
		pma_cnt->port_xmit_packets =
			cpu_to_be32((u32)(le64_to_cpu(hw_stats->tx_ucast_pkts)) & 0xFFFFFFFF);
		pma_cnt->port_xmit_data =
			cpu_to_be32((u32)((le64_to_cpu(hw_stats->tx_ucast_bytes)
					   & 0xFFFFFFFF) / 4));
	} else {
		pma_cnt->port_rcv_packets = cpu_to_be32(estat->rx_roce_good_pkts);
		pma_cnt->port_rcv_data = cpu_to_be32((estat->rx_roce_good_bytes / 4));
		pma_cnt->port_xmit_packets = cpu_to_be32(estat->tx_roce_pkts);
		pma_cnt->port_xmit_data = cpu_to_be32((estat->tx_roce_bytes / 4));
	}
	pma_cnt->port_rcv_constraint_errors = (u8)(le64_to_cpu(hw_stats->rx_discard_pkts) & 0xFF);
	pma_cnt->port_rcv_errors = cpu_to_be16((u16)(le64_to_cpu(hw_stats->rx_error_pkts)
						     & 0xFFFF));
	pma_cnt->port_xmit_constraint_errors = (u8)(le64_to_cpu(hw_stats->tx_error_pkts) & 0xFF);
	pma_cnt->port_xmit_discards = cpu_to_be16((u16)(le64_to_cpu(hw_stats->tx_discard_pkts)
							& 0xFFFF));

	return 0;
}

int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
			    struct rdma_hw_stats *stats,
			    u32 port, int index)
@@ -49,6 +49,7 @@
#include <rdma/ib_addr.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_pma.h>
#include <rdma/uverbs_ioctl.h>
#include <linux/hashtable.h>

@@ -4491,6 +4492,41 @@ void bnxt_re_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
	kfree(bnxt_entry);
}

int bnxt_re_process_mad(struct ib_device *ibdev, int mad_flags,
			u32 port_num, const struct ib_wc *in_wc,
			const struct ib_grh *in_grh,
			const struct ib_mad *in_mad, struct ib_mad *out_mad,
			size_t *out_mad_size, u16 *out_mad_pkey_index)
{
	struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
	struct ib_class_port_info cpi = {};
	int ret = IB_MAD_RESULT_SUCCESS;
	int rc = 0;

	if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
		return ret;

	switch (in_mad->mad_hdr.attr_id) {
	case IB_PMA_CLASS_PORT_INFO:
		cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
		memcpy((out_mad->data + 40), &cpi, sizeof(cpi));
		break;
	case IB_PMA_PORT_COUNTERS_EXT:
		rc = bnxt_re_assign_pma_port_ext_counters(rdev, out_mad);
		break;
	case IB_PMA_PORT_COUNTERS:
		rc = bnxt_re_assign_pma_port_counters(rdev, out_mad);
		break;
	default:
		rc = -EINVAL;
		break;
	}
	if (rc)
		return IB_MAD_RESULT_FAILURE;
	ret |= IB_MAD_RESULT_REPLY;
	return ret;
}

static int UVERBS_HANDLER(BNXT_RE_METHOD_NOTIFY_DRV)(struct uverbs_attr_bundle *attrs)
{
	struct bnxt_re_ucontext *uctx;
@@ -268,6 +268,12 @@ void bnxt_re_dealloc_ucontext(struct ib_ucontext *context);
int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
void bnxt_re_mmap_free(struct rdma_user_mmap_entry *rdma_entry);

int bnxt_re_process_mad(struct ib_device *device, int process_mad_flags,
			u32 port_num, const struct ib_wc *in_wc,
			const struct ib_grh *in_grh,
			const struct ib_mad *in_mad, struct ib_mad *out_mad,
			size_t *out_mad_size, u16 *out_mad_pkey_index);

static inline u32 __to_ib_port_num(u16 port_id)
{
	return (u32)port_id + 1;
|
@ -1285,6 +1285,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
|
||||
.post_recv = bnxt_re_post_recv,
|
||||
.post_send = bnxt_re_post_send,
|
||||
.post_srq_recv = bnxt_re_post_srq_recv,
|
||||
.process_mad = bnxt_re_process_mad,
|
||||
.query_ah = bnxt_re_query_ah,
|
||||
.query_device = bnxt_re_query_device,
|
||||
.modify_device = bnxt_re_modify_device,
|
||||
|
@ -709,7 +709,6 @@ error:
|
||||
erdma_cancel_mpatimer(new_cep);
|
||||
|
||||
erdma_cep_put(new_cep);
|
||||
new_cep->sock = NULL;
|
||||
}
|
||||
|
||||
if (new_s) {
|
||||
|
@ -12882,22 +12882,6 @@ u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate)
|
||||
}
|
||||
}
|
||||
|
||||
/* return the OPA port logical state name */
|
||||
const char *opa_lstate_name(u32 lstate)
|
||||
{
|
||||
static const char * const port_logical_names[] = {
|
||||
"PORT_NOP",
|
||||
"PORT_DOWN",
|
||||
"PORT_INIT",
|
||||
"PORT_ARMED",
|
||||
"PORT_ACTIVE",
|
||||
"PORT_ACTIVE_DEFER",
|
||||
};
|
||||
if (lstate < ARRAY_SIZE(port_logical_names))
|
||||
return port_logical_names[lstate];
|
||||
return "unknown";
|
||||
}
|
||||
|
||||
/* return the OPA port physical state name */
|
||||
const char *opa_pstate_name(u32 pstate)
|
||||
{
|
||||
@ -12956,8 +12940,6 @@ static void update_statusp(struct hfi1_pportdata *ppd, u32 state)
|
||||
break;
|
||||
}
|
||||
}
|
||||
dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n",
|
||||
opa_lstate_name(state), state);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -771,7 +771,6 @@ int is_bx(struct hfi1_devdata *dd);
|
||||
bool is_urg_masked(struct hfi1_ctxtdata *rcd);
|
||||
u32 read_physical_state(struct hfi1_devdata *dd);
|
||||
u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate);
|
||||
const char *opa_lstate_name(u32 lstate);
|
||||
const char *opa_pstate_name(u32 pstate);
|
||||
u32 driver_pstate(struct hfi1_pportdata *ppd);
|
||||
u32 driver_lstate(struct hfi1_pportdata *ppd);
|
||||
|
@ -968,7 +968,7 @@ static bool __set_armed_to_active(struct hfi1_packet *packet)
|
||||
if (hwstate != IB_PORT_ACTIVE) {
|
||||
dd_dev_info(packet->rcd->dd,
|
||||
"Unexpected link state %s\n",
|
||||
opa_lstate_name(hwstate));
|
||||
ib_port_state_to_str(hwstate));
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1160,8 +1160,8 @@ static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
|
||||
if (ret == HFI_TRANSITION_DISALLOWED ||
|
||||
ret == HFI_TRANSITION_UNDEFINED) {
|
||||
pr_warn("invalid logical state transition %s -> %s\n",
|
||||
opa_lstate_name(logical_old),
|
||||
opa_lstate_name(logical_new));
|
||||
ib_port_state_to_str(logical_old),
|
||||
ib_port_state_to_str(logical_new));
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -404,26 +404,6 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
|
||||
return count;
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform a stand-alone single QSFP write. Acquire the resource, do the
|
||||
* write, then release the resource.
|
||||
*/
|
||||
int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
|
||||
int len)
|
||||
{
|
||||
struct hfi1_devdata *dd = ppd->dd;
|
||||
u32 resource = qsfp_resource(dd);
|
||||
int ret;
|
||||
|
||||
ret = acquire_chip_resource(dd, resource, QSFP_WAIT);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = qsfp_write(ppd, target, addr, bp, len);
|
||||
release_chip_resource(dd, resource);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Access page n, offset m of QSFP memory as defined by SFF 8636
|
||||
* by reading @addr = ((256 * n) + m)
|
||||
|
@ -195,8 +195,6 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
|
||||
int len);
|
||||
int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
|
||||
int len);
|
||||
int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
|
||||
int len);
|
||||
int one_qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
|
||||
int len);
|
||||
struct hfi1_asic_data;
|
||||
|
@ -998,7 +998,7 @@ static bool is_buf_attr_valid(struct hns_roce_dev *hr_dev,
|
||||
if (attr->region_count > ARRAY_SIZE(attr->region) ||
|
||||
attr->region_count < 1 || attr->page_shift < HNS_HW_PAGE_SHIFT) {
|
||||
ibdev_err(ibdev,
|
||||
"invalid buf attr, region count %d, page shift %u.\n",
|
||||
"invalid buf attr, region count %u, page shift %u.\n",
|
||||
attr->region_count, attr->page_shift);
|
||||
return false;
|
||||
}
|
||||
|
@ -1320,7 +1320,7 @@ int hns_roce_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr,
|
||||
|
||||
ret = hns_roce_create_qp_common(hr_dev, init_attr, udata, hr_qp);
|
||||
if (ret)
|
||||
ibdev_err(ibdev, "create QP type 0x%x failed(%d)\n",
|
||||
ibdev_err(ibdev, "create QP type %d failed(%d)\n",
|
||||
init_attr->qp_type, ret);
|
||||
|
||||
err_out:
|
||||
|
@ -51,7 +51,7 @@ static void hns_roce_ib_srq_event(struct hns_roce_srq *srq,
|
||||
break;
|
||||
default:
|
||||
dev_err(hr_dev->dev,
|
||||
"hns_roce:Unexpected event type 0x%x on SRQ %06lx\n",
|
||||
"hns_roce:Unexpected event type %d on SRQ %06lx\n",
|
||||
event_type, srq->srqn);
|
||||
return;
|
||||
}
|
||||
|
@ -7,6 +7,7 @@ config INFINIBAND_IRDMA
|
||||
depends on ICE && I40E
|
||||
select GENERIC_ALLOCATOR
|
||||
select AUXILIARY_BUS
|
||||
select CRC32
|
||||
help
|
||||
This is an Intel(R) Ethernet Protocol Driver for RDMA driver
|
||||
that support E810 (iWARP/RoCE) and X722 (iWARP) network devices.
|
||||
|
@ -30,7 +30,6 @@
|
||||
#endif
|
||||
#include <linux/auxiliary_bus.h>
|
||||
#include <linux/net/intel/iidc.h>
|
||||
#include <crypto/hash.h>
|
||||
#include <rdma/ib_smi.h>
|
||||
#include <rdma/ib_verbs.h>
|
||||
#include <rdma/ib_pack.h>
|
||||
|
@ -6,7 +6,6 @@
|
||||
#include <linux/pci.h>
|
||||
#include <linux/bitfield.h>
|
||||
#include <linux/net/intel/iidc.h>
|
||||
#include <crypto/hash.h>
|
||||
#include <rdma/ib_verbs.h>
|
||||
|
||||
#define STATS_TIMER_DELAY 60000
|
||||
@ -43,15 +42,12 @@ enum irdma_status_code irdma_vf_wait_vchnl_resp(struct irdma_sc_dev *dev);
|
||||
bool irdma_vf_clear_to_send(struct irdma_sc_dev *dev);
|
||||
void irdma_add_dev_ref(struct irdma_sc_dev *dev);
|
||||
void irdma_put_dev_ref(struct irdma_sc_dev *dev);
|
||||
int irdma_ieq_check_mpacrc(struct shash_desc *desc, void *addr, u32 len,
|
||||
u32 val);
|
||||
int irdma_ieq_check_mpacrc(const void *addr, u32 len, u32 val);
|
||||
struct irdma_sc_qp *irdma_ieq_get_qp(struct irdma_sc_dev *dev,
|
||||
struct irdma_puda_buf *buf);
|
||||
void irdma_send_ieq_ack(struct irdma_sc_qp *qp);
|
||||
void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len,
|
||||
u32 seqnum);
|
||||
void irdma_free_hash_desc(struct shash_desc *hash_desc);
|
||||
int irdma_init_hash_desc(struct shash_desc **hash_desc);
|
||||
int irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
|
||||
struct irdma_puda_buf *buf);
|
||||
int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev,
|
||||
|
@ -923,8 +923,6 @@ void irdma_puda_dele_rsrc(struct irdma_sc_vsi *vsi, enum puda_rsrc_type type,
|
||||
|
||||
switch (rsrc->cmpl) {
|
||||
case PUDA_HASH_CRC_COMPLETE:
|
||||
irdma_free_hash_desc(rsrc->hash_desc);
|
||||
fallthrough;
|
||||
case PUDA_QP_CREATED:
|
||||
irdma_qp_rem_qos(&rsrc->qp);
|
||||
|
||||
@ -1095,15 +1093,12 @@ int irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi,
|
||||
goto error;
|
||||
|
||||
if (info->type == IRDMA_PUDA_RSRC_TYPE_IEQ) {
|
||||
if (!irdma_init_hash_desc(&rsrc->hash_desc)) {
|
||||
rsrc->check_crc = true;
|
||||
rsrc->cmpl = PUDA_HASH_CRC_COMPLETE;
|
||||
ret = 0;
|
||||
}
|
||||
rsrc->check_crc = true;
|
||||
rsrc->cmpl = PUDA_HASH_CRC_COMPLETE;
|
||||
}
|
||||
|
||||
irdma_sc_ccq_arm(&rsrc->cq);
|
||||
return ret;
|
||||
return 0;
|
||||
|
||||
error:
|
||||
irdma_puda_dele_rsrc(vsi, info->type, false);
|
||||
@ -1396,8 +1391,8 @@ static int irdma_ieq_handle_partial(struct irdma_puda_rsrc *ieq,
|
||||
crcptr = txbuf->data + fpdu_len - 4;
|
||||
mpacrc = *(u32 *)crcptr;
|
||||
if (ieq->check_crc) {
|
||||
status = irdma_ieq_check_mpacrc(ieq->hash_desc, txbuf->data,
|
||||
(fpdu_len - 4), mpacrc);
|
||||
status = irdma_ieq_check_mpacrc(txbuf->data, fpdu_len - 4,
|
||||
mpacrc);
|
||||
if (status) {
|
||||
ibdev_dbg(to_ibdev(ieq->dev), "IEQ: error bad crc\n");
|
||||
goto error;
|
||||
@ -1465,8 +1460,8 @@ static int irdma_ieq_process_buf(struct irdma_puda_rsrc *ieq,
|
||||
crcptr = datap + fpdu_len - 4;
|
||||
mpacrc = *(u32 *)crcptr;
|
||||
if (ieq->check_crc)
|
||||
ret = irdma_ieq_check_mpacrc(ieq->hash_desc, datap,
|
||||
fpdu_len - 4, mpacrc);
|
||||
ret = irdma_ieq_check_mpacrc(datap, fpdu_len - 4,
|
||||
mpacrc);
|
||||
if (ret) {
|
||||
list_add(&buf->list, rxlist);
|
||||
ibdev_dbg(to_ibdev(ieq->dev),
|
||||
|
@ -119,7 +119,6 @@ struct irdma_puda_rsrc {
|
||||
u32 rx_wqe_idx;
|
||||
u32 rxq_invalid_cnt;
|
||||
u32 tx_wqe_avail_cnt;
|
||||
struct shash_desc *hash_desc;
|
||||
struct list_head txpend;
|
||||
struct list_head bufpool; /* free buffers pool list for recv and xmit */
|
||||
u32 alloc_buf_count;
|
||||
@ -163,10 +162,8 @@ struct irdma_sc_qp *irdma_ieq_get_qp(struct irdma_sc_dev *dev,
|
||||
struct irdma_puda_buf *buf);
|
||||
int irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
|
||||
struct irdma_puda_buf *buf);
|
||||
int irdma_ieq_check_mpacrc(struct shash_desc *desc, void *addr, u32 len, u32 val);
|
||||
int irdma_init_hash_desc(struct shash_desc **desc);
|
||||
int irdma_ieq_check_mpacrc(const void *addr, u32 len, u32 val);
|
||||
void irdma_ieq_mpa_crc_ae(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp);
|
||||
void irdma_free_hash_desc(struct shash_desc *desc);
|
||||
void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len, u32 seqnum);
|
||||
int irdma_cqp_qp_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp);
|
||||
int irdma_cqp_cq_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq);
|
||||
|
@ -1273,58 +1273,15 @@ void irdma_ieq_mpa_crc_ae(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp)
|
||||
irdma_gen_ae(rf, qp, &info, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* irdma_init_hash_desc - initialize hash for crc calculation
|
||||
* @desc: cryption type
|
||||
*/
|
||||
int irdma_init_hash_desc(struct shash_desc **desc)
|
||||
{
|
||||
struct crypto_shash *tfm;
|
||||
struct shash_desc *tdesc;
|
||||
|
||||
tfm = crypto_alloc_shash("crc32c", 0, 0);
|
||||
if (IS_ERR(tfm))
|
||||
return -EINVAL;
|
||||
|
||||
tdesc = kzalloc(sizeof(*tdesc) + crypto_shash_descsize(tfm),
|
||||
GFP_KERNEL);
|
||||
if (!tdesc) {
|
||||
crypto_free_shash(tfm);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
tdesc->tfm = tfm;
|
||||
*desc = tdesc;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* irdma_free_hash_desc - free hash desc
|
||||
* @desc: to be freed
|
||||
*/
|
||||
void irdma_free_hash_desc(struct shash_desc *desc)
|
||||
{
|
||||
if (desc) {
|
||||
crypto_free_shash(desc->tfm);
|
||||
kfree(desc);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* irdma_ieq_check_mpacrc - check if mpa crc is OK
|
||||
* @desc: desc for hash
|
||||
* @addr: address of buffer for crc
|
||||
* @len: length of buffer
|
||||
* @val: value to be compared
|
||||
*/
|
||||
int irdma_ieq_check_mpacrc(struct shash_desc *desc, void *addr, u32 len,
|
||||
u32 val)
|
||||
int irdma_ieq_check_mpacrc(const void *addr, u32 len, u32 val)
|
||||
{
|
||||
u32 crc = 0;
|
||||
|
||||
crypto_shash_digest(desc, addr, len, (u8 *)&crc);
|
||||
if (crc != val)
|
||||
if ((__force u32)cpu_to_le32(~crc32c(~0, addr, len)) != val)
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
|
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_MANA_INFINIBAND) += mana_ib.o

mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o
mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o ah.o wr.o counters.o
|
58  drivers/infiniband/hw/mana/ah.c  Normal file
@@ -0,0 +1,58 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
 */

#include "mana_ib.h"

int mana_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *attr,
		      struct ib_udata *udata)
{
	struct mana_ib_dev *mdev = container_of(ibah->device, struct mana_ib_dev, ib_dev);
	struct mana_ib_ah *ah = container_of(ibah, struct mana_ib_ah, ibah);
	struct rdma_ah_attr *ah_attr = attr->ah_attr;
	const struct ib_global_route *grh;
	enum rdma_network_type ntype;

	if (ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE ||
	    !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH))
		return -EINVAL;

	if (udata)
		return -EINVAL;

	ah->av = dma_pool_zalloc(mdev->av_pool, GFP_ATOMIC, &ah->dma_handle);
	if (!ah->av)
		return -ENOMEM;

	grh = rdma_ah_read_grh(ah_attr);
	ntype = rdma_gid_attr_network_type(grh->sgid_attr);

	copy_in_reverse(ah->av->dest_mac, ah_attr->roce.dmac, ETH_ALEN);
	ah->av->udp_src_port = rdma_flow_label_to_udp_sport(grh->flow_label);
	ah->av->hop_limit = grh->hop_limit;
	ah->av->dscp = (grh->traffic_class >> 2) & 0x3f;
	ah->av->is_ipv6 = (ntype == RDMA_NETWORK_IPV6);

	if (ah->av->is_ipv6) {
		copy_in_reverse(ah->av->dest_ip, grh->dgid.raw, 16);
		copy_in_reverse(ah->av->src_ip, grh->sgid_attr->gid.raw, 16);
	} else {
		ah->av->dest_ip[10] = 0xFF;
		ah->av->dest_ip[11] = 0xFF;
		copy_in_reverse(&ah->av->dest_ip[12], &grh->dgid.raw[12], 4);
		copy_in_reverse(&ah->av->src_ip[12], &grh->sgid_attr->gid.raw[12], 4);
	}

	return 0;
}

int mana_ib_destroy_ah(struct ib_ah *ibah, u32 flags)
{
	struct mana_ib_dev *mdev = container_of(ibah->device, struct mana_ib_dev, ib_dev);
	struct mana_ib_ah *ah = container_of(ibah, struct mana_ib_ah, ibah);

	dma_pool_free(mdev->av_pool, ah->av, ah->dma_handle);

	return 0;
}
105  drivers/infiniband/hw/mana/counters.c  Normal file
@@ -0,0 +1,105 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2024, Microsoft Corporation. All rights reserved.
|
||||
*/
|
||||
|
||||
#include "counters.h"
|
||||
|
||||
static const struct rdma_stat_desc mana_ib_port_stats_desc[] = {
|
||||
[MANA_IB_REQUESTER_TIMEOUT].name = "requester_timeout",
|
||||
[MANA_IB_REQUESTER_OOS_NAK].name = "requester_oos_nak",
|
||||
[MANA_IB_REQUESTER_RNR_NAK].name = "requester_rnr_nak",
|
||||
[MANA_IB_RESPONDER_RNR_NAK].name = "responder_rnr_nak",
|
||||
[MANA_IB_RESPONDER_OOS].name = "responder_oos",
|
||||
[MANA_IB_RESPONDER_DUP_REQUEST].name = "responder_dup_request",
|
||||
[MANA_IB_REQUESTER_IMPLICIT_NAK].name = "requester_implicit_nak",
|
||||
[MANA_IB_REQUESTER_READRESP_PSN_MISMATCH].name = "requester_readresp_psn_mismatch",
|
||||
[MANA_IB_NAK_INV_REQ].name = "nak_inv_req",
|
||||
[MANA_IB_NAK_ACCESS_ERR].name = "nak_access_error",
|
||||
[MANA_IB_NAK_OPP_ERR].name = "nak_opp_error",
|
||||
[MANA_IB_NAK_INV_READ].name = "nak_inv_read",
|
||||
[MANA_IB_RESPONDER_LOCAL_LEN_ERR].name = "responder_local_len_error",
|
||||
[MANA_IB_REQUESTOR_LOCAL_PROT_ERR].name = "requestor_local_prot_error",
|
||||
[MANA_IB_RESPONDER_REM_ACCESS_ERR].name = "responder_rem_access_error",
|
||||
[MANA_IB_RESPONDER_LOCAL_QP_ERR].name = "responder_local_qp_error",
|
||||
[MANA_IB_RESPONDER_MALFORMED_WQE].name = "responder_malformed_wqe",
|
||||
[MANA_IB_GENERAL_HW_ERR].name = "general_hw_error",
|
||||
[MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED].name = "requester_rnr_nak_retries_exceeded",
|
||||
[MANA_IB_REQUESTER_RETRIES_EXCEEDED].name = "requester_retries_exceeded",
|
||||
[MANA_IB_TOTAL_FATAL_ERR].name = "total_fatal_error",
|
||||
[MANA_IB_RECEIVED_CNPS].name = "received_cnps",
|
||||
[MANA_IB_NUM_QPS_CONGESTED].name = "num_qps_congested",
|
||||
[MANA_IB_RATE_INC_EVENTS].name = "rate_inc_events",
|
||||
[MANA_IB_NUM_QPS_RECOVERED].name = "num_qps_recovered",
|
||||
[MANA_IB_CURRENT_RATE].name = "current_rate",
|
||||
};
|
||||
|
||||
struct rdma_hw_stats *mana_ib_alloc_hw_port_stats(struct ib_device *ibdev,
|
||||
u32 port_num)
|
||||
{
|
||||
return rdma_alloc_hw_stats_struct(mana_ib_port_stats_desc,
|
||||
ARRAY_SIZE(mana_ib_port_stats_desc),
|
||||
RDMA_HW_STATS_DEFAULT_LIFESPAN);
|
||||
}
|
||||
|
||||
int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
|
||||
u32 port_num, int index)
|
||||
{
|
||||
struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev,
|
||||
ib_dev);
|
||||
struct mana_rnic_query_vf_cntrs_resp resp = {};
|
||||
struct mana_rnic_query_vf_cntrs_req req = {};
|
||||
int err;
|
||||
|
||||
mana_gd_init_req_hdr(&req.hdr, MANA_IB_QUERY_VF_COUNTERS,
|
||||
sizeof(req), sizeof(resp));
|
||||
req.hdr.dev_id = mdev->gdma_dev->dev_id;
|
||||
req.adapter = mdev->adapter_handle;
|
||||
|
||||
err = mana_gd_send_request(mdev_to_gc(mdev), sizeof(req), &req,
|
||||
sizeof(resp), &resp);
|
||||
if (err) {
|
||||
ibdev_err(&mdev->ib_dev, "Failed to query vf counters err %d",
|
||||
err);
|
||||
return err;
|
||||
}
|
||||
|
||||
stats->value[MANA_IB_REQUESTER_TIMEOUT] = resp.requester_timeout;
|
||||
stats->value[MANA_IB_REQUESTER_OOS_NAK] = resp.requester_oos_nak;
|
||||
stats->value[MANA_IB_REQUESTER_RNR_NAK] = resp.requester_rnr_nak;
|
||||
stats->value[MANA_IB_RESPONDER_RNR_NAK] = resp.responder_rnr_nak;
|
||||
stats->value[MANA_IB_RESPONDER_OOS] = resp.responder_oos;
|
||||
stats->value[MANA_IB_RESPONDER_DUP_REQUEST] = resp.responder_dup_request;
|
||||
stats->value[MANA_IB_REQUESTER_IMPLICIT_NAK] =
|
||||
resp.requester_implicit_nak;
|
||||
stats->value[MANA_IB_REQUESTER_READRESP_PSN_MISMATCH] =
|
||||
resp.requester_readresp_psn_mismatch;
|
||||
stats->value[MANA_IB_NAK_INV_REQ] = resp.nak_inv_req;
|
||||
stats->value[MANA_IB_NAK_ACCESS_ERR] = resp.nak_access_err;
|
||||
stats->value[MANA_IB_NAK_OPP_ERR] = resp.nak_opp_err;
|
||||
stats->value[MANA_IB_NAK_INV_READ] = resp.nak_inv_read;
|
||||
stats->value[MANA_IB_RESPONDER_LOCAL_LEN_ERR] =
|
||||
resp.responder_local_len_err;
|
||||
stats->value[MANA_IB_REQUESTOR_LOCAL_PROT_ERR] =
|
||||
resp.requestor_local_prot_err;
|
||||
stats->value[MANA_IB_RESPONDER_REM_ACCESS_ERR] =
|
||||
resp.responder_rem_access_err;
|
||||
stats->value[MANA_IB_RESPONDER_LOCAL_QP_ERR] =
|
||||
resp.responder_local_qp_err;
|
||||
stats->value[MANA_IB_RESPONDER_MALFORMED_WQE] =
|
||||
resp.responder_malformed_wqe;
|
||||
stats->value[MANA_IB_GENERAL_HW_ERR] = resp.general_hw_err;
|
||||
stats->value[MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED] =
|
||||
resp.requester_rnr_nak_retries_exceeded;
|
||||
stats->value[MANA_IB_REQUESTER_RETRIES_EXCEEDED] =
|
||||
resp.requester_retries_exceeded;
|
||||
stats->value[MANA_IB_TOTAL_FATAL_ERR] = resp.total_fatal_err;
|
||||
|
||||
stats->value[MANA_IB_RECEIVED_CNPS] = resp.received_cnps;
|
||||
stats->value[MANA_IB_NUM_QPS_CONGESTED] = resp.num_qps_congested;
|
||||
stats->value[MANA_IB_RATE_INC_EVENTS] = resp.rate_inc_events;
|
||||
stats->value[MANA_IB_NUM_QPS_RECOVERED] = resp.num_qps_recovered;
|
||||
stats->value[MANA_IB_CURRENT_RATE] = resp.current_rate;
|
||||
|
||||
return ARRAY_SIZE(mana_ib_port_stats_desc);
|
||||
}
|
44  drivers/infiniband/hw/mana/counters.h  Normal file
@@ -0,0 +1,44 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (c) 2024 Microsoft Corporation. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _COUNTERS_H_
|
||||
#define _COUNTERS_H_
|
||||
|
||||
#include "mana_ib.h"
|
||||
|
||||
enum mana_ib_port_counters {
|
||||
MANA_IB_REQUESTER_TIMEOUT,
|
||||
MANA_IB_REQUESTER_OOS_NAK,
|
||||
MANA_IB_REQUESTER_RNR_NAK,
|
||||
MANA_IB_RESPONDER_RNR_NAK,
|
||||
MANA_IB_RESPONDER_OOS,
|
||||
MANA_IB_RESPONDER_DUP_REQUEST,
|
||||
MANA_IB_REQUESTER_IMPLICIT_NAK,
|
||||
MANA_IB_REQUESTER_READRESP_PSN_MISMATCH,
|
||||
MANA_IB_NAK_INV_REQ,
|
||||
MANA_IB_NAK_ACCESS_ERR,
|
||||
MANA_IB_NAK_OPP_ERR,
|
||||
MANA_IB_NAK_INV_READ,
|
||||
MANA_IB_RESPONDER_LOCAL_LEN_ERR,
|
||||
MANA_IB_REQUESTOR_LOCAL_PROT_ERR,
|
||||
MANA_IB_RESPONDER_REM_ACCESS_ERR,
|
||||
MANA_IB_RESPONDER_LOCAL_QP_ERR,
|
||||
MANA_IB_RESPONDER_MALFORMED_WQE,
|
||||
MANA_IB_GENERAL_HW_ERR,
|
||||
MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED,
|
||||
MANA_IB_REQUESTER_RETRIES_EXCEEDED,
|
||||
MANA_IB_TOTAL_FATAL_ERR,
|
||||
MANA_IB_RECEIVED_CNPS,
|
||||
MANA_IB_NUM_QPS_CONGESTED,
|
||||
MANA_IB_RATE_INC_EVENTS,
|
||||
MANA_IB_NUM_QPS_RECOVERED,
|
||||
MANA_IB_CURRENT_RATE,
|
||||
};
|
||||
|
||||
struct rdma_hw_stats *mana_ib_alloc_hw_port_stats(struct ib_device *ibdev,
|
||||
u32 port_num);
|
||||
int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
|
||||
u32 port_num, int index);
|
||||
#endif /* _COUNTERS_H_ */
|
@ -15,43 +15,59 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
|
||||
struct ib_device *ibdev = ibcq->device;
|
||||
struct mana_ib_create_cq ucmd = {};
|
||||
struct mana_ib_dev *mdev;
|
||||
struct gdma_context *gc;
|
||||
bool is_rnic_cq;
|
||||
u32 doorbell;
|
||||
u32 buf_size;
|
||||
int err;
|
||||
|
||||
mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
|
||||
gc = mdev_to_gc(mdev);
|
||||
|
||||
cq->comp_vector = attr->comp_vector % ibdev->num_comp_vectors;
|
||||
cq->cq_handle = INVALID_MANA_HANDLE;
|
||||
|
||||
if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
|
||||
return -EINVAL;
|
||||
if (udata) {
|
||||
if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
|
||||
return -EINVAL;
|
||||
|
||||
err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
|
||||
if (err) {
|
||||
ibdev_dbg(ibdev,
|
||||
"Failed to copy from udata for create cq, %d\n", err);
|
||||
return err;
|
||||
err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
|
||||
if (err) {
|
||||
ibdev_dbg(ibdev, "Failed to copy from udata for create cq, %d\n", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
is_rnic_cq = !!(ucmd.flags & MANA_IB_CREATE_RNIC_CQ);
|
||||
|
||||
if ((!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) ||
|
||||
attr->cqe > U32_MAX / COMP_ENTRY_SIZE) {
|
||||
ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
cq->cqe = attr->cqe;
|
||||
err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE,
|
||||
&cq->queue);
|
||||
if (err) {
|
||||
ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
|
||||
ibucontext);
|
||||
doorbell = mana_ucontext->doorbell;
|
||||
} else {
|
||||
is_rnic_cq = true;
|
||||
buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr->cqe * COMP_ENTRY_SIZE));
|
||||
cq->cqe = buf_size / COMP_ENTRY_SIZE;
|
||||
err = mana_ib_create_kernel_queue(mdev, buf_size, GDMA_CQ, &cq->queue);
|
||||
if (err) {
|
||||
ibdev_dbg(ibdev, "Failed to create kernel queue for create cq, %d\n", err);
|
||||
return err;
|
||||
}
|
||||
doorbell = gc->mana_ib.doorbell;
|
||||
}
|
||||
|
||||
is_rnic_cq = !!(ucmd.flags & MANA_IB_CREATE_RNIC_CQ);
|
||||
|
||||
if (!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) {
|
||||
ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
cq->cqe = attr->cqe;
|
||||
err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE, &cq->queue);
|
||||
if (err) {
|
||||
ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
|
||||
ibucontext);
|
||||
doorbell = mana_ucontext->doorbell;
|
||||
|
||||
if (is_rnic_cq) {
|
||||
err = mana_ib_gd_create_cq(mdev, cq, doorbell);
|
||||
if (err) {
|
||||
@ -66,13 +82,19 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
|
||||
}
|
||||
}
|
||||
|
||||
resp.cqid = cq->queue.id;
|
||||
err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
|
||||
if (err) {
|
||||
ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
|
||||
goto err_remove_cq_cb;
|
||||
if (udata) {
|
||||
resp.cqid = cq->queue.id;
|
||||
err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
|
||||
if (err) {
|
||||
ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
|
||||
goto err_remove_cq_cb;
|
||||
}
|
||||
}
|
||||
|
||||
spin_lock_init(&cq->cq_lock);
|
||||
INIT_LIST_HEAD(&cq->list_send_qp);
|
||||
INIT_LIST_HEAD(&cq->list_recv_qp);
|
||||
|
||||
return 0;
|
||||
|
||||
err_remove_cq_cb:
|
||||
@ -122,7 +144,10 @@ int mana_ib_install_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
|
||||
return -EINVAL;
|
||||
/* Create CQ table entry */
|
||||
WARN_ON(gc->cq_table[cq->queue.id]);
|
||||
gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL);
|
||||
if (cq->queue.kmem)
|
||||
gdma_cq = cq->queue.kmem;
|
||||
else
|
||||
gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL);
|
||||
if (!gdma_cq)
|
||||
return -ENOMEM;
|
||||
|
||||
@ -141,6 +166,153 @@ void mana_ib_remove_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
|
||||
if (cq->queue.id >= gc->max_num_cqs || cq->queue.id == INVALID_QUEUE_ID)
|
||||
return;
|
||||
|
||||
if (cq->queue.kmem)
|
||||
/* Then it will be cleaned and removed by the mana */
|
||||
return;
|
||||
|
||||
kfree(gc->cq_table[cq->queue.id]);
|
||||
gc->cq_table[cq->queue.id] = NULL;
|
||||
}
|
||||
|
||||
int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
|
||||
{
|
||||
struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
|
||||
struct gdma_queue *gdma_cq = cq->queue.kmem;
|
||||
|
||||
if (!gdma_cq)
|
||||
return -EINVAL;
|
||||
|
||||
mana_gd_ring_cq(gdma_cq, SET_ARM_BIT);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void handle_ud_sq_cqe(struct mana_ib_qp *qp, struct gdma_comp *cqe)
|
||||
{
|
||||
struct mana_rdma_cqe *rdma_cqe = (struct mana_rdma_cqe *)cqe->cqe_data;
|
||||
struct gdma_queue *wq = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].kmem;
|
||||
struct ud_sq_shadow_wqe *shadow_wqe;
|
||||
|
||||
shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_sq);
|
||||
if (!shadow_wqe)
|
||||
return;
|
||||
|
||||
shadow_wqe->header.error_code = rdma_cqe->ud_send.vendor_error;
|
||||
|
||||
wq->tail += shadow_wqe->header.posted_wqe_size;
|
||||
shadow_queue_advance_next_to_complete(&qp->shadow_sq);
|
||||
}
|
||||
|
||||
static inline void handle_ud_rq_cqe(struct mana_ib_qp *qp, struct gdma_comp *cqe)
|
||||
{
|
||||
struct mana_rdma_cqe *rdma_cqe = (struct mana_rdma_cqe *)cqe->cqe_data;
|
||||
struct gdma_queue *wq = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].kmem;
|
||||
struct ud_rq_shadow_wqe *shadow_wqe;
|
||||
|
||||
shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_rq);
|
||||
if (!shadow_wqe)
|
||||
return;
|
||||
|
||||
shadow_wqe->byte_len = rdma_cqe->ud_recv.msg_len;
|
||||
shadow_wqe->src_qpn = rdma_cqe->ud_recv.src_qpn;
|
||||
shadow_wqe->header.error_code = IB_WC_SUCCESS;
|
||||
|
||||
wq->tail += shadow_wqe->header.posted_wqe_size;
|
||||
shadow_queue_advance_next_to_complete(&qp->shadow_rq);
|
||||
}
|
||||
|
||||
static void mana_handle_cqe(struct mana_ib_dev *mdev, struct gdma_comp *cqe)
|
||||
{
|
||||
struct mana_ib_qp *qp = mana_get_qp_ref(mdev, cqe->wq_num, cqe->is_sq);
|
||||
|
||||
if (!qp)
|
||||
return;
|
||||
|
||||
if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD) {
|
||||
if (cqe->is_sq)
|
||||
handle_ud_sq_cqe(qp, cqe);
|
||||
else
|
||||
handle_ud_rq_cqe(qp, cqe);
|
||||
}
|
||||
|
||||
mana_put_qp_ref(qp);
|
||||
}
|
||||
|
||||
static void fill_verbs_from_shadow_wqe(struct mana_ib_qp *qp, struct ib_wc *wc,
|
||||
const struct shadow_wqe_header *shadow_wqe)
|
||||
{
|
||||
const struct ud_rq_shadow_wqe *ud_wqe = (const struct ud_rq_shadow_wqe *)shadow_wqe;
|
||||
|
||||
wc->wr_id = shadow_wqe->wr_id;
|
||||
wc->status = shadow_wqe->error_code;
|
||||
wc->opcode = shadow_wqe->opcode;
|
||||
wc->vendor_err = shadow_wqe->error_code;
|
||||
wc->wc_flags = 0;
|
||||
wc->qp = &qp->ibqp;
|
||||
wc->pkey_index = 0;
|
||||
|
||||
if (shadow_wqe->opcode == IB_WC_RECV) {
|
||||
wc->byte_len = ud_wqe->byte_len;
|
||||
wc->src_qp = ud_wqe->src_qpn;
|
||||
wc->wc_flags |= IB_WC_GRH;
|
||||
}
|
||||
}
|
||||
|
||||
static int mana_process_completions(struct mana_ib_cq *cq, int nwc, struct ib_wc *wc)
|
||||
{
|
||||
struct shadow_wqe_header *shadow_wqe;
|
||||
struct mana_ib_qp *qp;
|
||||
int wc_index = 0;
|
||||
|
||||
/* process send shadow queue completions */
|
||||
list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) {
|
||||
while ((shadow_wqe = shadow_queue_get_next_to_consume(&qp->shadow_sq))
|
||||
!= NULL) {
|
||||
if (wc_index >= nwc)
|
||||
goto out;
|
||||
|
||||
fill_verbs_from_shadow_wqe(qp, &wc[wc_index], shadow_wqe);
|
||||
shadow_queue_advance_consumer(&qp->shadow_sq);
|
||||
wc_index++;
|
||||
}
|
||||
}
|
||||
|
||||
/* process recv shadow queue completions */
|
||||
list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) {
|
||||
while ((shadow_wqe = shadow_queue_get_next_to_consume(&qp->shadow_rq))
|
||||
!= NULL) {
|
||||
if (wc_index >= nwc)
|
||||
goto out;
|
||||
|
||||
fill_verbs_from_shadow_wqe(qp, &wc[wc_index], shadow_wqe);
|
||||
shadow_queue_advance_consumer(&qp->shadow_rq);
|
||||
wc_index++;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
return wc_index;
|
||||
}
|
||||
|
||||
int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
|
||||
{
|
||||
struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
|
||||
struct mana_ib_dev *mdev = container_of(ibcq->device, struct mana_ib_dev, ib_dev);
|
||||
struct gdma_queue *queue = cq->queue.kmem;
|
||||
struct gdma_comp gdma_cqe;
|
||||
unsigned long flags;
|
||||
int num_polled = 0;
|
||||
int comp_read, i;
|
||||
|
||||
spin_lock_irqsave(&cq->cq_lock, flags);
|
||||
for (i = 0; i < num_entries; i++) {
|
||||
comp_read = mana_gd_poll_cq(queue, &gdma_cqe, 1);
|
||||
if (comp_read < 1)
|
||||
break;
|
||||
mana_handle_cqe(mdev, &gdma_cqe);
|
||||
}
|
||||
|
||||
num_polled = mana_process_completions(cq, num_entries, wc);
|
||||
spin_unlock_irqrestore(&cq->cq_lock, flags);
|
||||
|
||||
return num_polled;
|
||||
}
|
||||
|
@ -19,6 +19,7 @@ static const struct ib_device_ops mana_ib_dev_ops = {
|
||||
.add_gid = mana_ib_gd_add_gid,
|
||||
.alloc_pd = mana_ib_alloc_pd,
|
||||
.alloc_ucontext = mana_ib_alloc_ucontext,
|
||||
.create_ah = mana_ib_create_ah,
|
||||
.create_cq = mana_ib_create_cq,
|
||||
.create_qp = mana_ib_create_qp,
|
||||
.create_rwq_ind_table = mana_ib_create_rwq_ind_table,
|
||||
@ -27,22 +28,30 @@ static const struct ib_device_ops mana_ib_dev_ops = {
|
||||
.dealloc_ucontext = mana_ib_dealloc_ucontext,
|
||||
.del_gid = mana_ib_gd_del_gid,
|
||||
.dereg_mr = mana_ib_dereg_mr,
|
||||
.destroy_ah = mana_ib_destroy_ah,
|
||||
.destroy_cq = mana_ib_destroy_cq,
|
||||
.destroy_qp = mana_ib_destroy_qp,
|
||||
.destroy_rwq_ind_table = mana_ib_destroy_rwq_ind_table,
|
||||
.destroy_wq = mana_ib_destroy_wq,
|
||||
.disassociate_ucontext = mana_ib_disassociate_ucontext,
|
||||
.get_dma_mr = mana_ib_get_dma_mr,
|
||||
.get_link_layer = mana_ib_get_link_layer,
|
||||
.get_port_immutable = mana_ib_get_port_immutable,
|
||||
.mmap = mana_ib_mmap,
|
||||
.modify_qp = mana_ib_modify_qp,
|
||||
.modify_wq = mana_ib_modify_wq,
|
||||
.poll_cq = mana_ib_poll_cq,
|
||||
.post_recv = mana_ib_post_recv,
|
||||
.post_send = mana_ib_post_send,
|
||||
.query_device = mana_ib_query_device,
|
||||
.query_gid = mana_ib_query_gid,
|
||||
.query_pkey = mana_ib_query_pkey,
|
||||
.query_port = mana_ib_query_port,
|
||||
.reg_user_mr = mana_ib_reg_user_mr,
|
||||
.reg_user_mr_dmabuf = mana_ib_reg_user_mr_dmabuf,
|
||||
.req_notify_cq = mana_ib_arm_cq,
|
||||
|
||||
INIT_RDMA_OBJ_SIZE(ib_ah, mana_ib_ah, ibah),
|
||||
INIT_RDMA_OBJ_SIZE(ib_cq, mana_ib_cq, ibcq),
|
||||
INIT_RDMA_OBJ_SIZE(ib_pd, mana_ib_pd, ibpd),
|
||||
INIT_RDMA_OBJ_SIZE(ib_qp, mana_ib_qp, ibqp),
|
||||
@ -51,6 +60,43 @@ static const struct ib_device_ops mana_ib_dev_ops = {
|
||||
ib_ind_table),
|
||||
};
|
||||
|
||||
static const struct ib_device_ops mana_ib_stats_ops = {
|
||||
.alloc_hw_port_stats = mana_ib_alloc_hw_port_stats,
|
||||
.get_hw_stats = mana_ib_get_hw_stats,
|
||||
};
|
||||
|
||||
static int mana_ib_netdev_event(struct notifier_block *this,
|
||||
unsigned long event, void *ptr)
|
||||
{
|
||||
struct mana_ib_dev *dev = container_of(this, struct mana_ib_dev, nb);
|
||||
struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
|
||||
struct gdma_context *gc = dev->gdma_dev->gdma_context;
|
||||
struct mana_context *mc = gc->mana.driver_data;
|
||||
struct net_device *ndev;
|
||||
|
||||
/* Only process events from our parent device */
|
||||
if (event_dev != mc->ports[0])
|
||||
return NOTIFY_DONE;
|
||||
|
||||
switch (event) {
|
||||
case NETDEV_CHANGEUPPER:
|
||||
ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker);
|
||||
/*
|
||||
* RDMA core will setup GID based on updated netdev.
|
||||
* It's not possible to race with the core as rtnl lock is being
|
||||
* held.
|
||||
*/
|
||||
ib_device_set_netdev(&dev->ib_dev, ndev, 1);
|
||||
|
||||
/* mana_get_primary_netdev() returns ndev with refcount held */
|
||||
netdev_put(ndev, &dev->dev_tracker);
|
||||
|
||||
return NOTIFY_OK;
|
||||
default:
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
}
|
||||
|
||||
static int mana_ib_probe(struct auxiliary_device *adev,
|
||||
const struct auxiliary_device_id *id)
|
||||
{
|
||||
@ -84,10 +130,8 @@ static int mana_ib_probe(struct auxiliary_device *adev,
|
||||
dev->ib_dev.num_comp_vectors = mdev->gdma_context->max_num_queues;
|
||||
dev->ib_dev.dev.parent = mdev->gdma_context->dev;
|
||||
|
||||
rcu_read_lock(); /* required to get primary netdev */
|
||||
ndev = mana_get_primary_netdev_rcu(mc, 0);
|
||||
ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker);
|
||||
if (!ndev) {
|
||||
rcu_read_unlock();
|
||||
ret = -ENODEV;
|
||||
ibdev_err(&dev->ib_dev, "Failed to get netdev for IB port 1");
|
||||
goto free_ib_device;
|
||||
@ -95,7 +139,8 @@ static int mana_ib_probe(struct auxiliary_device *adev,
|
||||
ether_addr_copy(mac_addr, ndev->dev_addr);
|
||||
addrconf_addr_eui48((u8 *)&dev->ib_dev.node_guid, ndev->dev_addr);
|
||||
ret = ib_device_set_netdev(&dev->ib_dev, ndev, 1);
|
||||
rcu_read_unlock();
|
||||
/* mana_get_primary_netdev() returns ndev with refcount held */
|
||||
netdev_put(ndev, &dev->dev_tracker);
|
||||
if (ret) {
|
||||
ibdev_err(&dev->ib_dev, "Failed to set ib netdev, ret %d", ret);
|
||||
goto free_ib_device;
|
||||
@ -109,17 +154,27 @@ static int mana_ib_probe(struct auxiliary_device *adev,
|
||||
}
|
||||
dev->gdma_dev = &mdev->gdma_context->mana_ib;
|
||||
|
||||
ret = mana_ib_gd_query_adapter_caps(dev);
|
||||
dev->nb.notifier_call = mana_ib_netdev_event;
|
||||
ret = register_netdevice_notifier(&dev->nb);
|
||||
if (ret) {
|
||||
ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d",
|
||||
ibdev_err(&dev->ib_dev, "Failed to register net notifier, %d",
|
||||
ret);
|
||||
goto deregister_device;
|
||||
}
|
||||
|
||||
ret = mana_ib_gd_query_adapter_caps(dev);
|
||||
if (ret) {
|
||||
ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d",
|
||||
ret);
|
||||
goto deregister_net_notifier;
|
||||
}
|
||||
|
||||
ib_set_device_ops(&dev->ib_dev, &mana_ib_stats_ops);
|
||||
|
||||
ret = mana_ib_create_eqs(dev);
|
||||
if (ret) {
|
||||
ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret);
|
||||
goto deregister_device;
|
||||
goto deregister_net_notifier;
|
||||
}
|
||||
|
||||
ret = mana_ib_gd_create_rnic_adapter(dev);
|
||||
@ -134,20 +189,31 @@ static int mana_ib_probe(struct auxiliary_device *adev,
|
||||
goto destroy_rnic;
|
||||
}
|
||||
|
||||
dev->av_pool = dma_pool_create("mana_ib_av", mdev->gdma_context->dev,
|
||||
MANA_AV_BUFFER_SIZE, MANA_AV_BUFFER_SIZE, 0);
|
||||
if (!dev->av_pool) {
|
||||
ret = -ENOMEM;
|
||||
goto destroy_rnic;
|
||||
}
|
||||
|
||||
ret = ib_register_device(&dev->ib_dev, "mana_%d",
|
||||
mdev->gdma_context->dev);
|
||||
if (ret)
|
||||
goto destroy_rnic;
|
||||
goto deallocate_pool;
|
||||
|
||||
dev_set_drvdata(&adev->dev, dev);
|
||||
|
||||
return 0;
|
||||
|
||||
deallocate_pool:
|
||||
dma_pool_destroy(dev->av_pool);
|
||||
destroy_rnic:
|
||||
xa_destroy(&dev->qp_table_wq);
|
||||
mana_ib_gd_destroy_rnic_adapter(dev);
|
||||
destroy_eqs:
|
||||
mana_ib_destroy_eqs(dev);
|
||||
deregister_net_notifier:
|
||||
unregister_netdevice_notifier(&dev->nb);
|
||||
deregister_device:
|
||||
mana_gd_deregister_device(dev->gdma_dev);
|
||||
free_ib_device:
|
||||
@ -160,9 +226,11 @@ static void mana_ib_remove(struct auxiliary_device *adev)
|
||||
struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev);
|
||||
|
||||
ib_unregister_device(&dev->ib_dev);
|
||||
dma_pool_destroy(dev->av_pool);
|
||||
xa_destroy(&dev->qp_table_wq);
|
||||
mana_ib_gd_destroy_rnic_adapter(dev);
|
||||
mana_ib_destroy_eqs(dev);
|
||||
unregister_netdevice_notifier(&dev->nb);
|
||||
mana_gd_deregister_device(dev->gdma_dev);
|
||||
ib_dealloc_device(&dev->ib_dev);
|
||||
}
|
||||
|
@ -82,6 +82,9 @@ int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
|
||||
mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_PD, sizeof(req),
|
||||
sizeof(resp));
|
||||
|
||||
if (!udata)
|
||||
flags |= GDMA_PD_FLAG_ALLOW_GPA_MR;
|
||||
|
||||
req.flags = flags;
|
||||
err = mana_gd_send_request(gc, sizeof(req), &req,
|
||||
sizeof(resp), &resp);
|
||||
@ -237,6 +240,27 @@ void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
|
||||
ibdev_dbg(ibdev, "Failed to destroy doorbell page %d\n", ret);
|
||||
}
|
||||
|
||||
int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_queue_type type,
|
||||
struct mana_ib_queue *queue)
|
||||
{
|
||||
struct gdma_context *gc = mdev_to_gc(mdev);
|
||||
struct gdma_queue_spec spec = {};
|
||||
int err;
|
||||
|
||||
queue->id = INVALID_QUEUE_ID;
|
||||
queue->gdma_region = GDMA_INVALID_DMA_REGION;
|
||||
spec.type = type;
|
||||
spec.monitor_avl_buf = false;
|
||||
spec.queue_size = size;
|
||||
err = mana_gd_create_mana_wq_cq(&gc->mana_ib, &spec, &queue->kmem);
|
||||
if (err)
|
||||
return err;
|
||||
/* take ownership into mana_ib from mana */
|
||||
queue->gdma_region = queue->kmem->mem_info.dma_region_handle;
|
||||
queue->kmem->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
|
||||
struct mana_ib_queue *queue)
|
||||
{
|
||||
@ -276,6 +300,8 @@ void mana_ib_destroy_queue(struct mana_ib_dev *mdev, struct mana_ib_queue *queue
|
||||
*/
|
||||
mana_ib_gd_destroy_dma_region(mdev, queue->gdma_region);
|
||||
ib_umem_release(queue->umem);
|
||||
if (queue->kmem)
|
||||
mana_gd_destroy_queue(mdev_to_gc(mdev), queue->kmem);
|
||||
}
|
||||
|
||||
static int
|
||||
@ -358,7 +384,7 @@ static int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem
|
||||
unsigned int tail = 0;
|
||||
u64 *page_addr_list;
|
||||
void *request_buf;
|
||||
int err;
|
||||
int err = 0;
|
||||
|
||||
gc = mdev_to_gc(dev);
|
||||
hwc = gc->hwc.driver_data;
|
||||
@ -535,8 +561,10 @@ int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
|
||||
immutable->pkey_tbl_len = attr.pkey_tbl_len;
|
||||
immutable->gid_tbl_len = attr.gid_tbl_len;
|
||||
immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
|
||||
if (port_num == 1)
|
||||
if (port_num == 1) {
|
||||
immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
|
||||
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -595,8 +623,11 @@ int mana_ib_query_port(struct ib_device *ibdev, u32 port,
|
||||
props->active_width = IB_WIDTH_4X;
|
||||
props->active_speed = IB_SPEED_EDR;
|
||||
props->pkey_tbl_len = 1;
|
||||
if (port == 1)
|
||||
if (port == 1) {
|
||||
props->gid_tbl_len = 16;
|
||||
props->port_cap_flags = IB_PORT_CM_SUP;
|
||||
props->ip_gids = true;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -634,7 +665,7 @@ int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev)
|
||||
|
||||
mana_gd_init_req_hdr(&req.hdr, MANA_IB_GET_ADAPTER_CAP, sizeof(req),
|
||||
sizeof(resp));
|
||||
req.hdr.resp.msg_version = GDMA_MESSAGE_V3;
|
||||
req.hdr.resp.msg_version = GDMA_MESSAGE_V4;
|
||||
req.hdr.dev_id = dev->gdma_dev->dev_id;
|
||||
|
||||
err = mana_gd_send_request(mdev_to_gc(dev), sizeof(req),
|
||||
@ -663,6 +694,7 @@ int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev)
|
||||
caps->max_inline_data_size = resp.max_inline_data_size;
|
||||
caps->max_send_sge_count = resp.max_send_sge_count;
|
||||
caps->max_recv_sge_count = resp.max_recv_sge_count;
|
||||
caps->feature_flags = resp.feature_flags;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -678,7 +710,7 @@ mana_ib_event_handler(void *ctx, struct gdma_queue *q, struct gdma_event *event)
|
||||
switch (event->type) {
|
||||
case GDMA_EQE_RNIC_QP_FATAL:
|
||||
qpn = event->details[0];
|
||||
qp = mana_get_qp_ref(mdev, qpn);
|
||||
qp = mana_get_qp_ref(mdev, qpn, false);
|
||||
if (!qp)
|
||||
break;
|
||||
if (qp->ibqp.event_handler) {
|
||||
@ -762,6 +794,9 @@ int mana_ib_gd_create_rnic_adapter(struct mana_ib_dev *mdev)
|
||||
req.hdr.dev_id = gc->mana_ib.dev_id;
|
||||
req.notify_eq_id = mdev->fatal_err_eq->id;
|
||||
|
||||
if (mdev->adapter_caps.feature_flags & MANA_IB_FEATURE_CLIENT_ERROR_CQE_SUPPORT)
|
||||
req.feature_flags |= MANA_IB_FEATURE_CLIENT_ERROR_CQE_REQUEST;
|
||||
|
||||
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
|
||||
if (err) {
|
||||
ibdev_err(&mdev->ib_dev, "Failed to create RNIC adapter err %d", err);
|
||||
@ -987,3 +1022,61 @@ int mana_ib_gd_destroy_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mana_ib_gd_create_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
|
||||
struct ib_qp_init_attr *attr, u32 doorbell, u32 type)
|
||||
{
|
||||
struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
|
||||
struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
|
||||
struct mana_ib_pd *pd = container_of(qp->ibqp.pd, struct mana_ib_pd, ibpd);
|
||||
struct gdma_context *gc = mdev_to_gc(mdev);
|
||||
struct mana_rnic_create_udqp_resp resp = {};
|
||||
struct mana_rnic_create_udqp_req req = {};
|
||||
int err, i;
|
||||
|
||||
mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_UD_QP, sizeof(req), sizeof(resp));
|
||||
req.hdr.dev_id = gc->mana_ib.dev_id;
|
||||
req.adapter = mdev->adapter_handle;
|
||||
req.pd_handle = pd->pd_handle;
|
||||
req.send_cq_handle = send_cq->cq_handle;
|
||||
req.recv_cq_handle = recv_cq->cq_handle;
|
||||
for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; i++)
|
||||
req.dma_region[i] = qp->ud_qp.queues[i].gdma_region;
|
||||
req.doorbell_page = doorbell;
|
||||
req.max_send_wr = attr->cap.max_send_wr;
|
||||
req.max_recv_wr = attr->cap.max_recv_wr;
|
||||
req.max_send_sge = attr->cap.max_send_sge;
|
||||
req.max_recv_sge = attr->cap.max_recv_sge;
|
||||
req.qp_type = type;
|
||||
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
|
||||
if (err) {
|
||||
ibdev_err(&mdev->ib_dev, "Failed to create ud qp err %d", err);
|
||||
return err;
|
||||
}
|
||||
qp->qp_handle = resp.qp_handle;
|
||||
for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; i++) {
|
||||
qp->ud_qp.queues[i].id = resp.queue_ids[i];
|
||||
/* The GDMA regions are now owned by the RNIC QP handle */
|
||||
qp->ud_qp.queues[i].gdma_region = GDMA_INVALID_DMA_REGION;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mana_ib_gd_destroy_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
|
||||
{
|
||||
struct mana_rnic_destroy_udqp_resp resp = {0};
|
||||
struct mana_rnic_destroy_udqp_req req = {0};
|
||||
struct gdma_context *gc = mdev_to_gc(mdev);
|
||||
int err;
|
||||
|
||||
mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_UD_QP, sizeof(req), sizeof(resp));
|
||||
req.hdr.dev_id = gc->mana_ib.dev_id;
|
||||
req.adapter = mdev->adapter_handle;
|
||||
req.qp_handle = qp->qp_handle;
|
||||
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
|
||||
if (err) {
|
||||
ibdev_err(&mdev->ib_dev, "Failed to destroy ud qp err %d", err);
|
||||
return err;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -11,8 +11,11 @@
|
||||
#include <rdma/ib_umem.h>
|
||||
#include <rdma/mana-abi.h>
|
||||
#include <rdma/uverbs_ioctl.h>
|
||||
#include <linux/dmapool.h>
|
||||
|
||||
#include <net/mana/mana.h>
|
||||
#include "shadow_queue.h"
|
||||
#include "counters.h"
|
||||
|
||||
#define PAGE_SZ_BM \
|
||||
(SZ_4K | SZ_8K | SZ_16K | SZ_32K | SZ_64K | SZ_128K | SZ_256K | \
|
||||
@ -21,6 +24,9 @@
|
||||
/* MANA doesn't have any limit for MR size */
|
||||
#define MANA_IB_MAX_MR_SIZE U64_MAX
|
||||
|
||||
/* Send queue ID mask */
|
||||
#define MANA_SENDQ_MASK BIT(31)
|
||||
|
||||
/*
|
||||
* The hardware limit of number of MRs is greater than maximum number of MRs
|
||||
* that can possibly represent in 24 bits
|
||||
@ -32,6 +38,11 @@
|
||||
*/
|
||||
#define MANA_CA_ACK_DELAY 16
|
||||
|
||||
/*
|
||||
* The buffer used for writing AV
|
||||
*/
|
||||
#define MANA_AV_BUFFER_SIZE 64
|
||||
|
||||
struct mana_ib_adapter_caps {
|
||||
u32 max_sq_id;
|
||||
u32 max_rq_id;
|
||||
@ -48,10 +59,12 @@ struct mana_ib_adapter_caps {
|
||||
u32 max_send_sge_count;
|
||||
u32 max_recv_sge_count;
|
||||
u32 max_inline_data_size;
|
||||
u64 feature_flags;
|
||||
};
|
||||
|
||||
struct mana_ib_queue {
|
||||
struct ib_umem *umem;
|
||||
struct gdma_queue *kmem;
|
||||
u64 gdma_region;
|
||||
u64 id;
|
||||
};
|
||||
@ -64,6 +77,9 @@ struct mana_ib_dev {
|
||||
struct gdma_queue **eqs;
|
||||
struct xarray qp_table_wq;
|
||||
struct mana_ib_adapter_caps adapter_caps;
|
||||
struct dma_pool *av_pool;
|
||||
netdevice_tracker dev_tracker;
|
||||
struct notifier_block nb;
|
||||
};
|
||||
|
||||
struct mana_ib_wq {
|
||||
@ -87,6 +103,25 @@ struct mana_ib_pd {
|
||||
u32 tx_vp_offset;
|
||||
};
|
||||
|
||||
struct mana_ib_av {
|
||||
u8 dest_ip[16];
|
||||
u8 dest_mac[ETH_ALEN];
|
||||
u16 udp_src_port;
|
||||
u8 src_ip[16];
|
||||
u32 hop_limit : 8;
|
||||
u32 reserved1 : 12;
|
||||
u32 dscp : 6;
|
||||
u32 reserved2 : 5;
|
||||
u32 is_ipv6 : 1;
|
||||
u32 reserved3 : 32;
|
||||
};
|
||||
|
||||
struct mana_ib_ah {
|
||||
struct ib_ah ibah;
|
||||
struct mana_ib_av *av;
|
||||
dma_addr_t dma_handle;
|
||||
};
|
||||
|
||||
struct mana_ib_mr {
|
||||
struct ib_mr ibmr;
|
||||
struct ib_umem *umem;
|
||||
@ -96,6 +131,10 @@ struct mana_ib_mr {
|
||||
struct mana_ib_cq {
|
||||
struct ib_cq ibcq;
|
||||
struct mana_ib_queue queue;
|
||||
/* protects CQ polling */
|
||||
spinlock_t cq_lock;
|
||||
struct list_head list_send_qp;
|
||||
struct list_head list_recv_qp;
|
||||
int cqe;
|
||||
u32 comp_vector;
|
||||
mana_handle_t cq_handle;
|
||||
@ -114,6 +153,17 @@ struct mana_ib_rc_qp {
|
||||
struct mana_ib_queue queues[MANA_RC_QUEUE_TYPE_MAX];
|
||||
};
|
||||
|
||||
enum mana_ud_queue_type {
|
||||
MANA_UD_SEND_QUEUE = 0,
|
||||
MANA_UD_RECV_QUEUE,
|
||||
MANA_UD_QUEUE_TYPE_MAX,
|
||||
};
|
||||
|
||||
struct mana_ib_ud_qp {
|
||||
struct mana_ib_queue queues[MANA_UD_QUEUE_TYPE_MAX];
|
||||
u32 sq_psn;
|
||||
};
|
||||
|
||||
struct mana_ib_qp {
|
||||
struct ib_qp ibqp;
|
||||
|
||||
@ -121,11 +171,17 @@ struct mana_ib_qp {
|
||||
union {
|
||||
struct mana_ib_queue raw_sq;
|
||||
struct mana_ib_rc_qp rc_qp;
|
||||
struct mana_ib_ud_qp ud_qp;
|
||||
};
|
||||
|
||||
/* The port on the IB device, starting with 1 */
|
||||
u32 port;
|
||||
|
||||
struct list_head cq_send_list;
|
||||
struct list_head cq_recv_list;
|
||||
struct shadow_queue shadow_rq;
|
||||
struct shadow_queue shadow_sq;
|
||||
|
||||
refcount_t refcount;
|
||||
struct completion free;
|
||||
};
|
||||
@ -145,17 +201,24 @@ enum mana_ib_command_code {
|
||||
MANA_IB_DESTROY_ADAPTER = 0x30003,
|
||||
MANA_IB_CONFIG_IP_ADDR = 0x30004,
|
||||
MANA_IB_CONFIG_MAC_ADDR = 0x30005,
|
||||
MANA_IB_CREATE_UD_QP = 0x30006,
|
||||
MANA_IB_DESTROY_UD_QP = 0x30007,
|
||||
MANA_IB_CREATE_CQ = 0x30008,
|
||||
MANA_IB_DESTROY_CQ = 0x30009,
|
||||
MANA_IB_CREATE_RC_QP = 0x3000a,
|
||||
MANA_IB_DESTROY_RC_QP = 0x3000b,
|
||||
MANA_IB_SET_QP_STATE = 0x3000d,
|
||||
MANA_IB_QUERY_VF_COUNTERS = 0x30022,
|
||||
};
|
||||
|
||||
struct mana_ib_query_adapter_caps_req {
|
||||
struct gdma_req_hdr hdr;
|
||||
}; /*HW Data */
|
||||
|
||||
enum mana_ib_adapter_features {
|
||||
MANA_IB_FEATURE_CLIENT_ERROR_CQE_SUPPORT = BIT(4),
|
||||
};
|
||||
|
||||
struct mana_ib_query_adapter_caps_resp {
|
||||
struct gdma_resp_hdr hdr;
|
||||
u32 max_sq_id;
|
||||
@ -176,8 +239,13 @@ struct mana_ib_query_adapter_caps_resp {
|
||||
u32 max_send_sge_count;
|
||||
u32 max_recv_sge_count;
|
||||
u32 max_inline_data_size;
|
||||
u64 feature_flags;
|
||||
}; /* HW Data */
|
||||
|
||||
enum mana_ib_adapter_features_request {
|
||||
MANA_IB_FEATURE_CLIENT_ERROR_CQE_REQUEST = BIT(1),
|
||||
}; /*HW Data */
|
||||
|
||||
struct mana_rnic_create_adapter_req {
|
||||
struct gdma_req_hdr hdr;
|
||||
u32 notify_eq_id;
|
||||
@ -296,6 +364,37 @@ struct mana_rnic_destroy_rc_qp_resp {
|
||||
struct gdma_resp_hdr hdr;
|
||||
}; /* HW Data */
|
||||
|
||||
struct mana_rnic_create_udqp_req {
|
||||
struct gdma_req_hdr hdr;
|
||||
mana_handle_t adapter;
|
||||
mana_handle_t pd_handle;
|
||||
mana_handle_t send_cq_handle;
|
||||
mana_handle_t recv_cq_handle;
|
||||
u64 dma_region[MANA_UD_QUEUE_TYPE_MAX];
|
||||
u32 qp_type;
|
||||
u32 doorbell_page;
|
||||
u32 max_send_wr;
|
||||
u32 max_recv_wr;
|
||||
u32 max_send_sge;
|
||||
u32 max_recv_sge;
|
||||
}; /* HW Data */
|
||||
|
||||
struct mana_rnic_create_udqp_resp {
|
||||
struct gdma_resp_hdr hdr;
|
||||
mana_handle_t qp_handle;
|
||||
u32 queue_ids[MANA_UD_QUEUE_TYPE_MAX];
|
||||
}; /* HW Data*/
|
||||
|
||||
struct mana_rnic_destroy_udqp_req {
|
||||
struct gdma_req_hdr hdr;
|
||||
mana_handle_t adapter;
|
||||
mana_handle_t qp_handle;
|
||||
}; /* HW Data */
|
||||
|
||||
struct mana_rnic_destroy_udqp_resp {
|
||||
struct gdma_resp_hdr hdr;
|
||||
}; /* HW Data */
|
||||
|
||||
struct mana_ib_ah_attr {
|
||||
u8 src_addr[16];
|
||||
u8 dest_addr[16];
|
||||
@ -332,17 +431,104 @@ struct mana_rnic_set_qp_state_resp {
|
||||
struct gdma_resp_hdr hdr;
|
||||
}; /* HW Data */
|
||||
|
||||
enum WQE_OPCODE_TYPES {
|
||||
WQE_TYPE_UD_SEND = 0,
|
||||
WQE_TYPE_UD_RECV = 8,
|
||||
}; /* HW DATA */
|
||||
|
||||
struct rdma_send_oob {
|
||||
u32 wqe_type : 5;
|
||||
u32 fence : 1;
|
||||
u32 signaled : 1;
|
||||
u32 solicited : 1;
|
||||
u32 psn : 24;
|
||||
|
||||
u32 ssn_or_rqpn : 24;
|
||||
u32 reserved1 : 8;
|
||||
union {
|
||||
struct {
|
||||
u32 remote_qkey;
|
||||
u32 immediate;
|
||||
u32 reserved1;
|
||||
u32 reserved2;
|
||||
} ud_send;
|
||||
};
|
||||
}; /* HW DATA */
|
||||
|
||||
struct mana_rdma_cqe {
|
||||
union {
|
||||
struct {
|
||||
u8 cqe_type;
|
||||
u8 data[GDMA_COMP_DATA_SIZE - 1];
|
||||
};
|
||||
struct {
|
||||
u32 cqe_type : 8;
|
||||
u32 vendor_error : 9;
|
||||
u32 reserved1 : 15;
|
||||
u32 sge_offset : 5;
|
||||
u32 tx_wqe_offset : 27;
|
||||
} ud_send;
|
||||
struct {
|
||||
u32 cqe_type : 8;
|
||||
u32 reserved1 : 24;
|
||||
u32 msg_len;
|
||||
u32 src_qpn : 24;
|
||||
u32 reserved2 : 8;
|
||||
u32 imm_data;
|
||||
u32 rx_wqe_offset;
|
||||
} ud_recv;
|
||||
};
|
||||
}; /* HW DATA */
|
||||
|
||||
struct mana_rnic_query_vf_cntrs_req {
|
||||
struct gdma_req_hdr hdr;
|
||||
mana_handle_t adapter;
|
||||
}; /* HW Data */
|
||||
|
||||
struct mana_rnic_query_vf_cntrs_resp {
|
||||
struct gdma_resp_hdr hdr;
|
||||
u64 requester_timeout;
|
||||
u64 requester_oos_nak;
|
||||
u64 requester_rnr_nak;
|
||||
u64 responder_rnr_nak;
|
||||
u64 responder_oos;
|
||||
u64 responder_dup_request;
|
||||
u64 requester_implicit_nak;
|
||||
u64 requester_readresp_psn_mismatch;
|
||||
u64 nak_inv_req;
|
||||
u64 nak_access_err;
|
||||
u64 nak_opp_err;
|
||||
u64 nak_inv_read;
|
||||
u64 responder_local_len_err;
|
||||
u64 requestor_local_prot_err;
|
||||
u64 responder_rem_access_err;
|
||||
u64 responder_local_qp_err;
|
||||
u64 responder_malformed_wqe;
|
||||
u64 general_hw_err;
|
||||
u64 requester_rnr_nak_retries_exceeded;
|
||||
u64 requester_retries_exceeded;
|
||||
u64 total_fatal_err;
|
||||
u64 received_cnps;
|
||||
u64 num_qps_congested;
|
||||
u64 rate_inc_events;
|
||||
u64 num_qps_recovered;
|
||||
u64 current_rate;
|
||||
}; /* HW Data */
|
||||
|
||||
static inline struct gdma_context *mdev_to_gc(struct mana_ib_dev *mdev)
{
	return mdev->gdma_dev->gdma_context;
}

static inline struct mana_ib_qp *mana_get_qp_ref(struct mana_ib_dev *mdev,
						 uint32_t qid)
						 u32 qid, bool is_sq)
{
	struct mana_ib_qp *qp;
	unsigned long flag;

	if (is_sq)
		qid |= MANA_SENDQ_MASK;

	xa_lock_irqsave(&mdev->qp_table_wq, flag);
	qp = xa_load(&mdev->qp_table_wq, qid);
	if (qp)
@@ -388,6 +574,8 @@ int mana_ib_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev,
				  mana_handle_t gdma_region);

int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_queue_type type,
				struct mana_ib_queue *queue);
int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
			 struct mana_ib_queue *queue);
void mana_ib_destroy_queue(struct mana_ib_dev *mdev, struct mana_ib_queue *queue);
@@ -480,4 +668,24 @@ int mana_ib_gd_destroy_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq);
int mana_ib_gd_create_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
			    struct ib_qp_init_attr *attr, u32 doorbell, u64 flags);
int mana_ib_gd_destroy_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp);

int mana_ib_gd_create_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
			    struct ib_qp_init_attr *attr, u32 doorbell, u32 type);
int mana_ib_gd_destroy_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp);

int mana_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
		      struct ib_udata *udata);
int mana_ib_destroy_ah(struct ib_ah *ah, u32 flags);

int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
		      const struct ib_recv_wr **bad_wr);
int mana_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
		      const struct ib_send_wr **bad_wr);

int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);

struct ib_mr *mana_ib_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, u64 length,
					 u64 iova, int fd, int mr_access_flags,
					 struct uverbs_attr_bundle *attrs);
#endif
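mana_get_qp_ref() above gains an is_sq flag because UD/GSI QPs are now indexed in the single qp_table_wq xarray under both their send-queue and receive-queue ids, with the send-queue key tagged by MANA_SENDQ_MASK (BIT(31)) so the two id spaces cannot collide. The sketch below shows the expected completion-path usage; the handler name and parameters are hypothetical, only mana_get_qp_ref(), mana_put_qp_ref() and MANA_SENDQ_MASK come from the driver.

/*
 * Illustrative only: mapping a GDMA work-queue id from a CQE back to
 * its QP through the masked xarray key.
 */
static void example_handle_cqe(struct mana_ib_dev *mdev, u32 wq_id, bool is_sq)
{
	struct mana_ib_qp *qp;

	/* SQ ids are stored with BIT(31) set, so SQ 5 and RQ 5 never clash */
	qp = mana_get_qp_ref(mdev, wq_id, is_sq);
	if (!qp)
		return;	/* QP already removed; drop the completion */

	/* ... translate the hardware CQE into an ib_wc here ... */

	mana_put_qp_ref(qp);
}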
@ -8,6 +8,8 @@
|
||||
#define VALID_MR_FLAGS \
|
||||
(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ)
|
||||
|
||||
#define VALID_DMA_MR_FLAGS (IB_ACCESS_LOCAL_WRITE)
|
||||
|
||||
static enum gdma_mr_access_flags
|
||||
mana_ib_verbs_to_gdma_access_flags(int access_flags)
|
||||
{
|
||||
@ -39,6 +41,8 @@ static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct mana_ib_mr *mr,
|
||||
req.mr_type = mr_params->mr_type;
|
||||
|
||||
switch (mr_params->mr_type) {
|
||||
case GDMA_MR_TYPE_GPA:
|
||||
break;
|
||||
case GDMA_MR_TYPE_GVA:
|
||||
req.gva.dma_region_handle = mr_params->gva.dma_region_handle;
|
||||
req.gva.virtual_address = mr_params->gva.virtual_address;
|
||||
@ -169,6 +173,107 @@ err_free:
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
struct ib_mr *mana_ib_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, u64 length,
|
||||
u64 iova, int fd, int access_flags,
|
||||
struct uverbs_attr_bundle *attrs)
|
||||
{
|
||||
struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
|
||||
struct gdma_create_mr_params mr_params = {};
|
||||
struct ib_device *ibdev = ibpd->device;
|
||||
struct ib_umem_dmabuf *umem_dmabuf;
|
||||
struct mana_ib_dev *dev;
|
||||
struct mana_ib_mr *mr;
|
||||
u64 dma_region_handle;
|
||||
int err;
|
||||
|
||||
dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
|
||||
|
||||
access_flags &= ~IB_ACCESS_OPTIONAL;
|
||||
if (access_flags & ~VALID_MR_FLAGS)
|
||||
return ERR_PTR(-EOPNOTSUPP);
|
||||
|
||||
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
|
||||
if (!mr)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
umem_dmabuf = ib_umem_dmabuf_get_pinned(ibdev, start, length, fd, access_flags);
|
||||
if (IS_ERR(umem_dmabuf)) {
|
||||
err = PTR_ERR(umem_dmabuf);
|
||||
ibdev_dbg(ibdev, "Failed to get dmabuf umem, %d\n", err);
|
||||
goto err_free;
|
||||
}
|
||||
|
||||
mr->umem = &umem_dmabuf->umem;
|
||||
|
||||
err = mana_ib_create_dma_region(dev, mr->umem, &dma_region_handle, iova);
|
||||
if (err) {
|
||||
ibdev_dbg(ibdev, "Failed create dma region for user-mr, %d\n",
|
||||
err);
|
||||
goto err_umem;
|
||||
}
|
||||
|
||||
mr_params.pd_handle = pd->pd_handle;
|
||||
mr_params.mr_type = GDMA_MR_TYPE_GVA;
|
||||
mr_params.gva.dma_region_handle = dma_region_handle;
|
||||
mr_params.gva.virtual_address = iova;
|
||||
mr_params.gva.access_flags =
|
||||
mana_ib_verbs_to_gdma_access_flags(access_flags);
|
||||
|
||||
err = mana_ib_gd_create_mr(dev, mr, &mr_params);
|
||||
if (err)
|
||||
goto err_dma_region;
|
||||
|
||||
/*
|
||||
* There is no need to keep track of dma_region_handle after MR is
|
||||
* successfully created. The dma_region_handle is tracked in the PF
|
||||
* as part of the lifecycle of this MR.
|
||||
*/
|
||||
|
||||
return &mr->ibmr;
|
||||
|
||||
err_dma_region:
|
||||
mana_gd_destroy_dma_region(mdev_to_gc(dev), dma_region_handle);
|
||||
|
||||
err_umem:
|
||||
ib_umem_release(mr->umem);
|
||||
|
||||
err_free:
|
||||
kfree(mr);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
struct ib_mr *mana_ib_get_dma_mr(struct ib_pd *ibpd, int access_flags)
|
||||
{
|
||||
struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
|
||||
struct gdma_create_mr_params mr_params = {};
|
||||
struct ib_device *ibdev = ibpd->device;
|
||||
struct mana_ib_dev *dev;
|
||||
struct mana_ib_mr *mr;
|
||||
int err;
|
||||
|
||||
dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
|
||||
|
||||
if (access_flags & ~VALID_DMA_MR_FLAGS)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
|
||||
if (!mr)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
mr_params.pd_handle = pd->pd_handle;
|
||||
mr_params.mr_type = GDMA_MR_TYPE_GPA;
|
||||
|
||||
err = mana_ib_gd_create_mr(dev, mr, &mr_params);
|
||||
if (err)
|
||||
goto err_free;
|
||||
|
||||
return &mr->ibmr;
|
||||
|
||||
err_free:
|
||||
kfree(mr);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
|
||||
{
|
||||
struct mana_ib_mr *mr = container_of(ibmr, struct mana_ib_mr, ibmr);
|
||||
|
@@ -398,18 +398,128 @@ err_free_vport:
	return err;
}

static u32 mana_ib_wqe_size(u32 sge, u32 oob_size)
{
	u32 wqe_size = sge * sizeof(struct gdma_sge) + sizeof(struct gdma_wqe) + oob_size;

	return ALIGN(wqe_size, GDMA_WQE_BU_SIZE);
}

static u32 mana_ib_queue_size(struct ib_qp_init_attr *attr, u32 queue_type)
{
	u32 queue_size;

	switch (attr->qp_type) {
	case IB_QPT_UD:
	case IB_QPT_GSI:
		if (queue_type == MANA_UD_SEND_QUEUE)
			queue_size = attr->cap.max_send_wr *
				mana_ib_wqe_size(attr->cap.max_send_sge, INLINE_OOB_LARGE_SIZE);
		else
			queue_size = attr->cap.max_recv_wr *
				mana_ib_wqe_size(attr->cap.max_recv_sge, INLINE_OOB_SMALL_SIZE);
		break;
	default:
		return 0;
	}

	return MANA_PAGE_ALIGN(roundup_pow_of_two(queue_size));
}

static enum gdma_queue_type mana_ib_queue_type(struct ib_qp_init_attr *attr, u32 queue_type)
{
	enum gdma_queue_type type;

	switch (attr->qp_type) {
	case IB_QPT_UD:
	case IB_QPT_GSI:
		if (queue_type == MANA_UD_SEND_QUEUE)
			type = GDMA_SQ;
		else
			type = GDMA_RQ;
		break;
	default:
		type = GDMA_INVALID_QUEUE;
	}
	return type;
}

static int mana_table_store_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
{
	return xa_insert_irq(&mdev->qp_table_wq, qp->ibqp.qp_num, qp,
			     GFP_KERNEL);
}

static void mana_table_remove_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
{
	xa_erase_irq(&mdev->qp_table_wq, qp->ibqp.qp_num);
}

static int mana_table_store_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
{
	u32 qids = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].id | MANA_SENDQ_MASK;
	u32 qidr = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id;
	int err;

	err = xa_insert_irq(&mdev->qp_table_wq, qids, qp, GFP_KERNEL);
	if (err)
		return err;

	err = xa_insert_irq(&mdev->qp_table_wq, qidr, qp, GFP_KERNEL);
	if (err)
		goto remove_sq;

	return 0;

remove_sq:
	xa_erase_irq(&mdev->qp_table_wq, qids);
	return err;
}

static void mana_table_remove_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
{
	u32 qids = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].id | MANA_SENDQ_MASK;
	u32 qidr = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id;

	xa_erase_irq(&mdev->qp_table_wq, qids);
	xa_erase_irq(&mdev->qp_table_wq, qidr);
}

static int mana_table_store_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
{
	refcount_set(&qp->refcount, 1);
	init_completion(&qp->free);
	return xa_insert_irq(&mdev->qp_table_wq, qp->ibqp.qp_num, qp,
			     GFP_KERNEL);

	switch (qp->ibqp.qp_type) {
	case IB_QPT_RC:
		return mana_table_store_rc_qp(mdev, qp);
	case IB_QPT_UD:
	case IB_QPT_GSI:
		return mana_table_store_ud_qp(mdev, qp);
	default:
		ibdev_dbg(&mdev->ib_dev, "Unknown QP type for storing in mana table, %d\n",
			  qp->ibqp.qp_type);
	}

	return -EINVAL;
}

static void mana_table_remove_qp(struct mana_ib_dev *mdev,
				 struct mana_ib_qp *qp)
{
	xa_erase_irq(&mdev->qp_table_wq, qp->ibqp.qp_num);
	switch (qp->ibqp.qp_type) {
	case IB_QPT_RC:
		mana_table_remove_rc_qp(mdev, qp);
		break;
	case IB_QPT_UD:
	case IB_QPT_GSI:
		mana_table_remove_ud_qp(mdev, qp);
		break;
	default:
		ibdev_dbg(&mdev->ib_dev, "Unknown QP type for removing from mana table, %d\n",
			  qp->ibqp.qp_type);
		return;
	}
	mana_put_qp_ref(qp);
	wait_for_completion(&qp->free);
}
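The queue sizing above multiplies the per-WQE footprint by the requested work-request count, then rounds up to a power of two and to a MANA page boundary. A worked example under clearly assumed constants follows; the real values of the GDMA structure sizes and OOB/basic-unit constants live in the GDMA headers and may differ.

/*
 * Worked example of mana_ib_queue_size() for a UD send queue, with
 * illustrative numbers only:
 *
 *   assume sizeof(struct gdma_sge) == 16, sizeof(struct gdma_wqe) == 8,
 *          INLINE_OOB_LARGE_SIZE == 24, GDMA_WQE_BU_SIZE == 32
 *
 *   mana_ib_wqe_size(2, INLINE_OOB_LARGE_SIZE)
 *       = ALIGN(2 * 16 + 8 + 24, 32) = ALIGN(64, 32) = 64
 *
 *   with attr->cap.max_send_wr == 100:
 *       queue_size = 100 * 64 = 6400
 *       MANA_PAGE_ALIGN(roundup_pow_of_two(6400)) = 8192
 *
 * i.e. the kernel queue backing the SQ is allocated as 8 KiB in this
 * hypothetical configuration.
 */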
@ -490,6 +600,105 @@ destroy_queues:
|
||||
return err;
|
||||
}
|
||||
|
||||
static void mana_add_qp_to_cqs(struct mana_ib_qp *qp)
|
||||
{
|
||||
struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
|
||||
struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&send_cq->cq_lock, flags);
|
||||
list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
|
||||
spin_unlock_irqrestore(&send_cq->cq_lock, flags);
|
||||
|
||||
spin_lock_irqsave(&recv_cq->cq_lock, flags);
|
||||
list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
|
||||
spin_unlock_irqrestore(&recv_cq->cq_lock, flags);
|
||||
}
|
||||
|
||||
static void mana_remove_qp_from_cqs(struct mana_ib_qp *qp)
|
||||
{
|
||||
struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
|
||||
struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&send_cq->cq_lock, flags);
|
||||
list_del(&qp->cq_send_list);
|
||||
spin_unlock_irqrestore(&send_cq->cq_lock, flags);
|
||||
|
||||
spin_lock_irqsave(&recv_cq->cq_lock, flags);
|
||||
list_del(&qp->cq_recv_list);
|
||||
spin_unlock_irqrestore(&recv_cq->cq_lock, flags);
|
||||
}
|
||||
|
||||
static int mana_ib_create_ud_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
|
||||
struct ib_qp_init_attr *attr, struct ib_udata *udata)
|
||||
{
|
||||
struct mana_ib_dev *mdev = container_of(ibpd->device, struct mana_ib_dev, ib_dev);
|
||||
struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
|
||||
struct gdma_context *gc = mdev_to_gc(mdev);
|
||||
u32 doorbell, queue_size;
|
||||
int i, err;
|
||||
|
||||
if (udata) {
|
||||
ibdev_dbg(&mdev->ib_dev, "User-level UD QPs are not supported\n");
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i) {
|
||||
queue_size = mana_ib_queue_size(attr, i);
|
||||
err = mana_ib_create_kernel_queue(mdev, queue_size, mana_ib_queue_type(attr, i),
|
||||
&qp->ud_qp.queues[i]);
|
||||
if (err) {
|
||||
ibdev_err(&mdev->ib_dev, "Failed to create queue %d, err %d\n",
|
||||
i, err);
|
||||
goto destroy_queues;
|
||||
}
|
||||
}
|
||||
doorbell = gc->mana_ib.doorbell;
|
||||
|
||||
err = create_shadow_queue(&qp->shadow_rq, attr->cap.max_recv_wr,
|
||||
sizeof(struct ud_rq_shadow_wqe));
|
||||
if (err) {
|
||||
ibdev_err(&mdev->ib_dev, "Failed to create shadow rq err %d\n", err);
|
||||
goto destroy_queues;
|
||||
}
|
||||
err = create_shadow_queue(&qp->shadow_sq, attr->cap.max_send_wr,
|
||||
sizeof(struct ud_sq_shadow_wqe));
|
||||
if (err) {
|
||||
ibdev_err(&mdev->ib_dev, "Failed to create shadow sq err %d\n", err);
|
||||
goto destroy_shadow_queues;
|
||||
}
|
||||
|
||||
err = mana_ib_gd_create_ud_qp(mdev, qp, attr, doorbell, attr->qp_type);
|
||||
if (err) {
|
||||
ibdev_err(&mdev->ib_dev, "Failed to create ud qp %d\n", err);
|
||||
goto destroy_shadow_queues;
|
||||
}
|
||||
qp->ibqp.qp_num = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id;
|
||||
qp->port = attr->port_num;
|
||||
|
||||
for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i)
|
||||
qp->ud_qp.queues[i].kmem->id = qp->ud_qp.queues[i].id;
|
||||
|
||||
err = mana_table_store_qp(mdev, qp);
|
||||
if (err)
|
||||
goto destroy_qp;
|
||||
|
||||
mana_add_qp_to_cqs(qp);
|
||||
|
||||
return 0;
|
||||
|
||||
destroy_qp:
|
||||
mana_ib_gd_destroy_ud_qp(mdev, qp);
|
||||
destroy_shadow_queues:
|
||||
destroy_shadow_queue(&qp->shadow_rq);
|
||||
destroy_shadow_queue(&qp->shadow_sq);
|
||||
destroy_queues:
|
||||
while (i-- > 0)
|
||||
mana_ib_destroy_queue(mdev, &qp->ud_qp.queues[i]);
|
||||
return err;
|
||||
}
|
||||
|
||||
int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
|
||||
struct ib_udata *udata)
|
||||
{
|
||||
@ -503,6 +712,9 @@ int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
|
||||
return mana_ib_create_qp_raw(ibqp, ibqp->pd, attr, udata);
|
||||
case IB_QPT_RC:
|
||||
return mana_ib_create_rc_qp(ibqp, ibqp->pd, attr, udata);
|
||||
case IB_QPT_UD:
|
||||
case IB_QPT_GSI:
|
||||
return mana_ib_create_ud_qp(ibqp, ibqp->pd, attr, udata);
|
||||
default:
|
||||
ibdev_dbg(ibqp->device, "Creating QP type %u not supported\n",
|
||||
attr->qp_type);
|
||||
@ -579,6 +791,8 @@ int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
|
||||
{
|
||||
switch (ibqp->qp_type) {
|
||||
case IB_QPT_RC:
|
||||
case IB_QPT_UD:
|
||||
case IB_QPT_GSI:
|
||||
return mana_ib_gd_modify_qp(ibqp, attr, attr_mask, udata);
|
||||
default:
|
||||
ibdev_dbg(ibqp->device, "Modify QP type %u not supported", ibqp->qp_type);
|
||||
@ -652,6 +866,28 @@ static int mana_ib_destroy_rc_qp(struct mana_ib_qp *qp, struct ib_udata *udata)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mana_ib_destroy_ud_qp(struct mana_ib_qp *qp, struct ib_udata *udata)
|
||||
{
|
||||
struct mana_ib_dev *mdev =
|
||||
container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
|
||||
int i;
|
||||
|
||||
mana_remove_qp_from_cqs(qp);
|
||||
mana_table_remove_qp(mdev, qp);
|
||||
|
||||
destroy_shadow_queue(&qp->shadow_rq);
|
||||
destroy_shadow_queue(&qp->shadow_sq);
|
||||
|
||||
/* Ignore return code as there is not much we can do about it.
|
||||
* The error message is printed inside.
|
||||
*/
|
||||
mana_ib_gd_destroy_ud_qp(mdev, qp);
|
||||
for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i)
|
||||
mana_ib_destroy_queue(mdev, &qp->ud_qp.queues[i]);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
|
||||
{
|
||||
struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
|
||||
@ -665,6 +901,9 @@ int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
|
||||
return mana_ib_destroy_qp_raw(qp, udata);
|
||||
case IB_QPT_RC:
|
||||
return mana_ib_destroy_rc_qp(qp, udata);
|
||||
case IB_QPT_UD:
|
||||
case IB_QPT_GSI:
|
||||
return mana_ib_destroy_ud_qp(qp, udata);
|
||||
default:
|
||||
ibdev_dbg(ibqp->device, "Unexpected QP type %u\n",
|
||||
ibqp->qp_type);
|
||||
|
drivers/infiniband/hw/mana/shadow_queue.h (new file, 115 lines)
@@ -0,0 +1,115 @@
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
 * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
 */

#ifndef _MANA_SHADOW_QUEUE_H_
#define _MANA_SHADOW_QUEUE_H_

struct shadow_wqe_header {
	u16 opcode;
	u16 error_code;
	u32 posted_wqe_size;
	u64 wr_id;
};

struct ud_rq_shadow_wqe {
	struct shadow_wqe_header header;
	u32 byte_len;
	u32 src_qpn;
};

struct ud_sq_shadow_wqe {
	struct shadow_wqe_header header;
};

struct shadow_queue {
	/* Unmasked producer index, Incremented on wqe posting */
	u64 prod_idx;
	/* Unmasked consumer index, Incremented on cq polling */
	u64 cons_idx;
	/* Unmasked index of next-to-complete (from HW) shadow WQE */
	u64 next_to_complete_idx;
	/* queue size in wqes */
	u32 length;
	/* distance between elements in bytes */
	u32 stride;
	/* ring buffer holding wqes */
	void *buffer;
};

static inline int create_shadow_queue(struct shadow_queue *queue, uint32_t length, uint32_t stride)
{
	queue->buffer = kvmalloc_array(length, stride, GFP_KERNEL);
	if (!queue->buffer)
		return -ENOMEM;

	queue->length = length;
	queue->stride = stride;

	return 0;
}

static inline void destroy_shadow_queue(struct shadow_queue *queue)
{
	kvfree(queue->buffer);
}

static inline bool shadow_queue_full(struct shadow_queue *queue)
{
	return (queue->prod_idx - queue->cons_idx) >= queue->length;
}

static inline bool shadow_queue_empty(struct shadow_queue *queue)
{
	return queue->prod_idx == queue->cons_idx;
}

static inline void *
shadow_queue_get_element(const struct shadow_queue *queue, u64 unmasked_index)
{
	u32 index = unmasked_index % queue->length;

	return ((u8 *)queue->buffer + index * queue->stride);
}

static inline void *
shadow_queue_producer_entry(struct shadow_queue *queue)
{
	return shadow_queue_get_element(queue, queue->prod_idx);
}

static inline void *
shadow_queue_get_next_to_consume(const struct shadow_queue *queue)
{
	if (queue->cons_idx == queue->next_to_complete_idx)
		return NULL;

	return shadow_queue_get_element(queue, queue->cons_idx);
}

static inline void *
shadow_queue_get_next_to_complete(struct shadow_queue *queue)
{
	if (queue->next_to_complete_idx == queue->prod_idx)
		return NULL;

	return shadow_queue_get_element(queue, queue->next_to_complete_idx);
}

static inline void shadow_queue_advance_producer(struct shadow_queue *queue)
{
	queue->prod_idx++;
}

static inline void shadow_queue_advance_consumer(struct shadow_queue *queue)
{
	queue->cons_idx++;
}

static inline void shadow_queue_advance_next_to_complete(struct shadow_queue *queue)
{
	queue->next_to_complete_idx++;
}

#endif
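The shadow queue tracks each posted work request with three monotonically increasing, unmasked indices that partition the ring into "posted but not yet completed by hardware" and "completed but not yet polled" regions. The sketch below walks one entry through that lifecycle using only the helpers defined in this header; it is illustrative and not part of the kernel diff.

/*
 * Illustrative walk-through of the three shadow_queue indices:
 *   prod_idx             - advanced when a WR is posted
 *   next_to_complete_idx - advanced when the HW completion arrives
 *   cons_idx             - advanced when the completion is handed back
 *                          to the consumer via poll_cq
 */
void example_shadow_queue_flow(struct shadow_queue *sq)
{
	struct shadow_wqe_header *wqe;

	/* post path */
	if (!shadow_queue_full(sq)) {
		wqe = shadow_queue_producer_entry(sq);
		/* fill wqe->wr_id, opcode, posted_wqe_size ... */
		shadow_queue_advance_producer(sq);
	}

	/* CQ path: hardware reported one completion */
	wqe = shadow_queue_get_next_to_complete(sq);
	if (wqe) {
		/* record error_code etc. from the hardware CQE */
		shadow_queue_advance_next_to_complete(sq);
	}

	/* poll path: hand the completion to the ULP */
	wqe = shadow_queue_get_next_to_consume(sq);
	if (wqe) {
		/* convert to struct ib_wc */
		shadow_queue_advance_consumer(sq);
	}
}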
drivers/infiniband/hw/mana/wr.c (new file, 168 lines)
@@ -0,0 +1,168 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
 */

#include "mana_ib.h"

#define MAX_WR_SGL_NUM (2)

static int mana_ib_post_recv_ud(struct mana_ib_qp *qp, const struct ib_recv_wr *wr)
{
	struct mana_ib_dev *mdev = container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
	struct gdma_queue *queue = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].kmem;
	struct gdma_posted_wqe_info wqe_info = {0};
	struct gdma_sge gdma_sgl[MAX_WR_SGL_NUM];
	struct gdma_wqe_request wqe_req = {0};
	struct ud_rq_shadow_wqe *shadow_wqe;
	int err, i;

	if (shadow_queue_full(&qp->shadow_rq))
		return -EINVAL;

	if (wr->num_sge > MAX_WR_SGL_NUM)
		return -EINVAL;

	for (i = 0; i < wr->num_sge; ++i) {
		gdma_sgl[i].address = wr->sg_list[i].addr;
		gdma_sgl[i].mem_key = wr->sg_list[i].lkey;
		gdma_sgl[i].size = wr->sg_list[i].length;
	}
	wqe_req.num_sge = wr->num_sge;
	wqe_req.sgl = gdma_sgl;

	err = mana_gd_post_work_request(queue, &wqe_req, &wqe_info);
	if (err)
		return err;

	shadow_wqe = shadow_queue_producer_entry(&qp->shadow_rq);
	memset(shadow_wqe, 0, sizeof(*shadow_wqe));
	shadow_wqe->header.opcode = IB_WC_RECV;
	shadow_wqe->header.wr_id = wr->wr_id;
	shadow_wqe->header.posted_wqe_size = wqe_info.wqe_size_in_bu;
	shadow_queue_advance_producer(&qp->shadow_rq);

	mana_gd_wq_ring_doorbell(mdev_to_gc(mdev), queue);
	return 0;
}

int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
		      const struct ib_recv_wr **bad_wr)
{
	struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
	int err = 0;

	for (; wr; wr = wr->next) {
		switch (ibqp->qp_type) {
		case IB_QPT_UD:
		case IB_QPT_GSI:
			err = mana_ib_post_recv_ud(qp, wr);
			if (unlikely(err)) {
				*bad_wr = wr;
				return err;
			}
			break;
		default:
			ibdev_dbg(ibqp->device, "Posting recv wr on qp type %u is not supported\n",
				  ibqp->qp_type);
			return -EINVAL;
		}
	}

	return err;
}

static int mana_ib_post_send_ud(struct mana_ib_qp *qp, const struct ib_ud_wr *wr)
{
	struct mana_ib_dev *mdev = container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
	struct mana_ib_ah *ah = container_of(wr->ah, struct mana_ib_ah, ibah);
	struct net_device *ndev = mana_ib_get_netdev(&mdev->ib_dev, qp->port);
	struct gdma_queue *queue = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].kmem;
	struct gdma_sge gdma_sgl[MAX_WR_SGL_NUM + 1];
	struct gdma_posted_wqe_info wqe_info = {0};
	struct gdma_wqe_request wqe_req = {0};
	struct rdma_send_oob send_oob = {0};
	struct ud_sq_shadow_wqe *shadow_wqe;
	int err, i;

	if (!ndev) {
		ibdev_dbg(&mdev->ib_dev, "Invalid port %u in QP %u\n",
			  qp->port, qp->ibqp.qp_num);
		return -EINVAL;
	}

	if (wr->wr.opcode != IB_WR_SEND)
		return -EINVAL;

	if (shadow_queue_full(&qp->shadow_sq))
		return -EINVAL;

	if (wr->wr.num_sge > MAX_WR_SGL_NUM)
		return -EINVAL;

	gdma_sgl[0].address = ah->dma_handle;
	gdma_sgl[0].mem_key = qp->ibqp.pd->local_dma_lkey;
	gdma_sgl[0].size = sizeof(struct mana_ib_av);
	for (i = 0; i < wr->wr.num_sge; ++i) {
		gdma_sgl[i + 1].address = wr->wr.sg_list[i].addr;
		gdma_sgl[i + 1].mem_key = wr->wr.sg_list[i].lkey;
		gdma_sgl[i + 1].size = wr->wr.sg_list[i].length;
	}

	wqe_req.num_sge = wr->wr.num_sge + 1;
	wqe_req.sgl = gdma_sgl;
	wqe_req.inline_oob_size = sizeof(struct rdma_send_oob);
	wqe_req.inline_oob_data = &send_oob;
	wqe_req.flags = GDMA_WR_OOB_IN_SGL;
	wqe_req.client_data_unit = ib_mtu_enum_to_int(ib_mtu_int_to_enum(ndev->mtu));

	send_oob.wqe_type = WQE_TYPE_UD_SEND;
	send_oob.fence = !!(wr->wr.send_flags & IB_SEND_FENCE);
	send_oob.signaled = !!(wr->wr.send_flags & IB_SEND_SIGNALED);
	send_oob.solicited = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
	send_oob.psn = qp->ud_qp.sq_psn;
	send_oob.ssn_or_rqpn = wr->remote_qpn;
	send_oob.ud_send.remote_qkey =
		qp->ibqp.qp_type == IB_QPT_GSI ? IB_QP1_QKEY : wr->remote_qkey;

	err = mana_gd_post_work_request(queue, &wqe_req, &wqe_info);
	if (err)
		return err;

	qp->ud_qp.sq_psn++;
	shadow_wqe = shadow_queue_producer_entry(&qp->shadow_sq);
	memset(shadow_wqe, 0, sizeof(*shadow_wqe));
	shadow_wqe->header.opcode = IB_WC_SEND;
	shadow_wqe->header.wr_id = wr->wr.wr_id;
	shadow_wqe->header.posted_wqe_size = wqe_info.wqe_size_in_bu;
	shadow_queue_advance_producer(&qp->shadow_sq);

	mana_gd_wq_ring_doorbell(mdev_to_gc(mdev), queue);
	return 0;
}

int mana_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
		      const struct ib_send_wr **bad_wr)
{
	int err;
	struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);

	for (; wr; wr = wr->next) {
		switch (ibqp->qp_type) {
		case IB_QPT_UD:
		case IB_QPT_GSI:
			err = mana_ib_post_send_ud(qp, ud_wr(wr));
			if (unlikely(err)) {
				*bad_wr = wr;
				return err;
			}
			break;
		default:
			ibdev_dbg(ibqp->device, "Posting send wr on qp type %u is not supported\n",
				  ibqp->qp_type);
			return -EINVAL;
		}
	}

	return err;
}
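These post_send/post_recv entry points are driven by in-kernel verbs consumers (the GSI/MAD path being the obvious one for this driver, which only accepts IB_WR_SEND on UD/GSI QPs). The sketch below shows a generic caller shape; the helper name and parameters are hypothetical, while ib_post_send(), struct ib_ud_wr and struct ib_sge are the standard kernel verbs API.

/*
 * Illustrative only: a kernel ULP posting one signaled UD send.
 */
static int example_post_ud_send(struct ib_qp *qp, struct ib_ah *ah,
				u32 remote_qpn, u32 remote_qkey,
				u64 dma_addr, u32 len, u32 lkey)
{
	const struct ib_send_wr *bad_wr;
	struct ib_sge sge = {
		.addr = dma_addr,
		.length = len,
		.lkey = lkey,
	};
	struct ib_ud_wr wr = {
		.wr = {
			.wr_id = 1,
			.sg_list = &sge,
			.num_sge = 1,
			.opcode = IB_WR_SEND,	/* the only opcode accepted above */
			.send_flags = IB_SEND_SIGNALED,
		},
		.ah = ah,
		.remote_qpn = remote_qpn,
		.remote_qkey = remote_qkey,	/* overridden with IB_QP1_QKEY on GSI */
	};

	return ib_post_send(qp, &wr.wr, &bad_wr);
}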
@@ -9,6 +9,7 @@ mlx5_ib-y := ah.o \
	data_direct.o \
	dm.o \
	doorbell.o \
	fs.o \
	gsi.o \
	ib_virt.o \
	mad.o \
@@ -26,7 +27,6 @@ mlx5_ib-y := ah.o \
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o \
	fs.o \
	qos.o \
	std_types.o
mlx5_ib-$(CONFIG_MLX5_MACSEC) += macsec.o
@ -140,6 +140,13 @@ static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = {
|
||||
INIT_OP_COUNTER(cc_tx_cnp_pkts, CC_TX_CNP_PKTS),
|
||||
};
|
||||
|
||||
static const struct mlx5_ib_counter packets_op_cnts[] = {
|
||||
INIT_OP_COUNTER(rdma_tx_packets, RDMA_TX_PACKETS),
|
||||
INIT_OP_COUNTER(rdma_tx_bytes, RDMA_TX_BYTES),
|
||||
INIT_OP_COUNTER(rdma_rx_packets, RDMA_RX_PACKETS),
|
||||
INIT_OP_COUNTER(rdma_rx_bytes, RDMA_RX_BYTES),
|
||||
};
|
||||
|
||||
static int mlx5_ib_read_counters(struct ib_counters *counters,
|
||||
struct ib_counters_read_attr *read_attr,
|
||||
struct uverbs_attr_bundle *attrs)
|
||||
@ -427,6 +434,52 @@ done:
|
||||
return num_counters;
|
||||
}
|
||||
|
||||
static bool is_rdma_bytes_counter(u32 type)
|
||||
{
|
||||
if (type == MLX5_IB_OPCOUNTER_RDMA_TX_BYTES ||
|
||||
type == MLX5_IB_OPCOUNTER_RDMA_RX_BYTES ||
|
||||
type == MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP ||
|
||||
type == MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static int do_per_qp_get_op_stat(struct rdma_counter *counter)
|
||||
{
|
||||
struct mlx5_ib_dev *dev = to_mdev(counter->device);
|
||||
const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);
|
||||
struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
|
||||
int i, ret, index, num_hw_counters;
|
||||
u64 packets = 0, bytes = 0;
|
||||
|
||||
for (i = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
|
||||
i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; i++) {
|
||||
if (!mcounter->fc[i])
|
||||
continue;
|
||||
|
||||
ret = mlx5_fc_query(dev->mdev, mcounter->fc[i],
|
||||
&packets, &bytes);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
num_hw_counters = cnts->num_q_counters +
|
||||
cnts->num_cong_counters +
|
||||
cnts->num_ext_ppcnt_counters;
|
||||
|
||||
index = i - MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP +
|
||||
num_hw_counters;
|
||||
|
||||
if (is_rdma_bytes_counter(i))
|
||||
counter->stats->value[index] = bytes;
|
||||
else
|
||||
counter->stats->value[index] = packets;
|
||||
|
||||
clear_bit(index, counter->stats->is_disabled);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int do_get_op_stat(struct ib_device *ibdev,
|
||||
struct rdma_hw_stats *stats,
|
||||
u32 port_num, int index)
|
||||
@ -434,7 +487,7 @@ static int do_get_op_stat(struct ib_device *ibdev,
|
||||
struct mlx5_ib_dev *dev = to_mdev(ibdev);
|
||||
const struct mlx5_ib_counters *cnts;
|
||||
const struct mlx5_ib_op_fc *opfcs;
|
||||
u64 packets = 0, bytes;
|
||||
u64 packets, bytes;
|
||||
u32 type;
|
||||
int ret;
|
||||
|
||||
@ -453,8 +506,11 @@ static int do_get_op_stat(struct ib_device *ibdev,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (is_rdma_bytes_counter(type))
|
||||
stats->value[index] = bytes;
|
||||
else
|
||||
stats->value[index] = packets;
|
||||
out:
|
||||
stats->value[index] = packets;
|
||||
return index;
|
||||
}
|
||||
|
||||
@ -523,19 +579,30 @@ static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
|
||||
{
|
||||
struct mlx5_ib_dev *dev = to_mdev(counter->device);
|
||||
const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);
|
||||
int ret;
|
||||
|
||||
return mlx5_ib_query_q_counters(dev->mdev, cnts,
|
||||
counter->stats, counter->id);
|
||||
ret = mlx5_ib_query_q_counters(dev->mdev, cnts, counter->stats,
|
||||
counter->id);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!counter->mode.bind_opcnt)
|
||||
return 0;
|
||||
|
||||
return do_per_qp_get_op_stat(counter);
|
||||
}
|
||||
|
||||
static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
|
||||
{
|
||||
struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
|
||||
struct mlx5_ib_dev *dev = to_mdev(counter->device);
|
||||
u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
|
||||
|
||||
if (!counter->id)
|
||||
return 0;
|
||||
|
||||
WARN_ON(!xa_empty(&mcounter->qpn_opfc_xa));
|
||||
mlx5r_fs_destroy_fcs(dev, counter);
|
||||
MLX5_SET(dealloc_q_counter_in, in, opcode,
|
||||
MLX5_CMD_OP_DEALLOC_Q_COUNTER);
|
||||
MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
|
||||
@ -543,7 +610,7 @@ static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
|
||||
}
|
||||
|
||||
static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
|
||||
struct ib_qp *qp)
|
||||
struct ib_qp *qp, u32 port)
|
||||
{
|
||||
struct mlx5_ib_dev *dev = to_mdev(qp->device);
|
||||
bool new = false;
|
||||
@ -568,8 +635,14 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
|
||||
if (err)
|
||||
goto fail_set_counter;
|
||||
|
||||
err = mlx5r_fs_bind_op_fc(qp, counter, port);
|
||||
if (err)
|
||||
goto fail_bind_op_fc;
|
||||
|
||||
return 0;
|
||||
|
||||
fail_bind_op_fc:
|
||||
mlx5_ib_qp_set_counter(qp, NULL);
|
||||
fail_set_counter:
|
||||
if (new) {
|
||||
mlx5_ib_counter_dealloc(counter);
|
||||
@ -579,9 +652,22 @@ fail_set_counter:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
|
||||
static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp, u32 port)
|
||||
{
|
||||
return mlx5_ib_qp_set_counter(qp, NULL);
|
||||
struct rdma_counter *counter = qp->counter;
|
||||
int err;
|
||||
|
||||
mlx5r_fs_unbind_op_fc(qp, counter);
|
||||
|
||||
err = mlx5_ib_qp_set_counter(qp, NULL);
|
||||
if (err)
|
||||
goto fail_set_counter;
|
||||
|
||||
return 0;
|
||||
|
||||
fail_set_counter:
|
||||
mlx5r_fs_bind_op_fc(qp, counter, port);
|
||||
return err;
|
||||
}
|
||||
|
||||
static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
|
||||
@ -681,6 +767,12 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
|
||||
descs[j].priv = &rdmatx_cnp_op_cnts[i].type;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(packets_op_cnts); i++, j++) {
|
||||
descs[j].name = packets_op_cnts[i].name;
|
||||
descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
|
||||
descs[j].priv = &packets_op_cnts[i].type;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -731,6 +823,8 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
|
||||
|
||||
num_op_counters = ARRAY_SIZE(basic_op_cnts);
|
||||
|
||||
num_op_counters += ARRAY_SIZE(packets_op_cnts);
|
||||
|
||||
if (MLX5_CAP_FLOWTABLE(dev->mdev,
|
||||
ft_field_support_2_nic_receive_rdma.bth_opcode))
|
||||
num_op_counters += ARRAY_SIZE(rdmarx_cnp_op_cnts);
|
||||
@ -760,10 +854,58 @@ err:
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/*
|
||||
* Checks if the given flow counter type should be sharing the same flow counter
|
||||
* with another type and if it should, checks if that other type flow counter
|
||||
* was already created, if both conditions are met return true and the counter
|
||||
* else return false.
|
||||
*/
|
||||
bool mlx5r_is_opfc_shared_and_in_use(struct mlx5_ib_op_fc *opfcs, u32 type,
|
||||
struct mlx5_ib_op_fc **opfc)
|
||||
{
|
||||
u32 shared_fc_type;
|
||||
|
||||
switch (type) {
|
||||
case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
|
||||
shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES;
|
||||
break;
|
||||
case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
|
||||
shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS;
|
||||
break;
|
||||
case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
|
||||
shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES;
|
||||
break;
|
||||
case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
|
||||
shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS;
|
||||
break;
|
||||
case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP:
|
||||
shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP;
|
||||
break;
|
||||
case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP:
|
||||
shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
|
||||
break;
|
||||
case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP:
|
||||
shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP;
|
||||
break;
|
||||
case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP:
|
||||
shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
*opfc = &opfcs[shared_fc_type];
|
||||
if (!(*opfc)->fc)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
|
||||
{
|
||||
u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
|
||||
int num_cnt_ports = dev->num_ports;
|
||||
struct mlx5_ib_op_fc *in_use_opfc;
|
||||
int i, j;
|
||||
|
||||
if (is_mdev_switchdev_mode(dev->mdev))
|
||||
@ -785,11 +927,15 @@ static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
|
||||
if (!dev->port[i].cnts.opfcs[j].fc)
|
||||
continue;
|
||||
|
||||
if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
|
||||
mlx5_ib_fs_remove_op_fc(dev,
|
||||
&dev->port[i].cnts.opfcs[j], j);
|
||||
if (mlx5r_is_opfc_shared_and_in_use(
|
||||
dev->port[i].cnts.opfcs, j, &in_use_opfc))
|
||||
goto skip;
|
||||
|
||||
mlx5_ib_fs_remove_op_fc(dev,
|
||||
&dev->port[i].cnts.opfcs[j], j);
|
||||
mlx5_fc_destroy(dev->mdev,
|
||||
dev->port[i].cnts.opfcs[j].fc);
|
||||
skip:
|
||||
dev->port[i].cnts.opfcs[j].fc = NULL;
|
||||
}
|
||||
}
|
||||
@ -983,8 +1129,8 @@ static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
|
||||
unsigned int index, bool enable)
|
||||
{
|
||||
struct mlx5_ib_dev *dev = to_mdev(device);
|
||||
struct mlx5_ib_op_fc *opfc, *in_use_opfc;
|
||||
struct mlx5_ib_counters *cnts;
|
||||
struct mlx5_ib_op_fc *opfc;
|
||||
u32 num_hw_counters, type;
|
||||
int ret;
|
||||
|
||||
@ -1008,6 +1154,13 @@ static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
|
||||
if (opfc->fc)
|
||||
return -EEXIST;
|
||||
|
||||
if (mlx5r_is_opfc_shared_and_in_use(cnts->opfcs, type,
|
||||
&in_use_opfc)) {
|
||||
opfc->fc = in_use_opfc->fc;
|
||||
opfc->rule[0] = in_use_opfc->rule[0];
|
||||
return 0;
|
||||
}
|
||||
|
||||
opfc->fc = mlx5_fc_create(dev->mdev, false);
|
||||
if (IS_ERR(opfc->fc))
|
||||
return PTR_ERR(opfc->fc);
|
||||
@ -1023,12 +1176,23 @@ static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
|
||||
if (!opfc->fc)
|
||||
return -EINVAL;
|
||||
|
||||
if (mlx5r_is_opfc_shared_and_in_use(cnts->opfcs, type, &in_use_opfc))
|
||||
goto out;
|
||||
|
||||
mlx5_ib_fs_remove_op_fc(dev, opfc, type);
|
||||
mlx5_fc_destroy(dev->mdev, opfc->fc);
|
||||
out:
|
||||
opfc->fc = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void mlx5_ib_counter_init(struct rdma_counter *counter)
|
||||
{
|
||||
struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
|
||||
|
||||
xa_init(&mcounter->qpn_opfc_xa);
|
||||
}
|
||||
|
||||
static const struct ib_device_ops hw_stats_ops = {
|
||||
.alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
|
||||
.get_hw_stats = mlx5_ib_get_hw_stats,
|
||||
@ -1037,8 +1201,10 @@ static const struct ib_device_ops hw_stats_ops = {
|
||||
.counter_dealloc = mlx5_ib_counter_dealloc,
|
||||
.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
|
||||
.counter_update_stats = mlx5_ib_counter_update_stats,
|
||||
.modify_hw_stat = IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) ?
|
||||
mlx5_ib_modify_stat : NULL,
|
||||
.modify_hw_stat = mlx5_ib_modify_stat,
|
||||
.counter_init = mlx5_ib_counter_init,
|
||||
|
||||
INIT_RDMA_OBJ_SIZE(rdma_counter, mlx5_rdma_counter, rdma_counter),
|
||||
};
|
||||
|
||||
static const struct ib_device_ops hw_switchdev_vport_op = {
|
||||
@ -1053,6 +1219,9 @@ static const struct ib_device_ops hw_switchdev_stats_ops = {
|
||||
.counter_dealloc = mlx5_ib_counter_dealloc,
|
||||
.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
|
||||
.counter_update_stats = mlx5_ib_counter_update_stats,
|
||||
.counter_init = mlx5_ib_counter_init,
|
||||
|
||||
INIT_RDMA_OBJ_SIZE(rdma_counter, mlx5_rdma_counter, rdma_counter),
|
||||
};
|
||||
|
||||
static const struct ib_device_ops counters_ops = {
|
||||
|
@@ -8,10 +8,25 @@

#include "mlx5_ib.h"

struct mlx5_rdma_counter {
	struct rdma_counter rdma_counter;

	struct mlx5_fc *fc[MLX5_IB_OPCOUNTER_MAX];
	struct xarray qpn_opfc_xa;
};

static inline struct mlx5_rdma_counter *
to_mcounter(struct rdma_counter *counter)
{
	return container_of(counter, struct mlx5_rdma_counter, rdma_counter);
}

int mlx5_ib_counters_init(struct mlx5_ib_dev *dev);
void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev);
void mlx5_ib_counters_clear_description(struct ib_counters *counters);
int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
				   struct mlx5_ib_create_flow *ucmd);
u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num);
bool mlx5r_is_opfc_shared_and_in_use(struct mlx5_ib_op_fc *opfcs, u32 type,
				     struct mlx5_ib_op_fc **opfc);
#endif /* _MLX5_IB_COUNTERS_H */
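The new struct mlx5_rdma_counter carries one mlx5_fc flow counter per optional-counter type plus an xarray of per-QP flow-counter state, which is what the RDMA_TX/RX per-QP statistics are read from. A simplified read-back sketch follows; the helper name is hypothetical and the index choice is only illustrative, but mlx5_fc_query() is the real mlx5 core API and returns packets and bytes from a single query.

/*
 * Simplified sketch (not the driver code): reading the per-QP TX
 * optional counters bound to a struct mlx5_rdma_counter.
 */
static int example_read_qp_tx(struct mlx5_ib_dev *dev,
			      struct mlx5_rdma_counter *mcounter,
			      u64 *tx_packets, u64 *tx_bytes)
{
	struct mlx5_fc *fc = mcounter->fc[MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP];

	if (!fc)
		return -EOPNOTSUPP;	/* optional counter not enabled/bound */

	/* one query returns both values; the bytes and packets stats share it */
	return mlx5_fc_query(dev->mdev, fc, tx_packets, tx_bytes);
}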
@@ -490,7 +490,7 @@ repoll:
	}

	qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
	if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
	if (!*cur_qp || (qpn != (*cur_qp)->trans_qp.base.mqp.qpn)) {
		/* We do not have to take the QP table lock here,
		 * because CQs will be locked while QPs are removed
		 * from the table.
@@ -13,6 +13,7 @@
#include <rdma/uverbs_std_types.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
#include <rdma/ib_ucaps.h>
#include "mlx5_ib.h"
#include "devx.h"
#include "qp.h"
@@ -122,7 +123,27 @@ devx_ufile2uctx(const struct uverbs_attr_bundle *attrs)
	return to_mucontext(ib_uverbs_get_ucontext(attrs));
}

int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
static int set_uctx_ucaps(struct mlx5_ib_dev *dev, u64 req_ucaps, u32 *cap)
{
	if (UCAP_ENABLED(req_ucaps, RDMA_UCAP_MLX5_CTRL_LOCAL)) {
		if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL)
			*cap |= MLX5_UCTX_CAP_RDMA_CTRL;
		else
			return -EOPNOTSUPP;
	}

	if (UCAP_ENABLED(req_ucaps, RDMA_UCAP_MLX5_CTRL_OTHER_VHCA)) {
		if (MLX5_CAP_GEN(dev->mdev, uctx_cap) &
		    MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA)
			*cap |= MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA;
		else
			return -EOPNOTSUPP;
	}

	return 0;
}

int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user, u64 req_ucaps)
{
	u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {};
	u32 out[MLX5_ST_SZ_DW(create_uctx_out)] = {};
@@ -136,14 +157,22 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
		return -EINVAL;

	uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx);
	if (is_user && capable(CAP_NET_RAW) &&
	    (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX))
	if (is_user &&
	    (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX) &&
	    capable(CAP_NET_RAW))
		cap |= MLX5_UCTX_CAP_RAW_TX;
	if (is_user && capable(CAP_SYS_RAWIO) &&
	if (is_user &&
	    (MLX5_CAP_GEN(dev->mdev, uctx_cap) &
	     MLX5_UCTX_CAP_INTERNAL_DEV_RES))
	     MLX5_UCTX_CAP_INTERNAL_DEV_RES) &&
	    capable(CAP_SYS_RAWIO))
		cap |= MLX5_UCTX_CAP_INTERNAL_DEV_RES;

	if (req_ucaps) {
		err = set_uctx_ucaps(dev, req_ucaps, &cap);
		if (err)
			return err;
	}

	MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX);
	MLX5_SET(uctx, uctx, cap, cap);

@@ -2573,7 +2602,7 @@ int mlx5_ib_devx_init(struct mlx5_ib_dev *dev)
	struct mlx5_devx_event_table *table = &dev->devx_event_table;
	int uid;

	uid = mlx5_ib_devx_create(dev, false);
	uid = mlx5_ib_devx_create(dev, false, 0);
	if (uid > 0) {
		dev->devx_whitelist_uid = uid;
		xa_init(&table->event_xa);

@@ -24,13 +24,14 @@ struct devx_obj {
	struct list_head event_sub; /* holds devx_event_subscription entries */
};
#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user);
int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user, u64 req_ucaps);
void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
int mlx5_ib_devx_init(struct mlx5_ib_dev *dev);
void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev);
void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile);
#else
static inline int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
static inline int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user,
				      u64 req_ucaps)
{
	return -EOPNOTSUPP;
}
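set_uctx_ucaps() is where the new ucap mechanism meets the mlx5 user context: each granted user capability is translated into a firmware UCTX capability bit only if the device actually advertises it. The sketch below shows the pattern a further capability would follow; RDMA_UCAP_MLX5_EXAMPLE and MLX5_UCTX_CAP_EXAMPLE are invented names, while UCAP_ENABLED() and the MLX5_CAP_GEN(uctx_cap) check are the real mechanisms used above.

/*
 * Hypothetical extension of set_uctx_ucaps(), for illustration only.
 */
static int example_set_extra_ucap(struct mlx5_ib_dev *dev, u64 req_ucaps, u32 *cap)
{
	if (UCAP_ENABLED(req_ucaps, RDMA_UCAP_MLX5_EXAMPLE)) {
		if (!(MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_EXAMPLE))
			return -EOPNOTSUPP;	/* firmware does not support it */
		*cap |= MLX5_UCTX_CAP_EXAMPLE;	/* request it on the UCTX */
	}
	return 0;
}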
@ -12,6 +12,7 @@
|
||||
#include <rdma/mlx5_user_ioctl_verbs.h>
|
||||
#include <rdma/ib_hdrs.h>
|
||||
#include <rdma/ib_umem.h>
|
||||
#include <rdma/ib_ucaps.h>
|
||||
#include <linux/mlx5/driver.h>
|
||||
#include <linux/mlx5/fs.h>
|
||||
#include <linux/mlx5/fs_helpers.h>
|
||||
@ -32,6 +33,11 @@ enum {
|
||||
MATCH_CRITERIA_ENABLE_MISC2_BIT
|
||||
};
|
||||
|
||||
|
||||
struct mlx5_per_qp_opfc {
|
||||
struct mlx5_ib_op_fc opfcs[MLX5_IB_OPCOUNTER_MAX];
|
||||
};
|
||||
|
||||
#define HEADER_IS_ZERO(match_criteria, headers) \
|
||||
!(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
|
||||
0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \
|
||||
@ -678,7 +684,7 @@ enum flow_table_type {
|
||||
#define MLX5_FS_MAX_TYPES 6
|
||||
#define MLX5_FS_MAX_ENTRIES BIT(16)
|
||||
|
||||
static bool mlx5_ib_shared_ft_allowed(struct ib_device *device)
|
||||
static bool __maybe_unused mlx5_ib_shared_ft_allowed(struct ib_device *device)
|
||||
{
|
||||
struct mlx5_ib_dev *dev = to_mdev(device);
|
||||
|
||||
@ -690,7 +696,7 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
|
||||
struct mlx5_ib_flow_prio *prio,
|
||||
int priority,
|
||||
int num_entries, int num_groups,
|
||||
u32 flags)
|
||||
u32 flags, u16 vport)
|
||||
{
|
||||
struct mlx5_flow_table_attr ft_attr = {};
|
||||
struct mlx5_flow_table *ft;
|
||||
@ -698,6 +704,7 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
|
||||
ft_attr.prio = priority;
|
||||
ft_attr.max_fte = num_entries;
|
||||
ft_attr.flags = flags;
|
||||
ft_attr.vport = vport;
|
||||
ft_attr.autogroup.max_num_groups = num_groups;
|
||||
ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
|
||||
if (IS_ERR(ft))
|
||||
@ -792,18 +799,25 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
|
||||
ft = prio->flow_table;
|
||||
if (!ft)
|
||||
return _get_prio(dev, ns, prio, priority, max_table_size,
|
||||
num_groups, flags);
|
||||
num_groups, flags, 0);
|
||||
|
||||
return prio;
|
||||
}
|
||||
|
||||
enum {
|
||||
RDMA_RX_ECN_OPCOUNTER_PER_QP_PRIO,
|
||||
RDMA_RX_CNP_OPCOUNTER_PER_QP_PRIO,
|
||||
RDMA_RX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO,
|
||||
RDMA_RX_ECN_OPCOUNTER_PRIO,
|
||||
RDMA_RX_CNP_OPCOUNTER_PRIO,
|
||||
RDMA_RX_PKTS_BYTES_OPCOUNTER_PRIO,
|
||||
};
|
||||
|
||||
enum {
|
||||
RDMA_TX_CNP_OPCOUNTER_PER_QP_PRIO,
|
||||
RDMA_TX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO,
|
||||
RDMA_TX_CNP_OPCOUNTER_PRIO,
|
||||
RDMA_TX_PKTS_BYTES_OPCOUNTER_PRIO,
|
||||
};
|
||||
|
||||
static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num,
|
||||
@ -867,6 +881,344 @@ static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Returns the prio we should use for the given optional counter type,
|
||||
* whereas for bytes type we use the packet type, since they share the same
|
||||
* resources.
|
||||
*/
|
||||
static struct mlx5_ib_flow_prio *get_opfc_prio(struct mlx5_ib_dev *dev,
|
||||
u32 type)
|
||||
{
|
||||
u32 prio_type;
|
||||
|
||||
switch (type) {
|
||||
case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
|
||||
prio_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS;
|
||||
break;
|
||||
case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
|
||||
prio_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS;
|
||||
break;
|
||||
case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP:
|
||||
prio_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
|
||||
break;
|
||||
case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP:
|
||||
prio_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
|
||||
break;
|
||||
default:
|
||||
prio_type = type;
|
||||
}
|
||||
|
||||
return &dev->flow_db->opfcs[prio_type];
|
||||
}
|
||||
|
||||
static void put_per_qp_prio(struct mlx5_ib_dev *dev,
|
||||
enum mlx5_ib_optional_counter_type type)
|
||||
{
|
||||
enum mlx5_ib_optional_counter_type per_qp_type;
|
||||
struct mlx5_ib_flow_prio *prio;
|
||||
|
||||
switch (type) {
|
||||
case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
|
||||
per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
|
||||
break;
|
||||
case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
|
||||
per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP;
|
||||
break;
|
||||
case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
|
||||
per_qp_type = MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP;
|
||||
break;
|
||||
case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
|
||||
per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
|
||||
break;
|
||||
case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
|
||||
per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP;
|
||||
break;
|
||||
case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
|
||||
per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
|
||||
break;
|
||||
case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
|
||||
per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP;
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
|
||||
prio = get_opfc_prio(dev, per_qp_type);
|
||||
put_flow_table(dev, prio, true);
|
||||
}
|
||||
|
||||
static int get_per_qp_prio(struct mlx5_ib_dev *dev,
|
||||
enum mlx5_ib_optional_counter_type type)
|
||||
{
|
||||
enum mlx5_ib_optional_counter_type per_qp_type;
|
||||
enum mlx5_flow_namespace_type fn_type;
|
||||
struct mlx5_flow_namespace *ns;
|
||||
struct mlx5_ib_flow_prio *prio;
|
||||
int priority;
|
||||
|
||||
switch (type) {
|
||||
case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
|
||||
fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
|
||||
priority = RDMA_RX_ECN_OPCOUNTER_PER_QP_PRIO;
|
||||
per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
|
||||
break;
|
||||
case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
|
||||
fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
|
||||
priority = RDMA_RX_CNP_OPCOUNTER_PER_QP_PRIO;
|
||||
per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP;
|
||||
break;
|
||||
case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
|
||||
fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
|
||||
priority = RDMA_TX_CNP_OPCOUNTER_PER_QP_PRIO;
|
||||
per_qp_type = MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP;
|
||||
break;
|
||||
case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
|
||||
fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
|
||||
priority = RDMA_TX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO;
|
||||
per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
|
||||
break;
|
||||
case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
|
||||
fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
|
||||
priority = RDMA_TX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO;
|
||||
per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP;
|
||||
break;
|
||||
case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
|
||||
fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
|
||||
priority = RDMA_RX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO;
|
||||
per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
|
||||
break;
|
||||
case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
|
||||
fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
|
||||
priority = RDMA_RX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO;
|
||||
per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
|
||||
if (!ns)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
prio = get_opfc_prio(dev, per_qp_type);
|
||||
if (prio->flow_table)
|
||||
return 0;
|
||||
|
||||
prio = _get_prio(dev, ns, prio, priority, MLX5_FS_MAX_POOL_SIZE, 1, 0, 0);
|
||||
if (IS_ERR(prio))
|
||||
return PTR_ERR(prio);
|
||||
|
||||
prio->refcount = 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct mlx5_per_qp_opfc *
get_per_qp_opfc(struct mlx5_rdma_counter *mcounter, u32 qp_num, bool *new)
{
	struct mlx5_per_qp_opfc *per_qp_opfc;

	*new = false;

	per_qp_opfc = xa_load(&mcounter->qpn_opfc_xa, qp_num);
	if (per_qp_opfc)
		return per_qp_opfc;
	per_qp_opfc = kzalloc(sizeof(*per_qp_opfc), GFP_KERNEL);

	if (!per_qp_opfc)
		return NULL;

	*new = true;
	return per_qp_opfc;
}

static int add_op_fc_rules(struct mlx5_ib_dev *dev,
			   struct mlx5_rdma_counter *mcounter,
			   struct mlx5_per_qp_opfc *per_qp_opfc,
			   struct mlx5_ib_flow_prio *prio,
			   enum mlx5_ib_optional_counter_type type,
			   u32 qp_num, u32 port_num)
{
	struct mlx5_ib_op_fc *opfc = &per_qp_opfc->opfcs[type], *in_use_opfc;
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_destination dst;
	struct mlx5_flow_spec *spec;
	int i, err, spec_num;
	bool is_tx;

	if (opfc->fc)
		return -EEXIST;

	if (mlx5r_is_opfc_shared_and_in_use(per_qp_opfc->opfcs, type,
					    &in_use_opfc)) {
		opfc->fc = in_use_opfc->fc;
		opfc->rule[0] = in_use_opfc->rule[0];
		return 0;
	}

	opfc->fc = mcounter->fc[type];

	spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL);
	if (!spec) {
		err = -ENOMEM;
		goto null_fc;
	}

	switch (type) {
	case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP:
		if (set_ecn_ce_spec(dev, port_num, &spec[0],
				    MLX5_FS_IPV4_VERSION) ||
		    set_ecn_ce_spec(dev, port_num, &spec[1],
				    MLX5_FS_IPV6_VERSION)) {
			err = -EOPNOTSUPP;
			goto free_spec;
		}
		spec_num = 2;
		is_tx = false;

		MLX5_SET_TO_ONES(fte_match_param, spec[1].match_criteria,
				 misc_parameters.bth_dst_qp);
		MLX5_SET(fte_match_param, spec[1].match_value,
			 misc_parameters.bth_dst_qp, qp_num);
		spec[1].match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
		break;
	case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP:
		if (!MLX5_CAP_FLOWTABLE(
			    dev->mdev,
			    ft_field_support_2_nic_receive_rdma.bth_opcode) ||
		    set_cnp_spec(dev, port_num, &spec[0])) {
			err = -EOPNOTSUPP;
			goto free_spec;
		}
		spec_num = 1;
		is_tx = false;
		break;
	case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP:
		if (!MLX5_CAP_FLOWTABLE(
			    dev->mdev,
			    ft_field_support_2_nic_transmit_rdma.bth_opcode) ||
		    set_cnp_spec(dev, port_num, &spec[0])) {
			err = -EOPNOTSUPP;
			goto free_spec;
		}
		spec_num = 1;
		is_tx = true;
		break;
	case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP:
	case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP:
		spec_num = 1;
		is_tx = true;
		break;
	case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP:
	case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP:
		spec_num = 1;
		is_tx = false;
		break;
	default:
		err = -EINVAL;
		goto free_spec;
	}

	if (is_tx) {
		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
				 misc_parameters.source_sqn);
		MLX5_SET(fte_match_param, spec->match_value,
			 misc_parameters.source_sqn, qp_num);
	} else {
		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
				 misc_parameters.bth_dst_qp);
		MLX5_SET(fte_match_param, spec->match_value,
			 misc_parameters.bth_dst_qp, qp_num);
	}

	spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;

	dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
	dst.counter = opfc->fc;

	flow_act.action =
		MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;

	for (i = 0; i < spec_num; i++) {
		opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i],
						    &flow_act, &dst, 1);
		if (IS_ERR(opfc->rule[i])) {
			err = PTR_ERR(opfc->rule[i]);
			goto del_rules;
		}
	}
	prio->refcount += spec_num;

	err = xa_err(xa_store(&mcounter->qpn_opfc_xa, qp_num, per_qp_opfc,
			      GFP_KERNEL));
	if (err)
		goto del_rules;

	kfree(spec);

	return 0;

del_rules:
	while (i--)
		mlx5_del_flow_rules(opfc->rule[i]);
	put_flow_table(dev, prio, false);
free_spec:
	kfree(spec);
null_fc:
	opfc->fc = NULL;
	return err;
}

static bool is_fc_shared_and_in_use(struct mlx5_rdma_counter *mcounter,
				    u32 type, struct mlx5_fc **fc)
{
	u32 shared_fc_type;

	switch (type) {
	case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP:
		shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP;
		break;
	case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP:
		shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
		break;
	case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP:
		shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP;
		break;
	case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP:
		shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
		break;
	default:
		return false;
	}

	*fc = mcounter->fc[shared_fc_type];
	if (!(*fc))
		return false;

	return true;
}

void mlx5r_fs_destroy_fcs(struct mlx5_ib_dev *dev,
			  struct rdma_counter *counter)
{
	struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
	struct mlx5_fc *in_use_fc;
	int i;

	for (i = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
	     i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; i++) {
		if (!mcounter->fc[i])
			continue;

		if (is_fc_shared_and_in_use(mcounter, i, &in_use_fc)) {
			mcounter->fc[i] = NULL;
			continue;
		}

		mlx5_fc_destroy(dev->mdev, mcounter->fc[i]);
		mcounter->fc[i] = NULL;
	}
}

int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
			 struct mlx5_ib_op_fc *opfc,
			 enum mlx5_ib_optional_counter_type type)
@@ -921,6 +1273,20 @@ int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
		priority = RDMA_TX_CNP_OPCOUNTER_PRIO;
		break;

	case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
	case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
		spec_num = 1;
		fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
		priority = RDMA_TX_PKTS_BYTES_OPCOUNTER_PRIO;
		break;

	case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
	case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
		spec_num = 1;
		fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
		priority = RDMA_RX_PKTS_BYTES_OPCOUNTER_PRIO;
		break;

	default:
		err = -EOPNOTSUPP;
		goto free;
@@ -932,13 +1298,17 @@ int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
		goto free;
	}

	prio = &dev->flow_db->opfcs[type];
	prio = get_opfc_prio(dev, type);
	if (!prio->flow_table) {
		err = get_per_qp_prio(dev, type);
		if (err)
			goto free;

		prio = _get_prio(dev, ns, prio, priority,
				 dev->num_ports * MAX_OPFC_RULES, 1, 0);
				 dev->num_ports * MAX_OPFC_RULES, 1, 0, 0);
		if (IS_ERR(prio)) {
			err = PTR_ERR(prio);
			goto free;
			goto put_prio;
		}
	}

@@ -965,6 +1335,8 @@ del_rules:
	for (i -= 1; i >= 0; i--)
		mlx5_del_flow_rules(opfc->rule[i]);
	put_flow_table(dev, prio, false);
put_prio:
	put_per_qp_prio(dev, type);
free:
	kfree(spec);
	return err;
@@ -974,12 +1346,115 @@ void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
			     struct mlx5_ib_op_fc *opfc,
			     enum mlx5_ib_optional_counter_type type)
{
	struct mlx5_ib_flow_prio *prio;
	int i;

	prio = get_opfc_prio(dev, type);

	for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) {
		mlx5_del_flow_rules(opfc->rule[i]);
		put_flow_table(dev, &dev->flow_db->opfcs[type], true);
		put_flow_table(dev, prio, true);
	}

	put_per_qp_prio(dev, type);
}

void mlx5r_fs_unbind_op_fc(struct ib_qp *qp, struct rdma_counter *counter)
{
	struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
	struct mlx5_ib_dev *dev = to_mdev(counter->device);
	struct mlx5_per_qp_opfc *per_qp_opfc;
	struct mlx5_ib_op_fc *in_use_opfc;
	struct mlx5_ib_flow_prio *prio;
	int i, j;

	per_qp_opfc = xa_load(&mcounter->qpn_opfc_xa, qp->qp_num);
	if (!per_qp_opfc)
		return;

	for (i = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
	     i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; i++) {
		if (!per_qp_opfc->opfcs[i].fc)
			continue;

		if (mlx5r_is_opfc_shared_and_in_use(per_qp_opfc->opfcs, i,
						    &in_use_opfc)) {
			per_qp_opfc->opfcs[i].fc = NULL;
			continue;
		}

		for (j = 0; j < MAX_OPFC_RULES; j++) {
			if (!per_qp_opfc->opfcs[i].rule[j])
				continue;
			mlx5_del_flow_rules(per_qp_opfc->opfcs[i].rule[j]);
			prio = get_opfc_prio(dev, i);
			put_flow_table(dev, prio, true);
		}
		per_qp_opfc->opfcs[i].fc = NULL;
	}

	kfree(per_qp_opfc);
	xa_erase(&mcounter->qpn_opfc_xa, qp->qp_num);
}

int mlx5r_fs_bind_op_fc(struct ib_qp *qp, struct rdma_counter *counter,
			u32 port)
{
	struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
	struct mlx5_ib_dev *dev = to_mdev(qp->device);
	struct mlx5_per_qp_opfc *per_qp_opfc;
	struct mlx5_ib_flow_prio *prio;
	struct mlx5_ib_counters *cnts;
	struct mlx5_ib_op_fc *opfc;
	struct mlx5_fc *in_use_fc;
	int i, err, per_qp_type;
	bool new;

	if (!counter->mode.bind_opcnt)
		return 0;

	cnts = &dev->port[port - 1].cnts;

	for (i = 0; i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES; i++) {
		opfc = &cnts->opfcs[i];
		if (!opfc->fc)
			continue;

		per_qp_type = i + MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
		prio = get_opfc_prio(dev, per_qp_type);
		WARN_ON(!prio->flow_table);

		if (is_fc_shared_and_in_use(mcounter, per_qp_type, &in_use_fc))
			mcounter->fc[per_qp_type] = in_use_fc;

		if (!mcounter->fc[per_qp_type]) {
			mcounter->fc[per_qp_type] = mlx5_fc_create(dev->mdev,
								   false);
			if (IS_ERR(mcounter->fc[per_qp_type]))
				return PTR_ERR(mcounter->fc[per_qp_type]);
		}

		per_qp_opfc = get_per_qp_opfc(mcounter, qp->qp_num, &new);
		if (!per_qp_opfc) {
			err = -ENOMEM;
			goto free_fc;
		}
		err = add_op_fc_rules(dev, mcounter, per_qp_opfc, prio,
				      per_qp_type, qp->qp_num, port);
		if (err)
			goto del_rules;
	}

	return 0;

del_rules:
	mlx5r_fs_unbind_op_fc(qp, counter);
	if (new)
		kfree(per_qp_opfc);
free_fc:
	if (xa_empty(&mcounter->qpn_opfc_xa))
		mlx5r_fs_destroy_fcs(dev, counter);
	return err;
}

static void set_underlay_qp(struct mlx5_ib_dev *dev,
@@ -1413,17 +1888,51 @@ free_ucmd:
	return ERR_PTR(err);
}

static int mlx5_ib_fill_transport_ns_info(struct mlx5_ib_dev *dev,
					  enum mlx5_flow_namespace_type type,
					  u32 *flags, u16 *vport_idx,
					  u16 *vport,
					  struct mlx5_core_dev **ft_mdev,
					  u32 ib_port)
{
	struct mlx5_core_dev *esw_mdev;

	if (!is_mdev_switchdev_mode(dev->mdev))
		return 0;

	if (!MLX5_CAP_ADV_RDMA(dev->mdev, rdma_transport_manager))
		return -EOPNOTSUPP;

	if (!dev->port[ib_port - 1].rep)
		return -EINVAL;

	esw_mdev = mlx5_eswitch_get_core_dev(dev->port[ib_port - 1].rep->esw);
	if (esw_mdev != dev->mdev)
		return -EOPNOTSUPP;

	*flags |= MLX5_FLOW_TABLE_OTHER_VPORT;
	*ft_mdev = esw_mdev;
	*vport = dev->port[ib_port - 1].rep->vport;
	*vport_idx = dev->port[ib_port - 1].rep->vport_index;

	return 0;
}

static struct mlx5_ib_flow_prio *
_get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
		enum mlx5_flow_namespace_type ns_type,
		bool mcast)
		bool mcast, u32 ib_port)
{
	struct mlx5_core_dev *ft_mdev = dev->mdev;
	struct mlx5_flow_namespace *ns = NULL;
	struct mlx5_ib_flow_prio *prio = NULL;
	int max_table_size = 0;
	u16 vport_idx = 0;
	bool esw_encap;
	u32 flags = 0;
	u16 vport = 0;
	int priority;
	int ret;

	if (mcast)
		priority = MLX5_IB_FLOW_MCAST_PRIO;
@@ -1471,13 +1980,38 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
			MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size));
		priority = user_priority;
		break;
	case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX:
	case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX:
		if (ib_port == 0 || user_priority > MLX5_RDMA_TRANSPORT_BYPASS_PRIO)
			return ERR_PTR(-EINVAL);
		ret = mlx5_ib_fill_transport_ns_info(dev, ns_type, &flags,
						     &vport_idx, &vport,
						     &ft_mdev, ib_port);
		if (ret)
			return ERR_PTR(ret);

		if (ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX)
			max_table_size =
				BIT(MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(
					ft_mdev, log_max_ft_size));
		else
			max_table_size =
				BIT(MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(
					ft_mdev, log_max_ft_size));
		priority = user_priority;
		break;
	default:
		break;
	}

	max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);

	ns = mlx5_get_flow_namespace(dev->mdev, ns_type);
	if (ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX ||
	    ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX)
		ns = mlx5_get_flow_vport_namespace(ft_mdev, ns_type, vport_idx);
	else
		ns = mlx5_get_flow_namespace(ft_mdev, ns_type);

	if (!ns)
		return ERR_PTR(-EOPNOTSUPP);

@@ -1497,6 +2031,12 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
	case MLX5_FLOW_NAMESPACE_RDMA_TX:
		prio = &dev->flow_db->rdma_tx[priority];
		break;
	case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX:
		prio = &dev->flow_db->rdma_transport_rx[ib_port - 1];
		break;
	case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX:
		prio = &dev->flow_db->rdma_transport_tx[ib_port - 1];
		break;
	default: return ERR_PTR(-EINVAL);
	}

@@ -1507,7 +2047,7 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
		return prio;

	return _get_prio(dev, ns, prio, priority, max_table_size,
			 MLX5_FS_MAX_TYPES, flags);
			 MLX5_FS_MAX_TYPES, flags, vport);
}

static struct mlx5_ib_flow_handler *
@@ -1626,7 +2166,8 @@ static struct mlx5_ib_flow_handler *raw_fs_rule_add(
	mutex_lock(&dev->flow_db->lock);

	ft_prio = _get_flow_table(dev, fs_matcher->priority,
				  fs_matcher->ns_type, mcast);
				  fs_matcher->ns_type, mcast,
				  fs_matcher->ib_port);
	if (IS_ERR(ft_prio)) {
		err = PTR_ERR(ft_prio);
		goto unlock;
@@ -1742,6 +2283,12 @@ mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX:
		*namespace = MLX5_FLOW_NAMESPACE_RDMA_TX;
		break;
	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_RX:
		*namespace = MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX;
		break;
	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_TX:
		*namespace = MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX;
		break;
	default:
		return -EINVAL;
	}
@@ -1831,7 +2378,8 @@ static int get_dests(struct uverbs_attr_bundle *attrs,
		return -EINVAL;

	/* Allow only DEVX object or QP as dest when inserting to RDMA_RX */
	if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
	if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX ||
	     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) &&
	    ((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
		return -EINVAL;

@@ -1848,7 +2396,8 @@ static int get_dests(struct uverbs_attr_bundle *attrs,
			return -EINVAL;
		/* Allow only flow table as dest when inserting to FDB or RDMA_RX */
		if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS ||
		     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
		     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX ||
		     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) &&
		    *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
			return -EINVAL;
	} else if (dest_qp) {
@@ -1869,14 +2418,16 @@ static int get_dests(struct uverbs_attr_bundle *attrs,
		*dest_id = mqp->raw_packet_qp.rq.tirn;
		*dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
	} else if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
		    fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) &&
		    fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX ||
		    fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX) &&
		   !(*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)) {
		*dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
	}

	if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
	    (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
	     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX))
	     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX ||
	     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX))
		return -EINVAL;

	return 0;
@@ -2353,6 +2904,15 @@ static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
	return 0;
}

static bool verify_context_caps(struct mlx5_ib_dev *dev, u64 enabled_caps)
{
	if (is_mdev_switchdev_mode(dev->mdev))
		return UCAP_ENABLED(enabled_caps,
				    RDMA_UCAP_MLX5_CTRL_OTHER_VHCA);

	return UCAP_ENABLED(enabled_caps, RDMA_UCAP_MLX5_CTRL_LOCAL);
}

static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
	struct uverbs_attr_bundle *attrs)
{
@@ -2401,6 +2961,26 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
			goto end;
	}

	if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT)) {
		err = uverbs_copy_from(&obj->ib_port, attrs,
				       MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT);
		if (err)
			goto end;
		if (!rdma_is_port_valid(&dev->ib_dev, obj->ib_port)) {
			err = -EINVAL;
			goto end;
		}
		if (obj->ns_type != MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX &&
		    obj->ns_type != MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX) {
			err = -EINVAL;
			goto end;
		}
		if (!verify_context_caps(dev, uobj->context->enabled_caps)) {
			err = -EOPNOTSUPP;
			goto end;
		}
	}

	uobj->object = obj;
	obj->mdev = dev->mdev;
	atomic_set(&obj->usecnt, 0);
@@ -2448,7 +3028,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(

	mutex_lock(&dev->flow_db->lock);

	ft_prio = _get_flow_table(dev, priority, ns_type, 0);
	ft_prio = _get_flow_table(dev, priority, ns_type, 0, 0);
	if (IS_ERR(ft_prio)) {
		err = PTR_ERR(ft_prio);
		goto free_obj;
@@ -2834,7 +3414,10 @@ DECLARE_UVERBS_NAMED_METHOD(
			     UA_OPTIONAL),
	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
			     enum mlx5_ib_uapi_flow_table_type,
			     UA_OPTIONAL));
			     UA_OPTIONAL),
	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT,
			   UVERBS_ATTR_TYPE(u32),
			   UA_OPTIONAL));

DECLARE_UVERBS_NAMED_METHOD_DESTROY(
	MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
@@ -2878,6 +3461,7 @@ DECLARE_UVERBS_NAMED_OBJECT(
	&UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY));

const struct uapi_definition mlx5_ib_flow_defs[] = {
#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
		MLX5_IB_OBJECT_FLOW_MATCHER),
	UAPI_DEF_CHAIN_OBJ_TREE(
@@ -2888,6 +3472,7 @@ const struct uapi_definition mlx5_ib_flow_defs[] = {
	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
		MLX5_IB_OBJECT_STEERING_ANCHOR,
		UAPI_DEF_IS_OBJ_SUPPORTED(mlx5_ib_shared_ft_allowed)),
#endif
	{},
};

@@ -2904,8 +3489,26 @@ int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
	if (!dev->flow_db)
		return -ENOMEM;

	dev->flow_db->rdma_transport_rx = kcalloc(dev->num_ports,
						  sizeof(struct mlx5_ib_flow_prio),
						  GFP_KERNEL);
	if (!dev->flow_db->rdma_transport_rx)
		goto free_flow_db;

	dev->flow_db->rdma_transport_tx = kcalloc(dev->num_ports,
						  sizeof(struct mlx5_ib_flow_prio),
						  GFP_KERNEL);
	if (!dev->flow_db->rdma_transport_tx)
		goto free_rdma_transport_rx;

	mutex_init(&dev->flow_db->lock);

	ib_set_device_ops(&dev->ib_dev, &flow_ops);
	return 0;

free_rdma_transport_rx:
	kfree(dev->flow_db->rdma_transport_rx);
free_flow_db:
	kfree(dev->flow_db);
	return -ENOMEM;
}

@@ -8,23 +8,8 @@

#include "mlx5_ib.h"

#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
int mlx5_ib_fs_init(struct mlx5_ib_dev *dev);
void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev);
#else
static inline int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
{
	dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);

	if (!dev->flow_db)
		return -ENOMEM;

	mutex_init(&dev->flow_db->lock);
	return 0;
}

inline void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev) {}
#endif

static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev)
{
@@ -40,6 +25,8 @@ static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev)
	 * is a safe assumption that all references are gone.
	 */
	mlx5_ib_fs_cleanup_anchor(dev);
	kfree(dev->flow_db->rdma_transport_tx);
	kfree(dev->flow_db->rdma_transport_rx);
	kfree(dev->flow_db);
}
#endif /* _MLX5_IB_FS_H */

@@ -47,6 +47,7 @@
#include <rdma/uverbs_ioctl.h>
#include <rdma/mlx5_user_ioctl_verbs.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
#include <rdma/ib_ucaps.h>
#include "macsec.h"
#include "data_direct.h"

@@ -1934,6 +1935,12 @@ static int set_ucontext_resp(struct ib_ucontext *uctx,
	return 0;
}

static bool uctx_rdma_ctrl_is_enabled(u64 enabled_caps)
{
	return UCAP_ENABLED(enabled_caps, RDMA_UCAP_MLX5_CTRL_LOCAL) ||
	       UCAP_ENABLED(enabled_caps, RDMA_UCAP_MLX5_CTRL_OTHER_VHCA);
}

static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
				  struct ib_udata *udata)
{
@@ -1976,10 +1983,17 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
		return -EINVAL;

	if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
		err = mlx5_ib_devx_create(dev, true);
		err = mlx5_ib_devx_create(dev, true, uctx->enabled_caps);
		if (err < 0)
			goto out_ctx;
		context->devx_uid = err;

		if (uctx_rdma_ctrl_is_enabled(uctx->enabled_caps)) {
			err = mlx5_cmd_add_privileged_uid(dev->mdev,
							  context->devx_uid);
			if (err)
				goto out_devx;
		}
	}

	lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
@@ -1994,7 +2008,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
	/* updates req->total_num_bfregs */
	err = calc_total_bfregs(dev, lib_uar_4k, &req, bfregi);
	if (err)
		goto out_devx;
		goto out_ucap;

	mutex_init(&bfregi->lock);
	bfregi->lib_uar_4k = lib_uar_4k;
@@ -2002,7 +2016,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
				GFP_KERNEL);
	if (!bfregi->count) {
		err = -ENOMEM;
		goto out_devx;
		goto out_ucap;
	}

	bfregi->sys_pages = kcalloc(bfregi->num_sys_pages,
@@ -2066,6 +2080,11 @@ out_sys_pages:
out_count:
	kfree(bfregi->count);

out_ucap:
	if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX &&
	    uctx_rdma_ctrl_is_enabled(uctx->enabled_caps))
		mlx5_cmd_remove_privileged_uid(dev->mdev, context->devx_uid);

out_devx:
	if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX)
		mlx5_ib_devx_destroy(dev, context->devx_uid);
@@ -2110,8 +2129,12 @@ static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
	kfree(bfregi->sys_pages);
	kfree(bfregi->count);

	if (context->devx_uid)
	if (context->devx_uid) {
		if (uctx_rdma_ctrl_is_enabled(ibcontext->enabled_caps))
			mlx5_cmd_remove_privileged_uid(dev->mdev,
						       context->devx_uid);
		mlx5_ib_devx_destroy(dev, context->devx_uid);
	}
}

static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev,
@@ -4201,8 +4224,47 @@ static int mlx5_ib_init_var_table(struct mlx5_ib_dev *dev)
	return (var_table->bitmap) ? 0 : -ENOMEM;
}

static void mlx5_ib_cleanup_ucaps(struct mlx5_ib_dev *dev)
{
	if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL)
		ib_remove_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL);

	if (MLX5_CAP_GEN(dev->mdev, uctx_cap) &
	    MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA)
		ib_remove_ucap(RDMA_UCAP_MLX5_CTRL_OTHER_VHCA);
}

static int mlx5_ib_init_ucaps(struct mlx5_ib_dev *dev)
{
	int ret;

	if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL) {
		ret = ib_create_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL);
		if (ret)
			return ret;
	}

	if (MLX5_CAP_GEN(dev->mdev, uctx_cap) &
	    MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA) {
		ret = ib_create_ucap(RDMA_UCAP_MLX5_CTRL_OTHER_VHCA);
		if (ret)
			goto remove_local;
	}

	return 0;

remove_local:
	if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL)
		ib_remove_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL);
	return ret;
}

static void mlx5_ib_stage_caps_cleanup(struct mlx5_ib_dev *dev)
{
	if (MLX5_CAP_GEN_2_64(dev->mdev, general_obj_types_127_64) &
	    MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_RDMA_CTRL)
		mlx5_ib_cleanup_ucaps(dev);

	bitmap_free(dev->var_table.bitmap);
}

@@ -4253,6 +4315,13 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
			return err;
	}

	if (MLX5_CAP_GEN_2_64(dev->mdev, general_obj_types_127_64) &
	    MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_RDMA_CTRL) {
		err = mlx5_ib_init_ucaps(dev);
		if (err)
			return err;
	}

	dev->ib_dev.use_cq_dim = true;

	return 0;
@@ -276,6 +276,7 @@ struct mlx5_ib_flow_matcher {
	struct mlx5_core_dev	*mdev;
	atomic_t		usecnt;
	u8			match_criteria_enable;
	u32			ib_port;
};

struct mlx5_ib_steering_anchor {
@@ -293,6 +294,18 @@ enum mlx5_ib_optional_counter_type {
	MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS,
	MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS,
	MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS,
	MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS,
	MLX5_IB_OPCOUNTER_RDMA_TX_BYTES,
	MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS,
	MLX5_IB_OPCOUNTER_RDMA_RX_BYTES,

	MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP,
	MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP,
	MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP,
	MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP,
	MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP,
	MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP,
	MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP,

	MLX5_IB_OPCOUNTER_MAX,
};
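The per-QP counter types in this enum mirror the base counter types in the same order, which is what lets the binding code earlier in the diff translate a base type into its per-QP twin by adding a constant offset (per_qp_type = i + MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP). A minimal, self-contained sketch of that invariant, using a simplified copy of the enum purely for illustration (the enum names below are a local stand-in, not the kernel header):

```c
#include <assert.h>

/* Illustrative copy of the layout above: base block first, then the
 * _PER_QP block in the same order, values assumed to start at 0.
 */
enum opcounter {
	OPC_CC_RX_CE_PKTS,
	OPC_CC_RX_CNP_PKTS,
	OPC_CC_TX_CNP_PKTS,
	OPC_RDMA_TX_PACKETS,
	OPC_RDMA_TX_BYTES,
	OPC_RDMA_RX_PACKETS,
	OPC_RDMA_RX_BYTES,

	OPC_CC_RX_CE_PKTS_PER_QP,
	OPC_CC_RX_CNP_PKTS_PER_QP,
	OPC_CC_TX_CNP_PKTS_PER_QP,
	OPC_RDMA_TX_PACKETS_PER_QP,
	OPC_RDMA_TX_BYTES_PER_QP,
	OPC_RDMA_RX_PACKETS_PER_QP,
	OPC_RDMA_RX_BYTES_PER_QP,

	OPC_MAX,
};

/* Base type -> per-QP type is a constant shift, as the bind loop assumes. */
static enum opcounter to_per_qp(enum opcounter base)
{
	return base + OPC_CC_RX_CE_PKTS_PER_QP;
}

int main(void)
{
	assert(to_per_qp(OPC_CC_RX_CE_PKTS) == OPC_CC_RX_CE_PKTS_PER_QP);
	assert(to_per_qp(OPC_RDMA_RX_BYTES) == OPC_RDMA_RX_BYTES_PER_QP);
	return 0;
}
```

The sketch only demonstrates why keeping the two blocks in lock-step ordering matters; reordering either block would silently break the offset mapping.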
@@ -307,6 +320,8 @@ struct mlx5_ib_flow_db {
	struct mlx5_ib_flow_prio	rdma_tx[MLX5_IB_NUM_FLOW_FT];
	struct mlx5_ib_flow_prio	opfcs[MLX5_IB_OPCOUNTER_MAX];
	struct mlx5_flow_table		*lag_demux_ft;
	struct mlx5_ib_flow_prio	*rdma_transport_rx;
	struct mlx5_ib_flow_prio	*rdma_transport_tx;
	/* Protect flow steering bypass flow tables
	 * when add/del flow rules.
	 * only single add/removal of flow steering rule could be done
@@ -883,6 +898,14 @@ void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
			     struct mlx5_ib_op_fc *opfc,
			     enum mlx5_ib_optional_counter_type type);

int mlx5r_fs_bind_op_fc(struct ib_qp *qp, struct rdma_counter *counter,
			u32 port);

void mlx5r_fs_unbind_op_fc(struct ib_qp *qp, struct rdma_counter *counter);

void mlx5r_fs_destroy_fcs(struct mlx5_ib_dev *dev,
			  struct rdma_counter *counter);

struct mlx5_ib_multiport_info;

struct mlx5_ib_multiport {
@@ -56,7 +56,7 @@ static void
create_mkey_callback(int status, struct mlx5_async_work *context);
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
				     u64 iova, int access_flags,
				     unsigned int page_size, bool populate,
				     unsigned long page_size, bool populate,
				     int access_mode);
static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr);

@@ -718,8 +718,7 @@ mkey_cache_ent_from_rb_key(struct mlx5_ib_dev *dev,
}

static struct mlx5_ib_mr *_mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
					       struct mlx5_cache_ent *ent,
					       int access_flags)
					       struct mlx5_cache_ent *ent)
{
	struct mlx5_ib_mr *mr;
	int err;
@@ -794,7 +793,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
	if (!ent)
		return ERR_PTR(-EOPNOTSUPP);

	return _mlx5_mr_cache_alloc(dev, ent, access_flags);
	return _mlx5_mr_cache_alloc(dev, ent);
}

static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
@@ -919,6 +918,25 @@ mkeys_err:
	return ERR_PTR(ret);
}

static void mlx5r_destroy_cache_entries(struct mlx5_ib_dev *dev)
{
	struct rb_root *root = &dev->cache.rb_root;
	struct mlx5_cache_ent *ent;
	struct rb_node *node;

	mutex_lock(&dev->cache.rb_lock);
	node = rb_first(root);
	while (node) {
		ent = rb_entry(node, struct mlx5_cache_ent, node);
		node = rb_next(node);
		clean_keys(dev, ent);
		rb_erase(&ent->node, root);
		mlx5r_mkeys_uninit(ent);
		kfree(ent);
	}
	mutex_unlock(&dev->cache.rb_lock);
}

int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mkey_cache *cache = &dev->cache;
@@ -970,6 +988,8 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
err:
	mutex_unlock(&cache->rb_lock);
	mlx5_mkey_cache_debugfs_cleanup(dev);
	mlx5r_destroy_cache_entries(dev);
	destroy_workqueue(cache->wq);
	mlx5_ib_warn(dev, "failed to create mkey cache entry\n");
	return ret;
}
@@ -1003,17 +1023,7 @@ void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
	mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);

	/* At this point all entries are disabled and have no concurrent work. */
	mutex_lock(&dev->cache.rb_lock);
	node = rb_first(root);
	while (node) {
		ent = rb_entry(node, struct mlx5_cache_ent, node);
		node = rb_next(node);
		clean_keys(dev, ent);
		rb_erase(&ent->node, root);
		mlx5r_mkeys_uninit(ent);
		kfree(ent);
	}
	mutex_unlock(&dev->cache.rb_lock);
	mlx5r_destroy_cache_entries(dev);

	destroy_workqueue(dev->cache.wq);
	del_timer_sync(&dev->delay_timer);
@@ -1115,7 +1125,7 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
	struct mlx5r_cache_rb_key rb_key = {};
	struct mlx5_cache_ent *ent;
	struct mlx5_ib_mr *mr;
	unsigned int page_size;
	unsigned long page_size;

	if (umem->is_dmabuf)
		page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova);
@@ -1144,7 +1154,7 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
		return mr;
	}

	mr = _mlx5_mr_cache_alloc(dev, ent, access_flags);
	mr = _mlx5_mr_cache_alloc(dev, ent);
	if (IS_ERR(mr))
		return mr;

@@ -1219,7 +1229,7 @@ err_1:
 */
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
				     u64 iova, int access_flags,
				     unsigned int page_size, bool populate,
				     unsigned long page_size, bool populate,
				     int access_mode)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
@@ -1425,7 +1435,7 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
		mr = alloc_cacheable_mr(pd, umem, iova, access_flags,
					MLX5_MKC_ACCESS_MODE_MTT);
	} else {
		unsigned int page_size =
		unsigned long page_size =
			mlx5_umem_mkc_find_best_pgsz(dev, umem, iova);

		mutex_lock(&dev->slow_path_mutex);
@@ -1957,7 +1967,6 @@ static int cache_ent_find_and_store(struct mlx5_ib_dev *dev,

	if (mr->mmkey.cache_ent) {
		spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock);
		mr->mmkey.cache_ent->in_use--;
		goto end;
	}

@@ -2025,6 +2034,7 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
	bool is_odp = is_odp_mr(mr);
	bool is_odp_dma_buf = is_dmabuf_mr(mr) &&
			      !to_ib_umem_dmabuf(mr->umem)->pinned;
	bool from_cache = !!ent;
	int ret = 0;

	if (is_odp)
@@ -2037,6 +2047,8 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
		ent = mr->mmkey.cache_ent;
		/* upon storing to a clean temp entry - schedule its cleanup */
		spin_lock_irq(&ent->mkeys_queue.lock);
		if (from_cache)
			ent->in_use--;
		if (ent->is_tmp && !ent->tmp_cleanup_scheduled) {
			mod_delayed_work(ent->dev->cache.wq, &ent->dwork,
					 msecs_to_jiffies(30 * 1000));
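Several of the hunks above widen page_size from unsigned int to unsigned long. A short, illustrative user-space calculation (not the mlx5 helpers themselves) of why a 32-bit holder is too narrow once the page size chosen for a very large registration reaches 4 GiB; the 2^33 value below is a hypothetical example, and uint64_t stands in for the kernel's unsigned long on 64-bit architectures:

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical best page size of 2^33 bytes (8 GiB): any value of
	 * 2^32 or more no longer fits in a 32-bit variable.
	 */
	uint64_t page_size = UINT64_C(1) << 33;

	unsigned int truncated = (unsigned int)page_size; /* becomes 0 */

	printf("64-bit holder: %" PRIu64 ", 32-bit holder: %u\n",
	       page_size, truncated);
	return 0;
}
```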
@@ -309,9 +309,6 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni,
				blk_start_idx = idx;
				in_block = 1;
			}

			/* Count page invalidations */
			invalidations += idx - blk_start_idx + 1;
		} else {
			u64 umr_offset = idx & umr_block_mask;

@@ -321,14 +318,19 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni,
						     MLX5_IB_UPD_XLT_ZAP |
						     MLX5_IB_UPD_XLT_ATOMIC);
				in_block = 0;
				/* Count page invalidations */
				invalidations += idx - blk_start_idx + 1;
			}
		}
	}
	if (in_block)
	if (in_block) {
		mlx5r_umr_update_xlt(mr, blk_start_idx,
				     idx - blk_start_idx + 1, 0,
				     MLX5_IB_UPD_XLT_ZAP |
				     MLX5_IB_UPD_XLT_ATOMIC);
		/* Count page invalidations */
		invalidations += idx - blk_start_idx + 1;
	}

	mlx5_update_odp_stats_with_handled(mr, invalidations, invalidations);

@@ -237,34 +237,6 @@ enum rdma_link_layer pvrdma_port_link_layer(struct ib_device *ibdev,
	return IB_LINK_LAYER_ETHERNET;
}

int pvrdma_modify_device(struct ib_device *ibdev, int mask,
			 struct ib_device_modify *props)
{
	unsigned long flags;

	if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
		     IB_DEVICE_MODIFY_NODE_DESC)) {
		dev_warn(&to_vdev(ibdev)->pdev->dev,
			 "unsupported device modify mask %#x\n", mask);
		return -EOPNOTSUPP;
	}

	if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
		spin_lock_irqsave(&to_vdev(ibdev)->desc_lock, flags);
		memcpy(ibdev->node_desc, props->node_desc, 64);
		spin_unlock_irqrestore(&to_vdev(ibdev)->desc_lock, flags);
	}

	if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) {
		mutex_lock(&to_vdev(ibdev)->port_mutex);
		to_vdev(ibdev)->sys_image_guid =
			cpu_to_be64(props->sys_image_guid);
		mutex_unlock(&to_vdev(ibdev)->port_mutex);
	}

	return 0;
}

/**
 * pvrdma_modify_port - modify device port attributes
 * @ibdev: the device to modify
@@ -356,8 +356,6 @@ int pvrdma_query_pkey(struct ib_device *ibdev, u32 port,
		      u16 index, u16 *pkey);
enum rdma_link_layer pvrdma_port_link_layer(struct ib_device *ibdev,
					    u32 port);
int pvrdma_modify_device(struct ib_device *ibdev, int mask,
			 struct ib_device_modify *props);
int pvrdma_modify_port(struct ib_device *ibdev, u32 port,
		       int mask, struct ib_port_modify *props);
int pvrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
@@ -4,8 +4,7 @@ config RDMA_RXE
	depends on INET && PCI && INFINIBAND
	depends on INFINIBAND_VIRT_DMA
	select NET_UDP_TUNNEL
	select CRYPTO
	select CRYPTO_CRC32
	select CRC32
	help
	  This driver implements the InfiniBand RDMA transport over
	  the Linux network stack. It enables a system with a
@@ -23,3 +23,5 @@ rdma_rxe-y := \
	rxe_task.o \
	rxe_net.o \
	rxe_hw_counters.o

rdma_rxe-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += rxe_odp.o
@@ -31,9 +31,6 @@ void rxe_dealloc(struct ib_device *ib_dev)

	WARN_ON(!RB_EMPTY_ROOT(&rxe->mcg_tree));

	if (rxe->tfm)
		crypto_free_shash(rxe->tfm);

	mutex_destroy(&rxe->usdev_lock);
}

@@ -72,10 +69,39 @@ static void rxe_init_device_param(struct rxe_dev *rxe, struct net_device *ndev)
	rxe->attr.max_pkeys			= RXE_MAX_PKEYS;
	rxe->attr.local_ca_ack_delay		= RXE_LOCAL_CA_ACK_DELAY;

	if (ndev->addr_len) {
		memcpy(rxe->raw_gid, ndev->dev_addr,
			min_t(unsigned int, ndev->addr_len, ETH_ALEN));
	} else {
		/*
		 * This device does not have a HW address, but
		 * connection management requires a unique gid.
		 */
		eth_random_addr(rxe->raw_gid);
	}

	addrconf_addr_eui48((unsigned char *)&rxe->attr.sys_image_guid,
			ndev->dev_addr);
			rxe->raw_gid);

	rxe->max_ucontext			= RXE_MAX_UCONTEXT;

	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
		rxe->attr.kernel_cap_flags |= IBK_ON_DEMAND_PAGING;

		/* IB_ODP_SUPPORT_IMPLICIT is not supported right now. */
		rxe->attr.odp_caps.general_caps |= IB_ODP_SUPPORT;

		rxe->attr.odp_caps.per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SEND;
		rxe->attr.odp_caps.per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_RECV;
		rxe->attr.odp_caps.per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;

		rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SEND;
		rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_RECV;
		rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_WRITE;
		rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_READ;
		rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC;
		rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
	}
}

/* initialize port attributes */
@@ -113,7 +139,7 @@ static void rxe_init_ports(struct rxe_dev *rxe, struct net_device *ndev)

	rxe_init_port_param(port);
	addrconf_addr_eui48((unsigned char *)&port->port_guid,
			    ndev->dev_addr);
			    rxe->raw_gid);
	spin_lock_init(&port->port_lock);
}

@@ -21,7 +21,6 @@
#include <rdma/ib_umem.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>
#include <crypto/hash.h>

#include "rxe_net.h"
#include "rxe_opcode.h"
@@ -100,43 +99,6 @@
#define rxe_info_mw(mw, fmt, ...) ibdev_info_ratelimited((mw)->ibmw.device, \
		"mw#%d %s:  " fmt, (mw)->elem.index, __func__, ##__VA_ARGS__)

/* responder states */
enum resp_states {
	RESPST_NONE,
	RESPST_GET_REQ,
	RESPST_CHK_PSN,
	RESPST_CHK_OP_SEQ,
	RESPST_CHK_OP_VALID,
	RESPST_CHK_RESOURCE,
	RESPST_CHK_LENGTH,
	RESPST_CHK_RKEY,
	RESPST_EXECUTE,
	RESPST_READ_REPLY,
	RESPST_ATOMIC_REPLY,
	RESPST_ATOMIC_WRITE_REPLY,
	RESPST_PROCESS_FLUSH,
	RESPST_COMPLETE,
	RESPST_ACKNOWLEDGE,
	RESPST_CLEANUP,
	RESPST_DUPLICATE_REQUEST,
	RESPST_ERR_MALFORMED_WQE,
	RESPST_ERR_UNSUPPORTED_OPCODE,
	RESPST_ERR_MISALIGNED_ATOMIC,
	RESPST_ERR_PSN_OUT_OF_SEQ,
	RESPST_ERR_MISSING_OPCODE_FIRST,
	RESPST_ERR_MISSING_OPCODE_LAST_C,
	RESPST_ERR_MISSING_OPCODE_LAST_D1E,
	RESPST_ERR_TOO_MANY_RDMA_ATM_REQ,
	RESPST_ERR_RNR,
	RESPST_ERR_RKEY_VIOLATION,
	RESPST_ERR_INVALIDATE_RKEY,
	RESPST_ERR_LENGTH,
	RESPST_ERR_CQ_OVERFLOW,
	RESPST_ERROR,
	RESPST_DONE,
	RESPST_EXIT,
};

void rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu);

int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name,
@@ -9,28 +9,6 @@
#include "rxe.h"
#include "rxe_loc.h"

/**
 * rxe_icrc_init() - Initialize crypto function for computing crc32
 * @rxe: rdma_rxe device object
 *
 * Return: 0 on success else an error
 */
int rxe_icrc_init(struct rxe_dev *rxe)
{
	struct crypto_shash *tfm;

	tfm = crypto_alloc_shash("crc32", 0, 0);
	if (IS_ERR(tfm)) {
		rxe_dbg_dev(rxe, "failed to init crc32 algorithm err: %ld\n",
			    PTR_ERR(tfm));
		return PTR_ERR(tfm);
	}

	rxe->tfm = tfm;

	return 0;
}

/**
 * rxe_crc32() - Compute cumulative crc32 for a contiguous segment
 * @rxe: rdma_rxe device object
@@ -42,23 +20,7 @@ int rxe_icrc_init(struct rxe_dev *rxe)
 */
static __be32 rxe_crc32(struct rxe_dev *rxe, __be32 crc, void *next, size_t len)
{
	__be32 icrc;
	int err;

	SHASH_DESC_ON_STACK(shash, rxe->tfm);

	shash->tfm = rxe->tfm;
	*(__be32 *)shash_desc_ctx(shash) = crc;
	err = crypto_shash_update(shash, next, len);
	if (unlikely(err)) {
		rxe_dbg_dev(rxe, "failed crc calculation, err: %d\n", err);
		return (__force __be32)crc32_le((__force u32)crc, next, len);
	}

	icrc = *(__be32 *)shash_desc_ctx(shash);
	barrier_data(shash_desc_ctx(shash));

	return icrc;
	return (__force __be32)crc32_le((__force u32)crc, next, len);
}

/**
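The rewritten rxe_crc32() above folds each segment into a running CRC with a plain crc32_le() call instead of going through the crypto_shash API. A user-space sketch of the same accumulation pattern, using zlib's crc32() purely for illustration (zlib's seed and finalization conventions differ from the kernel's ICRC handling, so this shows the chaining across segments, not the exact ICRC value):

```c
/* build: cc crc_demo.c -lz */
#include <stdio.h>
#include <string.h>
#include <zlib.h>

int main(void)
{
	const unsigned char hdr[] = "pseudo-header";
	const unsigned char payload[] = "payload bytes";

	/* Start a CRC, then feed each contiguous segment, passing the
	 * previous result back in as the seed for the next call.
	 */
	uLong crc = crc32(0L, Z_NULL, 0);
	crc = crc32(crc, hdr, (uInt)strlen((const char *)hdr));
	crc = crc32(crc, payload, (uInt)strlen((const char *)payload));

	printf("running crc32: 0x%08lx\n", crc);
	return 0;
}
```

The design point the diff makes is that a table-driven CRC32 helper needs no per-device transform state, which is why the tfm allocation and rxe_icrc_init() disappear elsewhere in this series.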
@@ -58,6 +58,7 @@ int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);

/* rxe_mr.c */
u8 rxe_get_next_key(u32 last_key);
void rxe_mr_init(int access, struct rxe_mr *mr);
void rxe_mr_init_dma(int access, struct rxe_mr *mr);
int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
		     int access, struct rxe_mr *mr);
@@ -80,6 +81,9 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 key);
int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
void rxe_mr_cleanup(struct rxe_pool_elem *elem);

/* defined in rxe_mr.c; used in rxe_mr.c and rxe_odp.c */
extern spinlock_t atomic_ops_lock;

/* rxe_mw.c */
int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata);
int rxe_dealloc_mw(struct ib_mw *ibmw);
@@ -168,7 +172,6 @@ int rxe_sender(struct rxe_qp *qp);
int rxe_receiver(struct rxe_qp *qp);

/* rxe_icrc.c */
int rxe_icrc_init(struct rxe_dev *rxe);
int rxe_icrc_check(struct sk_buff *skb, struct rxe_pkt_info *pkt);
void rxe_icrc_generate(struct sk_buff *skb, struct rxe_pkt_info *pkt);

@@ -181,4 +184,34 @@ static inline unsigned int wr_opcode_mask(int opcode, struct rxe_qp *qp)
	return rxe_wr_opcode_info[opcode].mask[qp->ibqp.qp_type];
}

/* rxe_odp.c */
extern const struct mmu_interval_notifier_ops rxe_mn_ops;

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
int rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
			 u64 iova, int access_flags, struct rxe_mr *mr);
int rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
		    enum rxe_mr_copy_dir dir);
int rxe_odp_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
		      u64 compare, u64 swap_add, u64 *orig_val);
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
static inline int
rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
		     int access_flags, struct rxe_mr *mr)
{
	return -EOPNOTSUPP;
}
static inline int rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
				  int length, enum rxe_mr_copy_dir dir)
{
	return -EOPNOTSUPP;
}
static inline int
rxe_odp_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
		  u64 compare, u64 swap_add, u64 *orig_val)
{
	return RESPST_ERR_UNSUPPORTED_OPCODE;
}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */

#endif /* RXE_LOC_H */
@@ -45,7 +45,7 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
	}
}

static void rxe_mr_init(int access, struct rxe_mr *mr)
void rxe_mr_init(int access, struct rxe_mr *mr)
{
	u32 key = mr->elem.index << 8 | rxe_get_next_key(-1);

@@ -323,7 +323,10 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
		return err;
	}

	return rxe_mr_copy_xarray(mr, iova, addr, length, dir);
	if (mr->umem->is_odp)
		return rxe_odp_mr_copy(mr, iova, addr, length, dir);
	else
		return rxe_mr_copy_xarray(mr, iova, addr, length, dir);
}

/* copy data in or out of a wqe, i.e. sg list
@@ -466,7 +469,7 @@ int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length)
}

/* Guarantee atomicity of atomic operations at the machine level. */
static DEFINE_SPINLOCK(atomic_ops_lock);
DEFINE_SPINLOCK(atomic_ops_lock);

int rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
			u64 compare, u64 swap_add, u64 *orig_val)
@@ -532,6 +535,10 @@ int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
	struct page *page;
	u64 *va;

	/* ODP is not supported right now. WIP. */
	if (mr->umem->is_odp)
		return RESPST_ERR_UNSUPPORTED_OPCODE;

	/* See IBA oA19-28 */
	if (unlikely(mr->state != RXE_MR_STATE_VALID)) {
		rxe_dbg_mr(mr, "mr not in valid state\n");
drivers/infiniband/sw/rxe/rxe_odp.c (new file, 326 lines)
@@ -0,0 +1,326 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2022-2023 Fujitsu Ltd. All rights reserved.
 */

#include <linux/hmm.h>

#include <rdma/ib_umem_odp.h>

#include "rxe.h"

static bool rxe_ib_invalidate_range(struct mmu_interval_notifier *mni,
				    const struct mmu_notifier_range *range,
				    unsigned long cur_seq)
{
	struct ib_umem_odp *umem_odp =
		container_of(mni, struct ib_umem_odp, notifier);
	unsigned long start, end;

	if (!mmu_notifier_range_blockable(range))
		return false;

	mutex_lock(&umem_odp->umem_mutex);
	mmu_interval_set_seq(mni, cur_seq);

	start = max_t(u64, ib_umem_start(umem_odp), range->start);
	end = min_t(u64, ib_umem_end(umem_odp), range->end);

	/* update umem_odp->dma_list */
	ib_umem_odp_unmap_dma_pages(umem_odp, start, end);

	mutex_unlock(&umem_odp->umem_mutex);
	return true;
}

const struct mmu_interval_notifier_ops rxe_mn_ops = {
	.invalidate = rxe_ib_invalidate_range,
};

#define RXE_PAGEFAULT_DEFAULT 0
#define RXE_PAGEFAULT_RDONLY BIT(0)
#define RXE_PAGEFAULT_SNAPSHOT BIT(1)
static int rxe_odp_do_pagefault_and_lock(struct rxe_mr *mr, u64 user_va, int bcnt, u32 flags)
{
	struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
	bool fault = !(flags & RXE_PAGEFAULT_SNAPSHOT);
	u64 access_mask;
	int np;

	access_mask = ODP_READ_ALLOWED_BIT;
	if (umem_odp->umem.writable && !(flags & RXE_PAGEFAULT_RDONLY))
		access_mask |= ODP_WRITE_ALLOWED_BIT;

	/*
	 * ib_umem_odp_map_dma_and_lock() locks umem_mutex on success.
	 * Callers must release the lock later to let invalidation handler
	 * do its work again.
	 */
	np = ib_umem_odp_map_dma_and_lock(umem_odp, user_va, bcnt,
					  access_mask, fault);
	return np;
}

static int rxe_odp_init_pages(struct rxe_mr *mr)
{
	struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
	int ret;

	ret = rxe_odp_do_pagefault_and_lock(mr, mr->umem->address,
					    mr->umem->length,
					    RXE_PAGEFAULT_SNAPSHOT);

	if (ret >= 0)
		mutex_unlock(&umem_odp->umem_mutex);

	return ret >= 0 ? 0 : ret;
}

int rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
			 u64 iova, int access_flags, struct rxe_mr *mr)
{
	struct ib_umem_odp *umem_odp;
	int err;

	if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
		return -EOPNOTSUPP;

	rxe_mr_init(access_flags, mr);

	if (!start && length == U64_MAX) {
		if (iova != 0)
			return -EINVAL;
		if (!(rxe->attr.odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
			return -EINVAL;

		/* Never reach here, for implicit ODP is not implemented. */
	}

	umem_odp = ib_umem_odp_get(&rxe->ib_dev, start, length, access_flags,
				   &rxe_mn_ops);
	if (IS_ERR(umem_odp)) {
		rxe_dbg_mr(mr, "Unable to create umem_odp err = %d\n",
			   (int)PTR_ERR(umem_odp));
		return PTR_ERR(umem_odp);
	}

	umem_odp->private = mr;

	mr->umem = &umem_odp->umem;
	mr->access = access_flags;
	mr->ibmr.length = length;
	mr->ibmr.iova = iova;
	mr->page_offset = ib_umem_offset(&umem_odp->umem);

	err = rxe_odp_init_pages(mr);
	if (err) {
		ib_umem_odp_release(umem_odp);
		return err;
	}

	mr->state = RXE_MR_STATE_VALID;
	mr->ibmr.type = IB_MR_TYPE_USER;

	return err;
}

static inline bool rxe_check_pagefault(struct ib_umem_odp *umem_odp,
				       u64 iova, int length, u32 perm)
{
	bool need_fault = false;
	u64 addr;
	int idx;

	addr = iova & (~(BIT(umem_odp->page_shift) - 1));

	/* Skim through all pages that are to be accessed. */
	while (addr < iova + length) {
		idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;

		if (!(umem_odp->dma_list[idx] & perm)) {
			need_fault = true;
			break;
		}

		addr += BIT(umem_odp->page_shift);
	}
	return need_fault;
}

static int rxe_odp_map_range_and_lock(struct rxe_mr *mr, u64 iova, int length, u32 flags)
{
	struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
	bool need_fault;
	u64 perm;
	int err;

	if (unlikely(length < 1))
		return -EINVAL;

	perm = ODP_READ_ALLOWED_BIT;
	if (!(flags & RXE_PAGEFAULT_RDONLY))
		perm |= ODP_WRITE_ALLOWED_BIT;

	mutex_lock(&umem_odp->umem_mutex);

	need_fault = rxe_check_pagefault(umem_odp, iova, length, perm);
	if (need_fault) {
		mutex_unlock(&umem_odp->umem_mutex);

		/* umem_mutex is locked on success. */
		err = rxe_odp_do_pagefault_and_lock(mr, iova, length,
						    flags);
		if (err < 0)
			return err;

		need_fault = rxe_check_pagefault(umem_odp, iova, length, perm);
		if (need_fault)
			return -EFAULT;
	}

	return 0;
}

static int __rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
			     int length, enum rxe_mr_copy_dir dir)
{
	struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
	struct page *page;
	int idx, bytes;
	size_t offset;
	u8 *user_va;

	idx = (iova - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
	offset = iova & (BIT(umem_odp->page_shift) - 1);

	while (length > 0) {
		u8 *src, *dest;

		page = hmm_pfn_to_page(umem_odp->pfn_list[idx]);
		user_va = kmap_local_page(page);
		if (!user_va)
			return -EFAULT;

		src = (dir == RXE_TO_MR_OBJ) ? addr : user_va;
		dest = (dir == RXE_TO_MR_OBJ) ? user_va : addr;

		bytes = BIT(umem_odp->page_shift) - offset;
		if (bytes > length)
			bytes = length;

		memcpy(dest, src, bytes);
		kunmap_local(user_va);

		length -= bytes;
		idx++;
		offset = 0;
	}

	return 0;
}

int rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
		    enum rxe_mr_copy_dir dir)
{
	struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
	u32 flags = RXE_PAGEFAULT_DEFAULT;
	int err;

	if (length == 0)
		return 0;

	if (unlikely(!mr->umem->is_odp))
		return -EOPNOTSUPP;

	switch (dir) {
	case RXE_TO_MR_OBJ:
		break;

	case RXE_FROM_MR_OBJ:
		flags |= RXE_PAGEFAULT_RDONLY;
		break;

	default:
		return -EINVAL;
	}

	err = rxe_odp_map_range_and_lock(mr, iova, length, flags);
	if (err)
		return err;

	err = __rxe_odp_mr_copy(mr, iova, addr, length, dir);

	mutex_unlock(&umem_odp->umem_mutex);

	return err;
}

static int rxe_odp_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
				u64 compare, u64 swap_add, u64 *orig_val)
{
	struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
	unsigned int page_offset;
	struct page *page;
	unsigned int idx;
	u64 value;
	u64 *va;
	int err;

	if (unlikely(mr->state != RXE_MR_STATE_VALID)) {
		rxe_dbg_mr(mr, "mr not in valid state\n");
		return RESPST_ERR_RKEY_VIOLATION;
	}

	err = mr_check_range(mr, iova, sizeof(value));
	if (err) {
		rxe_dbg_mr(mr, "iova out of range\n");
		return RESPST_ERR_RKEY_VIOLATION;
	}

	idx = (iova - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
	page_offset = iova & (BIT(umem_odp->page_shift) - 1);
	page = hmm_pfn_to_page(umem_odp->pfn_list[idx]);
	if (!page)
		return RESPST_ERR_RKEY_VIOLATION;

	if (unlikely(page_offset & 0x7)) {
		rxe_dbg_mr(mr, "iova not aligned\n");
		return RESPST_ERR_MISALIGNED_ATOMIC;
	}

	va = kmap_local_page(page);

	spin_lock_bh(&atomic_ops_lock);
	value = *orig_val = va[page_offset >> 3];

	if (opcode == IB_OPCODE_RC_COMPARE_SWAP) {
		if (value == compare)
			va[page_offset >> 3] = swap_add;
	} else {
		value += swap_add;
		va[page_offset >> 3] = value;
	}
	spin_unlock_bh(&atomic_ops_lock);

	kunmap_local(va);

	return 0;
}

int rxe_odp_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
		      u64 compare, u64 swap_add, u64 *orig_val)
{
	struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
	int err;

	err = rxe_odp_map_range_and_lock(mr, iova, sizeof(char),
					 RXE_PAGEFAULT_DEFAULT);
	if (err < 0)
		return RESPST_ERR_RKEY_VIOLATION;

	err = rxe_odp_do_atomic_op(mr, iova, opcode, compare, swap_add,
				   orig_val);
	mutex_unlock(&umem_odp->umem_mutex);

	return err;
}
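rxe_odp_do_atomic_op() above performs either a compare-and-swap or a fetch-and-add on a single 8-byte aligned quadword while holding atomic_ops_lock. A stand-alone sketch of the same decision logic in plain C, with hypothetical helper and opcode names and a pthread mutex standing in for the kernel spinlock:

```c
/* build: cc atomic_demo.c -pthread */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static pthread_mutex_t atomic_ops_lock = PTHREAD_MUTEX_INITIALIZER;

enum atomic_opcode { OP_COMPARE_SWAP, OP_FETCH_ADD };

/* Mirrors the shape of the kernel logic: report the original value and
 * either swap it (when it equals 'compare') or add 'swap_add' to it,
 * with every access serialized by one lock.
 */
static int do_atomic_op(uint64_t *va, enum atomic_opcode opcode,
			uint64_t compare, uint64_t swap_add,
			uint64_t *orig_val)
{
	if ((uintptr_t)va & 0x7)
		return -1;	/* reject misaligned targets, as the kernel does */

	pthread_mutex_lock(&atomic_ops_lock);
	*orig_val = *va;
	if (opcode == OP_COMPARE_SWAP) {
		if (*va == compare)
			*va = swap_add;
	} else {
		*va += swap_add;
	}
	pthread_mutex_unlock(&atomic_ops_lock);
	return 0;
}

int main(void)
{
	uint64_t word = 5, orig;

	do_atomic_op(&word, OP_FETCH_ADD, 0, 3, &orig);
	printf("fetch-add:    orig=%llu now=%llu\n",
	       (unsigned long long)orig, (unsigned long long)word);

	do_atomic_op(&word, OP_COMPARE_SWAP, 8, 42, &orig);
	printf("compare-swap: orig=%llu now=%llu\n",
	       (unsigned long long)orig, (unsigned long long)word);
	return 0;
}
```

This is only an illustration of the semantics; the real code additionally resolves the target page through the ODP pfn list and validates the MR state and range first.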
@@ -5,7 +5,6 @@
 */

#include <linux/skbuff.h>
#include <crypto/hash.h>

#include "rxe.h"
#include "rxe_loc.h"
@@ -649,6 +649,10 @@ static enum resp_states process_flush(struct rxe_qp *qp,
	struct rxe_mr *mr = qp->resp.mr;
	struct resp_res *res = qp->resp.res;

	/* ODP is not supported right now. WIP. */
	if (mr->umem->is_odp)
		return RESPST_ERR_UNSUPPORTED_OPCODE;

	/* oA19-14, oA19-15 */
	if (res && res->replay)
		return RESPST_ACKNOWLEDGE;
@@ -702,10 +706,16 @@ static enum resp_states atomic_reply(struct rxe_qp *qp,
	if (!res->replay) {
		u64 iova = qp->resp.va + qp->resp.offset;

		err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode,
					  atmeth_comp(pkt),
					  atmeth_swap_add(pkt),
					  &res->atomic.orig_val);
		if (mr->umem->is_odp)
			err = rxe_odp_atomic_op(mr, iova, pkt->opcode,
						atmeth_comp(pkt),
						atmeth_swap_add(pkt),
						&res->atomic.orig_val);
		else
			err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode,
						  atmeth_comp(pkt),
						  atmeth_swap_add(pkt),
						  &res->atomic.orig_val);
		if (err)
			return err;

@ -80,6 +80,18 @@ err_out:
	return err;
}

static int rxe_query_gid(struct ib_device *ibdev, u32 port, int idx,
			 union ib_gid *gid)
{
	struct rxe_dev *rxe = to_rdev(ibdev);

	/* subnet_prefix == interface_id == 0; */
	memset(gid, 0, sizeof(*gid));
	memcpy(gid->raw, rxe->raw_gid, ETH_ALEN);

	return 0;
}

static int rxe_query_pkey(struct ib_device *ibdev,
			  u32 port_num, u16 index, u16 *pkey)
{
@ -1286,7 +1298,10 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, u64 start,
	mr->ibmr.pd = ibpd;
	mr->ibmr.device = ibpd->device;

	err = rxe_mr_init_user(rxe, start, length, access, mr);
	if (access & IB_ACCESS_ON_DEMAND)
		err = rxe_odp_mr_init_user(rxe, start, length, iova, access, mr);
	else
		err = rxe_mr_init_user(rxe, start, length, access, mr);
	if (err) {
		rxe_dbg_mr(mr, "reg_user_mr failed, err = %d\n", err);
		goto err_cleanup;
@ -1493,6 +1508,7 @@ static const struct ib_device_ops rxe_dev_ops = {
	.query_ah = rxe_query_ah,
	.query_device = rxe_query_device,
	.query_pkey = rxe_query_pkey,
	.query_gid = rxe_query_gid,
	.query_port = rxe_query_port,
	.query_qp = rxe_query_qp,
	.query_srq = rxe_query_srq,
@ -1523,7 +1539,7 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name,
	dev->num_comp_vectors = num_possible_cpus();
	dev->local_dma_lkey = 0;
	addrconf_addr_eui48((unsigned char *)&dev->node_guid,
			    ndev->dev_addr);
			    rxe->raw_gid);

	dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) |
				BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ);
@ -1533,10 +1549,6 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name,
	if (err)
		return err;

	err = rxe_icrc_init(rxe);
	if (err)
		return err;

	err = ib_register_device(dev, ibdev_name, NULL);
	if (err)
		rxe_dbg_dev(rxe, "failed with error %d\n", err);

@ -126,6 +126,43 @@ struct rxe_comp_info {
	u32 rnr_retry;
};

/* responder states */
enum resp_states {
	RESPST_NONE,
	RESPST_GET_REQ,
	RESPST_CHK_PSN,
	RESPST_CHK_OP_SEQ,
	RESPST_CHK_OP_VALID,
	RESPST_CHK_RESOURCE,
	RESPST_CHK_LENGTH,
	RESPST_CHK_RKEY,
	RESPST_EXECUTE,
	RESPST_READ_REPLY,
	RESPST_ATOMIC_REPLY,
	RESPST_ATOMIC_WRITE_REPLY,
	RESPST_PROCESS_FLUSH,
	RESPST_COMPLETE,
	RESPST_ACKNOWLEDGE,
	RESPST_CLEANUP,
	RESPST_DUPLICATE_REQUEST,
	RESPST_ERR_MALFORMED_WQE,
	RESPST_ERR_UNSUPPORTED_OPCODE,
	RESPST_ERR_MISALIGNED_ATOMIC,
	RESPST_ERR_PSN_OUT_OF_SEQ,
	RESPST_ERR_MISSING_OPCODE_FIRST,
	RESPST_ERR_MISSING_OPCODE_LAST_C,
	RESPST_ERR_MISSING_OPCODE_LAST_D1E,
	RESPST_ERR_TOO_MANY_RDMA_ATM_REQ,
	RESPST_ERR_RNR,
	RESPST_ERR_RKEY_VIOLATION,
	RESPST_ERR_INVALIDATE_RKEY,
	RESPST_ERR_LENGTH,
	RESPST_ERR_CQ_OVERFLOW,
	RESPST_ERROR,
	RESPST_DONE,
	RESPST_EXIT,
};

enum rdatm_res_state {
	rdatm_res_state_next,
	rdatm_res_state_new,
@ -376,7 +413,9 @@ struct rxe_dev {
	struct ib_device_attr attr;
	int max_ucontext;
	int max_inline_data;
	struct mutex usdev_lock;
	struct mutex usdev_lock;

	char raw_gid[ETH_ALEN];

	struct rxe_pool uc_pool;
	struct rxe_pool pd_pool;
@ -402,7 +441,6 @@ struct rxe_dev {
	atomic64_t stats_counters[RXE_NUM_OF_COUNTERS];

	struct rxe_port port;
	struct crypto_shash *tfm;
};

static inline struct net_device *rxe_ib_device_get_netdev(struct ib_device *dev)

@ -2,9 +2,7 @@ config RDMA_SIW
	tristate "Software RDMA over TCP/IP (iWARP) driver"
	depends on INET && INFINIBAND
	depends on INFINIBAND_VIRT_DMA
	select LIBCRC32C
	select CRYPTO
	select CRYPTO_CRC32C
	select CRC32
	help
	  This driver implements the iWARP RDMA transport over
	  the Linux TCP/IP network stack. It enables a system with a

@ -10,9 +10,9 @@
#include <rdma/restrack.h>
#include <linux/socket.h>
#include <linux/skbuff.h>
#include <crypto/hash.h>
#include <linux/crc32.h>
#include <linux/crc32c.h>
#include <linux/unaligned.h>

#include <rdma/siw-abi.h>
#include "iwarp.h"
@ -289,7 +289,8 @@ struct siw_rx_stream {

	union iwarp_hdr hdr;
	struct mpa_trailer trailer;
	struct shash_desc *mpa_crc_hd;
	u32 mpa_crc;
	bool mpa_crc_enabled;

	/*
	 * For each FPDU, main RX loop runs through 3 stages:
@ -390,7 +391,8 @@ struct siw_iwarp_tx {
	int burst;
	int bytes_unsent; /* ddp payload bytes */

	struct shash_desc *mpa_crc_hd;
	u32 mpa_crc;
	bool mpa_crc_enabled;

	u8 do_crc : 1; /* do crc for segment */
	u8 use_sendpage : 1; /* send w/o copy */
@ -496,7 +498,6 @@ extern u_char mpa_version;
extern const bool peer_to_peer;
extern struct task_struct *siw_tx_thread[];

extern struct crypto_shash *siw_crypto_shash;
extern struct iwarp_msg_info iwarp_pktinfo[RDMAP_TERMINATE + 1];

/* QP general functions */
@ -668,6 +669,30 @@ static inline struct siw_sqe *irq_alloc_free(struct siw_qp *qp)
	return NULL;
}

static inline void siw_crc_init(u32 *crc)
{
	*crc = ~0;
}

static inline void siw_crc_update(u32 *crc, const void *data, size_t len)
{
	*crc = crc32c(*crc, data, len);
}

static inline void siw_crc_final(u32 *crc, u8 out[4])
{
	put_unaligned_le32(~*crc, out);
}

static inline void siw_crc_oneshot(const void *data, size_t len, u8 out[4])
{
	u32 crc;

	siw_crc_init(&crc);
	siw_crc_update(&crc, data, len);
	return siw_crc_final(&crc, out);
}

static inline __wsum siw_csum_update(const void *buff, int len, __wsum sum)
{
	return (__force __wsum)crc32c((__force __u32)sum, buff, len);
@ -686,11 +711,11 @@ static inline void siw_crc_skb(struct siw_rx_stream *srx, unsigned int len)
		.update = siw_csum_update,
		.combine = siw_csum_combine,
	};
	__wsum crc = *(u32 *)shash_desc_ctx(srx->mpa_crc_hd);
	__wsum crc = (__force __wsum)srx->mpa_crc;

	crc = __skb_checksum(srx->skb, srx->skb_offset, len, crc,
			     &siw_cs_ops);
	*(u32 *)shash_desc_ctx(srx->mpa_crc_hd) = crc;
	srx->mpa_crc = (__force u32)crc;
}

#define siw_dbg(ibdev, fmt, ...) \

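The siw hunks that follow switch every MPA CRC user from the crypto_shash descriptor to these plain crc32c helpers. A minimal sketch of the resulting init/update/final pattern, assuming hypothetical hdr/payload buffers (example_mpa_crc() is not part of the patch):

static void example_mpa_crc(const void *hdr, size_t hdr_len,
			    const void *payload, size_t pay_len,
			    u8 trailer_crc[4])
{
	u32 crc;

	siw_crc_init(&crc);			/* seed with ~0 */
	siw_crc_update(&crc, hdr, hdr_len);	/* crc32c over the FPDU header */
	siw_crc_update(&crc, payload, pay_len);	/* ... and the payload */
	siw_crc_final(&crc, trailer_crc);	/* ~crc, little-endian, into the MPA trailer */
}
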
@ -59,7 +59,6 @@ u_char mpa_version = MPA_REVISION_2;
const bool peer_to_peer;

struct task_struct *siw_tx_thread[NR_CPUS];
struct crypto_shash *siw_crypto_shash;

static int siw_device_register(struct siw_device *sdev, const char *name)
{
@ -467,20 +466,7 @@ static __init int siw_init_module(void)
		rv = -ENOMEM;
		goto out_error;
	}
	/*
	 * Locate CRC32 algorithm. If unsuccessful, fail
	 * loading siw only, if CRC is required.
	 */
	siw_crypto_shash = crypto_alloc_shash("crc32c", 0, 0);
	if (IS_ERR(siw_crypto_shash)) {
		pr_info("siw: Loading CRC32c failed: %ld\n",
			PTR_ERR(siw_crypto_shash));
		siw_crypto_shash = NULL;
		if (mpa_crc_required) {
			rv = -EOPNOTSUPP;
			goto out_error;
		}
	}

	rv = register_netdevice_notifier(&siw_netdev_nb);
	if (rv)
		goto out_error;
@ -493,9 +479,6 @@ static __init int siw_init_module(void)
out_error:
	siw_stop_tx_threads();

	if (siw_crypto_shash)
		crypto_free_shash(siw_crypto_shash);

	pr_info("SoftIWARP attach failed. Error: %d\n", rv);

	siw_cm_exit();
@ -516,9 +499,6 @@ static void __exit siw_exit_module(void)

	siw_destroy_cpulist(siw_cpu_info.num_nodes);

	if (siw_crypto_shash)
		crypto_free_shash(siw_crypto_shash);

	pr_info("SoftiWARP detached\n");
}

@ -226,33 +226,6 @@ static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size)
	return 0;
}

static int siw_qp_enable_crc(struct siw_qp *qp)
{
	struct siw_rx_stream *c_rx = &qp->rx_stream;
	struct siw_iwarp_tx *c_tx = &qp->tx_ctx;
	int size;

	if (siw_crypto_shash == NULL)
		return -ENOENT;

	size = crypto_shash_descsize(siw_crypto_shash) +
		sizeof(struct shash_desc);

	c_tx->mpa_crc_hd = kzalloc(size, GFP_KERNEL);
	c_rx->mpa_crc_hd = kzalloc(size, GFP_KERNEL);
	if (!c_tx->mpa_crc_hd || !c_rx->mpa_crc_hd) {
		kfree(c_tx->mpa_crc_hd);
		kfree(c_rx->mpa_crc_hd);
		c_tx->mpa_crc_hd = NULL;
		c_rx->mpa_crc_hd = NULL;
		return -ENOMEM;
	}
	c_tx->mpa_crc_hd->tfm = siw_crypto_shash;
	c_rx->mpa_crc_hd->tfm = siw_crypto_shash;

	return 0;
}

/*
 * Send a non signalled READ or WRITE to peer side as negotiated
 * with MPAv2 P2P setup protocol. The work request is only created
@ -583,20 +556,15 @@ void siw_send_terminate(struct siw_qp *qp)

	term->ctrl.mpa_len =
		cpu_to_be16(len_terminate - (MPA_HDR_SIZE + MPA_CRC_SIZE));
	if (qp->tx_ctx.mpa_crc_hd) {
		crypto_shash_init(qp->tx_ctx.mpa_crc_hd);
		if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd,
					(u8 *)iov[0].iov_base,
					iov[0].iov_len))
			goto out;

	if (qp->tx_ctx.mpa_crc_enabled) {
		siw_crc_init(&qp->tx_ctx.mpa_crc);
		siw_crc_update(&qp->tx_ctx.mpa_crc,
			       iov[0].iov_base, iov[0].iov_len);
		if (num_frags == 3) {
			if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd,
						(u8 *)iov[1].iov_base,
						iov[1].iov_len))
				goto out;
			siw_crc_update(&qp->tx_ctx.mpa_crc,
				       iov[1].iov_base, iov[1].iov_len);
		}
		crypto_shash_final(qp->tx_ctx.mpa_crc_hd, (u8 *)&crc);
		siw_crc_final(&qp->tx_ctx.mpa_crc, (u8 *)&crc);
	}

	rv = kernel_sendmsg(s, &msg, iov, num_frags, len_terminate);
@ -604,7 +572,6 @@ void siw_send_terminate(struct siw_qp *qp)
		   rv == len_terminate ? "success" : "failure",
		   __rdmap_term_layer(term), __rdmap_term_etype(term),
		   __rdmap_term_ecode(term), rv);
out:
	kfree(term);
	kfree(err_hdr);
}
@ -643,9 +610,10 @@ static int siw_qp_nextstate_from_idle(struct siw_qp *qp,
	switch (attrs->state) {
	case SIW_QP_STATE_RTS:
		if (attrs->flags & SIW_MPA_CRC) {
			rv = siw_qp_enable_crc(qp);
			if (rv)
				break;
			siw_crc_init(&qp->tx_ctx.mpa_crc);
			qp->tx_ctx.mpa_crc_enabled = true;
			siw_crc_init(&qp->rx_stream.mpa_crc);
			qp->rx_stream.mpa_crc_enabled = true;
		}
		if (!(mask & SIW_QP_ATTR_LLP_HANDLE)) {
			siw_dbg_qp(qp, "no socket\n");

@ -67,10 +67,10 @@ static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem,

			return -EFAULT;
		}
		if (srx->mpa_crc_hd) {
		if (srx->mpa_crc_enabled) {
			if (rdma_is_kernel_res(&rx_qp(srx)->base_qp.res)) {
				crypto_shash_update(srx->mpa_crc_hd,
						    (u8 *)(dest + pg_off), bytes);
				siw_crc_update(&srx->mpa_crc, dest + pg_off,
					       bytes);
				kunmap_atomic(dest);
			} else {
				kunmap_atomic(dest);
@ -114,8 +114,8 @@ static int siw_rx_kva(struct siw_rx_stream *srx, void *kva, int len)

		return rv;
	}
	if (srx->mpa_crc_hd)
		crypto_shash_update(srx->mpa_crc_hd, (u8 *)kva, len);
	if (srx->mpa_crc_enabled)
		siw_crc_update(&srx->mpa_crc, kva, len);

	srx->skb_offset += len;
	srx->skb_copied += len;
@ -966,16 +966,16 @@ static int siw_get_trailer(struct siw_qp *qp, struct siw_rx_stream *srx)
	if (srx->fpdu_part_rem)
		return -EAGAIN;

	if (!srx->mpa_crc_hd)
	if (!srx->mpa_crc_enabled)
		return 0;

	if (srx->pad)
		crypto_shash_update(srx->mpa_crc_hd, tbuf, srx->pad);
		siw_crc_update(&srx->mpa_crc, tbuf, srx->pad);
	/*
	 * CRC32 is computed, transmitted and received directly in NBO,
	 * so there's never a reason to convert byte order.
	 */
	crypto_shash_final(srx->mpa_crc_hd, (u8 *)&crc_own);
	siw_crc_final(&srx->mpa_crc, (u8 *)&crc_own);
	crc_in = (__force __wsum)srx->trailer.crc;

	if (unlikely(crc_in != crc_own)) {
@ -1093,13 +1093,12 @@ static int siw_get_hdr(struct siw_rx_stream *srx)
	 * (tagged/untagged). E.g., a WRITE can get intersected by a SEND,
	 * but not by a READ RESPONSE etc.
	 */
	if (srx->mpa_crc_hd) {
	if (srx->mpa_crc_enabled) {
		/*
		 * Restart CRC computation
		 */
		crypto_shash_init(srx->mpa_crc_hd);
		crypto_shash_update(srx->mpa_crc_hd, (u8 *)c_hdr,
				    srx->fpdu_part_rcvd);
		siw_crc_init(&srx->mpa_crc);
		siw_crc_update(&srx->mpa_crc, c_hdr, srx->fpdu_part_rcvd);
	}
	if (frx->more_ddp_segs) {
		frx->first_ddp_seg = 0;

@ -248,10 +248,8 @@ static int siw_qp_prepare_tx(struct siw_iwarp_tx *c_tx)
		/*
		 * Do complete CRC if enabled and short packet
		 */
		if (c_tx->mpa_crc_hd &&
		    crypto_shash_digest(c_tx->mpa_crc_hd, (u8 *)&c_tx->pkt,
					c_tx->ctrl_len, (u8 *)crc) != 0)
			return -EINVAL;
		if (c_tx->mpa_crc_enabled)
			siw_crc_oneshot(&c_tx->pkt, c_tx->ctrl_len, (u8 *)crc);
		c_tx->ctrl_len += MPA_CRC_SIZE;

		return PKT_COMPLETE;
@ -482,9 +480,8 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
			iov[seg].iov_len = sge_len;

			if (do_crc)
				crypto_shash_update(c_tx->mpa_crc_hd,
						    iov[seg].iov_base,
						    sge_len);
				siw_crc_update(&c_tx->mpa_crc,
					       iov[seg].iov_base, sge_len);
			sge_off += sge_len;
			data_len -= sge_len;
			seg++;
@ -516,15 +513,14 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
					iov[seg].iov_len = plen;

					if (do_crc)
						crypto_shash_update(
							c_tx->mpa_crc_hd,
						siw_crc_update(
							&c_tx->mpa_crc,
							iov[seg].iov_base,
							plen);
				} else if (do_crc) {
					kaddr = kmap_local_page(p);
					crypto_shash_update(c_tx->mpa_crc_hd,
							    kaddr + fp_off,
							    plen);
					siw_crc_update(&c_tx->mpa_crc,
						       kaddr + fp_off, plen);
					kunmap_local(kaddr);
				}
			} else {
@ -536,10 +532,9 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)

				page_array[seg] = ib_virt_dma_to_page(va);
				if (do_crc)
					crypto_shash_update(
						c_tx->mpa_crc_hd,
						ib_virt_dma_to_ptr(va),
						plen);
					siw_crc_update(&c_tx->mpa_crc,
						       ib_virt_dma_to_ptr(va),
						       plen);
			}

			sge_len -= plen;
@ -576,14 +571,14 @@ sge_done:
	if (c_tx->pad) {
		*(u32 *)c_tx->trailer.pad = 0;
		if (do_crc)
			crypto_shash_update(c_tx->mpa_crc_hd,
				(u8 *)&c_tx->trailer.crc - c_tx->pad,
				c_tx->pad);
			siw_crc_update(&c_tx->mpa_crc,
				       (u8 *)&c_tx->trailer.crc - c_tx->pad,
				       c_tx->pad);
	}
	if (!c_tx->mpa_crc_hd)
	if (!c_tx->mpa_crc_enabled)
		c_tx->trailer.crc = 0;
	else if (do_crc)
		crypto_shash_final(c_tx->mpa_crc_hd, (u8 *)&c_tx->trailer.crc);
		siw_crc_final(&c_tx->mpa_crc, (u8 *)&c_tx->trailer.crc);

	data_len = c_tx->bytes_unsent;

@ -736,10 +731,9 @@ static void siw_prepare_fpdu(struct siw_qp *qp, struct siw_wqe *wqe)
	/*
	 * Init MPA CRC computation
	 */
	if (c_tx->mpa_crc_hd) {
		crypto_shash_init(c_tx->mpa_crc_hd);
		crypto_shash_update(c_tx->mpa_crc_hd, (u8 *)&c_tx->pkt,
				    c_tx->ctrl_len);
	if (c_tx->mpa_crc_enabled) {
		siw_crc_init(&c_tx->mpa_crc);
		siw_crc_update(&c_tx->mpa_crc, &c_tx->pkt, c_tx->ctrl_len);
		c_tx->do_crc = 1;
	}
}

@ -631,9 +631,6 @@ int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata)
	}
	up_write(&qp->state_lock);

	kfree(qp->tx_ctx.mpa_crc_hd);
	kfree(qp->rx_stream.mpa_crc_hd);

	qp->scq = qp->rcq = NULL;

	siw_qp_put(qp);

@ -393,10 +393,10 @@ static void iscsi_iser_cleanup_task(struct iscsi_task *task)
 * @task: iscsi task
 * @sector: error sector if exsists (output)
 *
 * Return: zero if no data-integrity errors have occured
 *         0x1: data-integrity error occured in the guard-block
 *         0x2: data-integrity error occured in the reference tag
 *         0x3: data-integrity error occured in the application tag
 * Return: zero if no data-integrity errors have occurred
 *         0x1: data-integrity error occurred in the guard-block
 *         0x2: data-integrity error occurred in the reference tag
 *         0x3: data-integrity error occurred in the application tag
 *
 * In addition the error sector is marked.
 */

@ -337,6 +337,7 @@ void mana_gd_wq_ring_doorbell(struct gdma_context *gc, struct gdma_queue *queue)
	mana_gd_ring_doorbell(gc, queue->gdma_dev->doorbell, queue->type,
			      queue->id, queue->head * GDMA_WQE_BU_SIZE, 0);
}
EXPORT_SYMBOL_NS(mana_gd_wq_ring_doorbell, "NET_MANA");

void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit)
{
@ -349,6 +350,7 @@ void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit)
	mana_gd_ring_doorbell(gc, cq->gdma_dev->doorbell, cq->type, cq->id,
			      head, arm_bit);
}
EXPORT_SYMBOL_NS(mana_gd_ring_cq, "NET_MANA");

static void mana_gd_process_eqe(struct gdma_queue *eq)
{
@ -894,6 +896,7 @@ free_q:
	kfree(queue);
	return err;
}
EXPORT_SYMBOL_NS(mana_gd_create_mana_wq_cq, "NET_MANA");

void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue)
{
@ -1068,7 +1071,7 @@ static u32 mana_gd_write_client_oob(const struct gdma_wqe_request *wqe_req,
	header->inline_oob_size_div4 = client_oob_size / sizeof(u32);

	if (oob_in_sgl) {
		WARN_ON_ONCE(!pad_data || wqe_req->num_sge < 2);
		WARN_ON_ONCE(wqe_req->num_sge < 2);

		header->client_oob_in_sgl = 1;

@ -1175,6 +1178,7 @@ int mana_gd_post_work_request(struct gdma_queue *wq,

	return 0;
}
EXPORT_SYMBOL_NS(mana_gd_post_work_request, "NET_MANA");

int mana_gd_post_and_ring(struct gdma_queue *queue,
			  const struct gdma_wqe_request *wqe_req,
@ -1248,6 +1252,7 @@ int mana_gd_poll_cq(struct gdma_queue *cq, struct gdma_comp *comp, int num_cqe)

	return cqe_idx;
}
EXPORT_SYMBOL_NS(mana_gd_poll_cq, "NET_MANA");

static irqreturn_t mana_gd_intr(int irq, void *arg)
{

@ -3179,21 +3179,27 @@ out:
	dev_dbg(dev, "%s succeeded\n", __func__);
}

struct net_device *mana_get_primary_netdev_rcu(struct mana_context *ac, u32 port_index)
struct net_device *mana_get_primary_netdev(struct mana_context *ac,
					   u32 port_index,
					   netdevice_tracker *tracker)
{
	struct net_device *ndev;

	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
			 "Taking primary netdev without holding the RCU read lock");
	if (port_index >= ac->num_ports)
		return NULL;

	/* When mana is used in netvsc, the upper netdevice should be returned. */
	if (ac->ports[port_index]->flags & IFF_SLAVE)
		ndev = netdev_master_upper_dev_get_rcu(ac->ports[port_index]);
	else
	rcu_read_lock();

	/* If mana is used in netvsc, the upper netdevice should be returned. */
	ndev = netdev_master_upper_dev_get_rcu(ac->ports[port_index]);

	/* If there is no upper device, use the parent Ethernet device */
	if (!ndev)
		ndev = ac->ports[port_index];

	netdev_hold(ndev, tracker, GFP_ATOMIC);
	rcu_read_unlock();

	return ndev;
}
EXPORT_SYMBOL_NS(mana_get_primary_netdev_rcu, "NET_MANA");
EXPORT_SYMBOL_NS(mana_get_primary_netdev, "NET_MANA");

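With the rename, the helper takes the RCU read lock itself and hands back a reference tracked by @tracker, so callers must release that reference when they are done. A minimal caller sketch, assuming a hypothetical example_use_primary_netdev() (not part of the patch):

static int example_use_primary_netdev(struct mana_context *ac, u32 port)
{
	netdevice_tracker tracker;
	struct net_device *ndev;

	ndev = mana_get_primary_netdev(ac, port, &tracker);
	if (!ndev)
		return -ENODEV;

	/* ... use ndev, e.g. read ndev->mtu or its dev_addr ... */

	netdev_put(ndev, &tracker);	/* drop the tracked reference */
	return 0;
}
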
@ -1533,8 +1533,8 @@ static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz)
	return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz;
}

#define MLX5_RDMA_RX_NUM_COUNTERS_PRIOS 2
#define MLX5_RDMA_TX_NUM_COUNTERS_PRIOS 1
#define MLX5_RDMA_RX_NUM_COUNTERS_PRIOS 6
#define MLX5_RDMA_TX_NUM_COUNTERS_PRIOS 4
#define MLX5_BY_PASS_NUM_REGULAR_PRIOS 16
#define MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS 16
#define MLX5_BY_PASS_NUM_MULTICAST_PRIOS 1

@ -152,6 +152,7 @@ struct gdma_general_req {
#define GDMA_MESSAGE_V1 1
#define GDMA_MESSAGE_V2 2
#define GDMA_MESSAGE_V3 3
#define GDMA_MESSAGE_V4 4

struct gdma_general_resp {
	struct gdma_resp_hdr hdr;
@ -778,6 +779,7 @@ struct gdma_destroy_dma_region_req {

enum gdma_pd_flags {
	GDMA_PD_FLAG_INVALID = 0,
	GDMA_PD_FLAG_ALLOW_GPA_MR = 1,
};

struct gdma_create_pd_req {
@ -803,6 +805,11 @@ struct gdma_destory_pd_resp {
};/* HW DATA */

enum gdma_mr_type {
	/*
	 * Guest Physical Address - MRs of this type allow access
	 * to any DMA-mapped memory using bus-logical address
	 */
	GDMA_MR_TYPE_GPA = 1,
	/* Guest Virtual Address - MRs of this type allow access
	 * to memory mapped by PTEs associated with this MR using a virtual
	 * address that is set up in the MST

@ -827,5 +827,7 @@ int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id,
		   u32 doorbell_pg_id);
void mana_uncfg_vport(struct mana_port_context *apc);

struct net_device *mana_get_primary_netdev_rcu(struct mana_context *ac, u32 port_index);
struct net_device *mana_get_primary_netdev(struct mana_context *ac,
					   u32 port_index,
					   netdevice_tracker *tracker);
#endif /* _MANA_H */

30
include/rdma/ib_ucaps.h
Normal file
@ -0,0 +1,30 @@
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
 * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved
 */

#ifndef _IB_UCAPS_H_
#define _IB_UCAPS_H_

#define UCAP_ENABLED(ucaps, type) (!!((ucaps) & (1U << (type))))

enum rdma_user_cap {
	RDMA_UCAP_MLX5_CTRL_LOCAL,
	RDMA_UCAP_MLX5_CTRL_OTHER_VHCA,
	RDMA_UCAP_MAX
};

void ib_cleanup_ucaps(void);
int ib_get_ucaps(int *fds, int fd_count, uint64_t *idx_mask);
#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
int ib_create_ucap(enum rdma_user_cap type);
void ib_remove_ucap(enum rdma_user_cap type);
#else
static inline int ib_create_ucap(enum rdma_user_cap type)
{
	return -EOPNOTSUPP;
}
static inline void ib_remove_ucap(enum rdma_user_cap type) {}
#endif /* CONFIG_INFINIBAND_USER_ACCESS */

#endif /* _IB_UCAPS_H_ */

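A minimal sketch of how a provider might expose and tear down one of the capabilities declared above, assuming hypothetical example_ucap_init()/example_ucap_cleanup() hooks (not part of the patch); note the stub above makes ib_create_ucap() return -EOPNOTSUPP when INFINIBAND_USER_ACCESS is disabled:

static int example_ucap_init(void)
{
	/* make the capability available; fails cleanly without user access support */
	return ib_create_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL);
}

static void example_ucap_cleanup(void)
{
	ib_remove_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL);
}
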
@ -519,6 +519,23 @@ enum ib_port_state {
	IB_PORT_ACTIVE_DEFER = 5
};

static inline const char *__attribute_const__
ib_port_state_to_str(enum ib_port_state state)
{
	const char * const states[] = {
		[IB_PORT_NOP] = "NOP",
		[IB_PORT_DOWN] = "DOWN",
		[IB_PORT_INIT] = "INIT",
		[IB_PORT_ARMED] = "ARMED",
		[IB_PORT_ACTIVE] = "ACTIVE",
		[IB_PORT_ACTIVE_DEFER] = "ACTIVE_DEFER",
	};

	if (state < ARRAY_SIZE(states))
		return states[state];
	return "UNKNOWN";
}

enum ib_port_phys_state {
	IB_PORT_PHYS_STATE_SLEEP = 1,
	IB_PORT_PHYS_STATE_POLLING = 2,
@ -1513,6 +1530,7 @@ struct ib_ucontext {
	struct ib_uverbs_file *ufile;

	struct ib_rdmacg_object cg_obj;
	u64 enabled_caps;
	/*
	 * Implementation details of the RDMA core, don't use in drivers:
	 */
@ -2626,12 +2644,13 @@ struct ib_device_ops {
	 * @counter - The counter to be bound. If counter->id is zero then
	 * the driver needs to allocate a new counter and set counter->id
	 */
	int (*counter_bind_qp)(struct rdma_counter *counter, struct ib_qp *qp);
	int (*counter_bind_qp)(struct rdma_counter *counter, struct ib_qp *qp,
			       u32 port);
	/**
	 * counter_unbind_qp - Unbind the qp from the dynamically-allocated
	 * counter and bind it onto the default one
	 */
	int (*counter_unbind_qp)(struct ib_qp *qp);
	int (*counter_unbind_qp)(struct ib_qp *qp, u32 port);
	/**
	 * counter_dealloc -De-allocate the hw counter
	 */
@ -2647,6 +2666,11 @@ struct ib_device_ops {
	 */
	int (*counter_update_stats)(struct rdma_counter *counter);

	/**
	 * counter_init - Initialize the driver specific rdma counter struct.
	 */
	void (*counter_init)(struct rdma_counter *counter);

	/**
	 * Allows rdma drivers to add their own restrack attributes
	 * dumped via 'rdma stat' iproute2 command.
@ -2698,6 +2722,7 @@ struct ib_device_ops {
	DECLARE_RDMA_OBJ_SIZE(ib_srq);
	DECLARE_RDMA_OBJ_SIZE(ib_ucontext);
	DECLARE_RDMA_OBJ_SIZE(ib_xrcd);
	DECLARE_RDMA_OBJ_SIZE(rdma_counter);
};

struct ib_core_device {
@ -2750,6 +2775,7 @@ struct ib_device {
	 * It is a NULL terminated array.
	 */
	const struct attribute_group *groups[4];
	u8 hw_stats_attr_index;

	u64 uverbs_cmd_mask;

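The counter bind/unbind ops now take the port number. A sketch of driver callbacks wired to the new signatures, assuming hypothetical example_* handlers (not part of the patch):

static int example_counter_bind_qp(struct rdma_counter *counter,
				   struct ib_qp *qp, u32 port)
{
	/* allocate a HW counter for @port when counter->id == 0, then bind @qp */
	return 0;
}

static int example_counter_unbind_qp(struct ib_qp *qp, u32 port)
{
	/* move @qp back to the default counter of @port */
	return 0;
}

static const struct ib_device_ops example_dev_ops = {
	.counter_bind_qp = example_counter_bind_qp,
	.counter_unbind_qp = example_counter_unbind_qp,
};
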
@ -23,6 +23,7 @@ struct rdma_counter_mode {
	enum rdma_nl_counter_mode mode;
	enum rdma_nl_counter_mask mask;
	struct auto_mode_param param;
	bool bind_opcnt;
};

struct rdma_port_counter {
@ -47,9 +48,10 @@ void rdma_counter_init(struct ib_device *dev);
void rdma_counter_release(struct ib_device *dev);
int rdma_counter_set_auto_mode(struct ib_device *dev, u32 port,
			       enum rdma_nl_counter_mask mask,
			       bool bind_opcnt,
			       struct netlink_ext_ack *extack);
int rdma_counter_bind_qp_auto(struct ib_qp *qp, u32 port);
int rdma_counter_unbind_qp(struct ib_qp *qp, bool force);
int rdma_counter_unbind_qp(struct ib_qp *qp, u32 port, bool force);

int rdma_counter_query_stats(struct rdma_counter *counter);
u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u32 port, u32 index);
@ -61,7 +63,8 @@ int rdma_counter_unbind_qpn(struct ib_device *dev, u32 port,
			    u32 qp_num, u32 counter_id);
int rdma_counter_get_mode(struct ib_device *dev, u32 port,
			  enum rdma_nl_counter_mode *mode,
			  enum rdma_nl_counter_mask *mask);
			  enum rdma_nl_counter_mask *mask,
			  bool *opcnt);

int rdma_counter_modify(struct ib_device *dev, u32 port,
			unsigned int index, bool enable);

@ -34,7 +34,7 @@
static inline void *_uobj_get_obj_read(struct ib_uobject *uobj)
{
	if (IS_ERR(uobj))
		return NULL;
		return ERR_CAST(uobj);
	return uobj->object;
}
#define uobj_get_obj_read(_object, _type, _id, _attrs) \

@ -88,6 +88,7 @@ enum uverbs_attrs_query_port_cmd_attr_ids {
enum uverbs_attrs_get_context_attr_ids {
	UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS,
	UVERBS_ATTR_GET_CONTEXT_CORE_SUPPORT,
	UVERBS_ATTR_GET_CONTEXT_FD_ARR,
};

enum uverbs_attrs_query_context_attr_ids {

@ -239,6 +239,7 @@ enum mlx5_ib_flow_matcher_create_attrs {
	MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
	MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
	MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
	MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT,
};

enum mlx5_ib_flow_matcher_destroy_attrs {

@ -45,6 +45,8 @@ enum mlx5_ib_uapi_flow_table_type {
	MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB = 0x2,
	MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX = 0x3,
	MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX = 0x4,
	MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_RX = 0x5,
	MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_TX = 0x6,
};

enum mlx5_ib_uapi_flow_action_packet_reformat_type {

@ -580,6 +580,8 @@ enum rdma_nldev_attr {
	RDMA_NLDEV_ATTR_EVENT_TYPE, /* u8 */

	RDMA_NLDEV_SYS_ATTR_MONITOR_MODE, /* u8 */

	RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED, /* u8 */
	/*
	 * Always the end
	 */