mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/
synced 2025-04-20 05:08:28 +09:00

Before commit 40867d74c374 ("net: Add l3mdev index to flow struct and avoid oif reset for port devices") it was possible to use FIB rules to match on a L3 domain. This was done by having a FIB rule match on iif / oif being a L3 master device. It worked because prior to the FIB rule lookup the iif / oif fields in the flow structure were reset to the index of the L3 master device to which the input / output device was enslaved.

The above scheme made it impossible to match on the original input / output device. Therefore, cited commit stopped overwriting the iif / oif fields in the flow structure and instead stored the index of the enslaving L3 master device in a new field ('flowi_l3mdev') in the flow structure.

While the change enabled new use cases, it broke the original use case of matching on a L3 domain. Fix this by interpreting the iif / oif matching on a L3 master device as a match against the L3 domain. In other words, if the iif / oif in the FIB rule points to a L3 master device, compare the provided index against 'flowi_l3mdev' rather than 'flowi_{i,o}if'.

Before cited commit, a FIB rule that matched on 'iif vrf1' would only match incoming traffic from devices enslaved to 'vrf1'. With the proposed change (i.e., comparing against 'flowi_l3mdev'), the rule would also match traffic originating from a socket bound to 'vrf1'. Avoid that by adding a new flow flag ('FLOWI_FLAG_L3MDEV_OIF') that indicates if the L3 domain was derived from the output interface or the input interface (when not set) and take this flag into account when evaluating the FIB rule against the flow structure.

Avoid unnecessary checks in the data path by detecting that a rule matches on a L3 master device when the rule is installed and marking it as such.

Tested using the following script [1].
Output before 40867d74c374 (v5.4.291):

default dev dummy1 table 100 scope link
default dev dummy1 table 200 scope link

Output after 40867d74c374:

default dev dummy1 table 300 scope link
default dev dummy1 table 300 scope link

Output with this patch:

default dev dummy1 table 100 scope link
default dev dummy1 table 200 scope link

[1]
#!/bin/bash

ip link add name vrf1 up type vrf table 10
ip link add name dummy1 up master vrf1 type dummy

sysctl -wq net.ipv4.conf.all.forwarding=1
sysctl -wq net.ipv4.conf.all.rp_filter=0

ip route add table 100 default dev dummy1
ip route add table 200 default dev dummy1
ip route add table 300 default dev dummy1

ip rule add prio 0 oif vrf1 table 100
ip rule add prio 1 iif vrf1 table 200
ip rule add prio 2 table 300

ip route get 192.0.2.1 oif dummy1 fibmatch
ip route get 192.0.2.1 iif dummy1 from 198.51.100.1 fibmatch

Fixes: 40867d74c374 ("net: Add l3mdev index to flow struct and avoid oif reset for port devices")
Reported-by: hanhuihui <hanhuihui5@huawei.com>
Closes: https://lore.kernel.org/netdev/ec671c4f821a4d63904d0da15d604b75@huawei.com/
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: David Ahern <dsahern@kernel.org>
Link: https://patch.msgid.link/20250414172022.242991-2-idosch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
304 lines
6.6 KiB
C
304 lines
6.6 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
/*
|
|
* net/l3mdev/l3mdev.c - L3 master device implementation
|
|
* Copyright (c) 2015 Cumulus Networks
|
|
* Copyright (c) 2015 David Ahern <dsa@cumulusnetworks.com>
|
|
*/
|
|
|
|
#include <linux/netdevice.h>
|
|
#include <net/fib_rules.h>
|
|
#include <net/l3mdev.h>
|
|
|
|
/* Taken around every read and write of the dev_lookup callbacks kept in
 * l3mdev_handlers[].
 */
static DEFINE_SPINLOCK(l3mdev_lock);
|
|
|
|
struct l3mdev_handler {
|
|
lookup_by_table_id_t dev_lookup;
|
|
};
|
|
|
|
/* One handler slot per l3mdev type, indexed directly by the enum l3mdev_type
 * value (hence the L3MDEV_TYPE_MAX + 1 entries).
 */
static struct l3mdev_handler l3mdev_handlers[L3MDEV_TYPE_MAX + 1];
|
|
|
|
static int l3mdev_check_type(enum l3mdev_type l3type)
|
|
{
|
|
if (l3type <= L3MDEV_TYPE_UNSPEC || l3type > L3MDEV_TYPE_MAX)
|
|
return -EINVAL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int l3mdev_table_lookup_register(enum l3mdev_type l3type,
|
|
lookup_by_table_id_t fn)
|
|
{
|
|
struct l3mdev_handler *hdlr;
|
|
int res;
|
|
|
|
res = l3mdev_check_type(l3type);
|
|
if (res)
|
|
return res;
|
|
|
|
hdlr = &l3mdev_handlers[l3type];
|
|
|
|
spin_lock(&l3mdev_lock);
|
|
|
|
if (hdlr->dev_lookup) {
|
|
res = -EBUSY;
|
|
goto unlock;
|
|
}
|
|
|
|
hdlr->dev_lookup = fn;
|
|
res = 0;
|
|
|
|
unlock:
|
|
spin_unlock(&l3mdev_lock);
|
|
|
|
return res;
|
|
}
|
|
EXPORT_SYMBOL_GPL(l3mdev_table_lookup_register);
|
|
|
|
void l3mdev_table_lookup_unregister(enum l3mdev_type l3type,
|
|
lookup_by_table_id_t fn)
|
|
{
|
|
struct l3mdev_handler *hdlr;
|
|
|
|
if (l3mdev_check_type(l3type))
|
|
return;
|
|
|
|
hdlr = &l3mdev_handlers[l3type];
|
|
|
|
spin_lock(&l3mdev_lock);
|
|
|
|
if (hdlr->dev_lookup == fn)
|
|
hdlr->dev_lookup = NULL;
|
|
|
|
spin_unlock(&l3mdev_lock);
|
|
}
|
|
EXPORT_SYMBOL_GPL(l3mdev_table_lookup_unregister);
|
|
|
|
/**
 *	l3mdev_ifindex_lookup_by_table_id - run the registered table-id lookup
 *	@l3type: l3mdev type selecting the handler slot
 *	@net: network namespace passed through to the callback
 *	@table_id: table id passed through to the callback
 *
 *	The callback is invoked with l3mdev_lock held.
 *
 *	Return: whatever the registered callback returns, the error from
 *	l3mdev_check_type() when @l3type is out of range, or -EINVAL when no
 *	callback is registered for @l3type.
 */
int l3mdev_ifindex_lookup_by_table_id(enum l3mdev_type l3type,
				      struct net *net, u32 table_id)
{
	struct l3mdev_handler *slot;
	lookup_by_table_id_t fn;
	int rc;

	rc = l3mdev_check_type(l3type);
	if (rc)
		return rc;

	slot = &l3mdev_handlers[l3type];

	/* result reported when the slot turns out to be empty */
	rc = -EINVAL;

	spin_lock(&l3mdev_lock);

	fn = slot->dev_lookup;
	if (fn)
		rc = fn(net, table_id);

	spin_unlock(&l3mdev_lock);

	return rc;
}
EXPORT_SYMBOL_GPL(l3mdev_ifindex_lookup_by_table_id);
|
|
|
|
/**
|
|
* l3mdev_master_ifindex_rcu - get index of L3 master device
|
|
* @dev: targeted interface
|
|
*/
|
|
|
|
int l3mdev_master_ifindex_rcu(const struct net_device *dev)
|
|
{
|
|
int ifindex = 0;
|
|
|
|
if (!dev)
|
|
return 0;
|
|
|
|
if (netif_is_l3_master(dev)) {
|
|
ifindex = dev->ifindex;
|
|
} else if (netif_is_l3_slave(dev)) {
|
|
struct net_device *master;
|
|
struct net_device *_dev = (struct net_device *)dev;
|
|
|
|
/* netdev_master_upper_dev_get_rcu calls
|
|
* list_first_or_null_rcu to walk the upper dev list.
|
|
* list_first_or_null_rcu does not handle a const arg. We aren't
|
|
* making changes, just want the master device from that list so
|
|
* typecast to remove the const
|
|
*/
|
|
master = netdev_master_upper_dev_get_rcu(_dev);
|
|
if (master)
|
|
ifindex = master->ifindex;
|
|
}
|
|
|
|
return ifindex;
|
|
}
|
|
EXPORT_SYMBOL_GPL(l3mdev_master_ifindex_rcu);
|
|
|
|
/**
|
|
* l3mdev_master_upper_ifindex_by_index_rcu - get index of upper l3 master
|
|
* device
|
|
* @net: network namespace for device index lookup
|
|
* @ifindex: targeted interface
|
|
*/
|
|
int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex)
|
|
{
|
|
struct net_device *dev;
|
|
|
|
dev = dev_get_by_index_rcu(net, ifindex);
|
|
while (dev && !netif_is_l3_master(dev))
|
|
dev = netdev_master_upper_dev_get_rcu(dev);
|
|
|
|
return dev ? dev->ifindex : 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(l3mdev_master_upper_ifindex_by_index_rcu);
|
|
|
|
/**
|
|
* l3mdev_fib_table_rcu - get FIB table id associated with an L3
|
|
* master interface
|
|
* @dev: targeted interface
|
|
*/
|
|
|
|
u32 l3mdev_fib_table_rcu(const struct net_device *dev)
|
|
{
|
|
u32 tb_id = 0;
|
|
|
|
if (!dev)
|
|
return 0;
|
|
|
|
if (netif_is_l3_master(dev)) {
|
|
if (dev->l3mdev_ops->l3mdev_fib_table)
|
|
tb_id = dev->l3mdev_ops->l3mdev_fib_table(dev);
|
|
} else if (netif_is_l3_slave(dev)) {
|
|
/* Users of netdev_master_upper_dev_get_rcu need non-const,
|
|
* but current inet_*type functions take a const
|
|
*/
|
|
struct net_device *_dev = (struct net_device *) dev;
|
|
const struct net_device *master;
|
|
|
|
master = netdev_master_upper_dev_get_rcu(_dev);
|
|
if (master &&
|
|
master->l3mdev_ops->l3mdev_fib_table)
|
|
tb_id = master->l3mdev_ops->l3mdev_fib_table(master);
|
|
}
|
|
|
|
return tb_id;
|
|
}
|
|
EXPORT_SYMBOL_GPL(l3mdev_fib_table_rcu);
|
|
|
|
/**
 *	l3mdev_fib_table_by_index - get FIB table id for a device index
 *	@net: network namespace for device index lookup
 *	@ifindex: targeted interface index; 0 short-circuits to a 0 result
 *
 *	Takes the RCU read lock itself around the device lookup and the call
 *	to l3mdev_fib_table_rcu().
 *
 *	Return: table id reported by l3mdev_fib_table_rcu() for the device, or
 *	0 when @ifindex is 0 or no device with that index exists in @net.
 */
u32 l3mdev_fib_table_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;
	u32 table;

	if (!ifindex)
		return 0;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	table = dev ? l3mdev_fib_table_rcu(dev) : 0;
	rcu_read_unlock();

	return table;
}
EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index);
|
|
|
|
/**
|
|
* l3mdev_link_scope_lookup - IPv6 route lookup based on flow for link
|
|
* local and multicast addresses
|
|
* @net: network namespace for device index lookup
|
|
* @fl6: IPv6 flow struct for lookup
|
|
* This function does not hold refcnt on the returned dst.
|
|
* Caller must hold rcu_read_lock().
|
|
*/
|
|
|
|
struct dst_entry *l3mdev_link_scope_lookup(struct net *net,
|
|
struct flowi6 *fl6)
|
|
{
|
|
struct dst_entry *dst = NULL;
|
|
struct net_device *dev;
|
|
|
|
WARN_ON_ONCE(!rcu_read_lock_held());
|
|
if (fl6->flowi6_oif) {
|
|
dev = dev_get_by_index_rcu(net, fl6->flowi6_oif);
|
|
if (dev && netif_is_l3_slave(dev))
|
|
dev = netdev_master_upper_dev_get_rcu(dev);
|
|
|
|
if (dev && netif_is_l3_master(dev) &&
|
|
dev->l3mdev_ops->l3mdev_link_scope_lookup)
|
|
dst = dev->l3mdev_ops->l3mdev_link_scope_lookup(dev, fl6);
|
|
}
|
|
|
|
return dst;
|
|
}
|
|
EXPORT_SYMBOL_GPL(l3mdev_link_scope_lookup);
|
|
|
|
/**
|
|
* l3mdev_fib_rule_match - Determine if flowi references an
|
|
* L3 master device
|
|
* @net: network namespace for device index lookup
|
|
* @fl: flow struct
|
|
* @arg: store the table the rule matched with here
|
|
*/
|
|
|
|
int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
|
|
struct fib_lookup_arg *arg)
|
|
{
|
|
struct net_device *dev;
|
|
int rc = 0;
|
|
|
|
/* update flow ensures flowi_l3mdev is set when relevant */
|
|
if (!fl->flowi_l3mdev)
|
|
return 0;
|
|
|
|
rcu_read_lock();
|
|
|
|
dev = dev_get_by_index_rcu(net, fl->flowi_l3mdev);
|
|
if (dev && netif_is_l3_master(dev) &&
|
|
dev->l3mdev_ops->l3mdev_fib_table) {
|
|
arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev);
|
|
rc = 1;
|
|
}
|
|
|
|
rcu_read_unlock();
|
|
|
|
return rc;
|
|
}
|
|
|
|
void l3mdev_update_flow(struct net *net, struct flowi *fl)
|
|
{
|
|
struct net_device *dev;
|
|
|
|
rcu_read_lock();
|
|
|
|
if (fl->flowi_oif) {
|
|
dev = dev_get_by_index_rcu(net, fl->flowi_oif);
|
|
if (dev) {
|
|
if (!fl->flowi_l3mdev) {
|
|
fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev);
|
|
fl->flowi_flags |= FLOWI_FLAG_L3MDEV_OIF;
|
|
}
|
|
|
|
/* oif set to L3mdev directs lookup to its table;
|
|
* reset to avoid oif match in fib_lookup
|
|
*/
|
|
if (netif_is_l3_master(dev))
|
|
fl->flowi_oif = 0;
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
if (fl->flowi_iif > LOOPBACK_IFINDEX && !fl->flowi_l3mdev) {
|
|
dev = dev_get_by_index_rcu(net, fl->flowi_iif);
|
|
if (dev)
|
|
fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev);
|
|
}
|
|
|
|
out:
|
|
rcu_read_unlock();
|
|
}
|
|
EXPORT_SYMBOL_GPL(l3mdev_update_flow);
|