mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/
synced 2025-04-19 20:58:31 +09:00

Before commit 40867d74c374 ("net: Add l3mdev index to flow struct and avoid oif reset for port devices") it was possible to use FIB rules to match on an L3 domain. This was done by having a FIB rule match on iif / oif being an L3 master device. It worked because prior to the FIB rule lookup the iif / oif fields in the flow structure were reset to the index of the L3 master device to which the input / output device was enslaved. The above scheme made it impossible to match on the original input / output device. Therefore, the cited commit stopped overwriting the iif / oif fields in the flow structure and instead stored the index of the enslaving L3 master device in a new field ('flowi_l3mdev') in the flow structure. While the change enabled new use cases, it broke the original use case of matching on an L3 domain. Fix this by interpreting the iif / oif matching on an L3 master device as a match against the L3 domain. In other words, if the iif / oif in the FIB rule points to an L3 master device, compare the provided index against 'flowi_l3mdev' rather than 'flowi_{i,o}if'. Before the cited commit, a FIB rule that matched on 'iif vrf1' would only match incoming traffic from devices enslaved to 'vrf1'. With the proposed change (i.e., comparing against 'flowi_l3mdev'), the rule would also match traffic originating from a socket bound to 'vrf1'. Avoid that by adding a new flow flag ('FLOWI_FLAG_L3MDEV_OIF') that indicates if the L3 domain was derived from the output interface or the input interface (when not set) and take this flag into account when evaluating the FIB rule against the flow structure. Avoid unnecessary checks in the data path by detecting that a rule matches on an L3 master device when the rule is installed and marking it as such. Tested using the following script [1].
Output before 40867d74c374 (v5.4.291): default dev dummy1 table 100 scope link default dev dummy1 table 200 scope link Output after 40867d74c374: default dev dummy1 table 300 scope link default dev dummy1 table 300 scope link Output with this patch: default dev dummy1 table 100 scope link default dev dummy1 table 200 scope link [1] #!/bin/bash ip link add name vrf1 up type vrf table 10 ip link add name dummy1 up master vrf1 type dummy sysctl -wq net.ipv4.conf.all.forwarding=1 sysctl -wq net.ipv4.conf.all.rp_filter=0 ip route add table 100 default dev dummy1 ip route add table 200 default dev dummy1 ip route add table 300 default dev dummy1 ip rule add prio 0 oif vrf1 table 100 ip rule add prio 1 iif vrf1 table 200 ip rule add prio 2 table 300 ip route get 192.0.2.1 oif dummy1 fibmatch ip route get 192.0.2.1 iif dummy1 from 198.51.100.1 fibmatch Fixes: 40867d74c374 ("net: Add l3mdev index to flow struct and avoid oif reset for port devices") Reported-by: hanhuihui <hanhuihui5@huawei.com> Closes: https://lore.kernel.org/netdev/ec671c4f821a4d63904d0da15d604b75@huawei.com/ Signed-off-by: Ido Schimmel <idosch@nvidia.com> Acked-by: David Ahern <dsahern@kernel.org> Link: https://patch.msgid.link/20250414172022.242991-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
364 lines
7.6 KiB
C
364 lines
7.6 KiB
C
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
/*
|
|
* include/net/l3mdev.h - L3 master device API
|
|
* Copyright (c) 2015 Cumulus Networks
|
|
* Copyright (c) 2015 David Ahern <dsa@cumulusnetworks.com>
|
|
*/
|
|
#ifndef _NET_L3MDEV_H_
|
|
#define _NET_L3MDEV_H_
|
|
|
|
#include <net/dst.h>
|
|
#include <net/fib_rules.h>
|
|
|
|
/* Kinds of L3 master device; used as the l3type argument below. */
enum l3mdev_type {
	L3MDEV_TYPE_UNSPEC,
	L3MDEV_TYPE_VRF,
	__L3MDEV_TYPE_MAX	/* sentinel: one past the last real type */
};

/* Highest valid enum l3mdev_type value. */
#define L3MDEV_TYPE_MAX (__L3MDEV_TYPE_MAX - 1)
|
|
|
|
/*
 * Callback type taken by l3mdev_table_lookup_register() and
 * l3mdev_table_lookup_unregister(): receives a network namespace and a
 * table id and returns an int.
 */
typedef int (*lookup_by_table_id_t)(struct net *net, u32 table_id);
|
|
|
|
/**
|
|
* struct l3mdev_ops - l3mdev operations
|
|
*
|
|
* @l3mdev_fib_table: Get FIB table id to use for lookups
|
|
*
|
|
* @l3mdev_l3_rcv: Hook in L3 receive path
|
|
*
|
|
* @l3mdev_l3_out: Hook in L3 output path
|
|
*
|
|
* @l3mdev_link_scope_lookup: IPv6 lookup for linklocal and mcast destinations
|
|
*/
|
|
|
|
struct l3mdev_ops {
|
|
u32 (*l3mdev_fib_table)(const struct net_device *dev);
|
|
struct sk_buff * (*l3mdev_l3_rcv)(struct net_device *dev,
|
|
struct sk_buff *skb, u16 proto);
|
|
struct sk_buff * (*l3mdev_l3_out)(struct net_device *dev,
|
|
struct sock *sk, struct sk_buff *skb,
|
|
u16 proto);
|
|
|
|
/* IPv6 ops */
|
|
struct dst_entry * (*l3mdev_link_scope_lookup)(const struct net_device *dev,
|
|
struct flowi6 *fl6);
|
|
};
|
|
|
|
#ifdef CONFIG_NET_L3_MASTER_DEV
|
|
|
|
int l3mdev_table_lookup_register(enum l3mdev_type l3type,
|
|
lookup_by_table_id_t fn);
|
|
|
|
void l3mdev_table_lookup_unregister(enum l3mdev_type l3type,
|
|
lookup_by_table_id_t fn);
|
|
|
|
int l3mdev_ifindex_lookup_by_table_id(enum l3mdev_type l3type, struct net *net,
|
|
u32 table_id);
|
|
|
|
int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
|
|
struct fib_lookup_arg *arg);
|
|
|
|
static inline
|
|
bool l3mdev_fib_rule_iif_match(const struct flowi *fl, int iifindex)
|
|
{
|
|
return !(fl->flowi_flags & FLOWI_FLAG_L3MDEV_OIF) &&
|
|
fl->flowi_l3mdev == iifindex;
|
|
}
|
|
|
|
static inline
|
|
bool l3mdev_fib_rule_oif_match(const struct flowi *fl, int oifindex)
|
|
{
|
|
return fl->flowi_flags & FLOWI_FLAG_L3MDEV_OIF &&
|
|
fl->flowi_l3mdev == oifindex;
|
|
}
|
|
|
|
/*
 * Out-of-line helper; its body is not in this header.
 * NOTE(review): presumably fills in the L3 domain fields of @fl
 * (flowi_l3mdev and related flags) -- confirm against the implementation.
 * The stub used without CONFIG_NET_L3_MASTER_DEV is a no-op.
 */
void l3mdev_update_flow(struct net *net, struct flowi *fl);
|
|
|
|
/*
 * Out-of-line lookup of the L3 master ifindex for @dev.
 * NOTE(review): the _rcu suffix suggests the caller must be inside an RCU
 * read-side critical section -- confirm against the implementation.
 */
int l3mdev_master_ifindex_rcu(const struct net_device *dev);

/*
 * Wrapper around l3mdev_master_ifindex_rcu() that takes and releases
 * rcu_read_lock() itself and returns that function's result.
 */
static inline int l3mdev_master_ifindex(struct net_device *dev)
{
	int ifindex;

	rcu_read_lock();
	ifindex = l3mdev_master_ifindex_rcu(dev);
	rcu_read_unlock();

	return ifindex;
}
|
|
|
|
/*
 * Look up the device with index @ifindex in @net and return
 * l3mdev_master_ifindex_rcu() for it.
 *
 * Returns 0 when @ifindex is 0 or no such device exists. The device
 * lookup and the master lookup both run under rcu_read_lock().
 */
static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;
	int rc = 0;

	if (ifindex) {
		rcu_read_lock();

		dev = dev_get_by_index_rcu(net, ifindex);
		if (dev)
			rc = l3mdev_master_ifindex_rcu(dev);

		rcu_read_unlock();
	}

	return rc;
}
|
|
|
|
static inline
|
|
struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev)
|
|
{
|
|
/* netdev_master_upper_dev_get_rcu calls
|
|
* list_first_or_null_rcu to walk the upper dev list.
|
|
* list_first_or_null_rcu does not handle a const arg. We aren't
|
|
* making changes, just want the master device from that list so
|
|
* typecast to remove the const
|
|
*/
|
|
struct net_device *dev = (struct net_device *)_dev;
|
|
struct net_device *master;
|
|
|
|
if (!dev)
|
|
return NULL;
|
|
|
|
if (netif_is_l3_master(dev))
|
|
master = dev;
|
|
else if (netif_is_l3_slave(dev))
|
|
master = netdev_master_upper_dev_get_rcu(dev);
|
|
else
|
|
master = NULL;
|
|
|
|
return master;
|
|
}
|
|
|
|
/*
 * Out-of-line lookup keyed by @ifindex in @net; body not in this header.
 * NOTE(review): the _rcu suffix suggests the caller must hold
 * rcu_read_lock() -- confirm against the implementation.
 */
int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex);

/*
 * Wrapper around l3mdev_master_upper_ifindex_by_index_rcu() that takes
 * and releases rcu_read_lock() itself. The @ifindex parameter is reused
 * to hold the result.
 */
static inline
int l3mdev_master_upper_ifindex_by_index(struct net *net, int ifindex)
{
	rcu_read_lock();
	ifindex = l3mdev_master_upper_ifindex_by_index_rcu(net, ifindex);
	rcu_read_unlock();

	return ifindex;
}
|
|
|
|
/*
 * Out-of-line FIB table id lookups; bodies are not in this header.
 * NOTE(review): the _rcu variant presumably requires rcu_read_lock() to
 * be held by the caller -- confirm against the implementation.
 */
u32 l3mdev_fib_table_rcu(const struct net_device *dev);
u32 l3mdev_fib_table_by_index(struct net *net, int ifindex);

/*
 * Wrapper around l3mdev_fib_table_rcu() that takes and releases
 * rcu_read_lock() itself and returns that function's result.
 */
static inline u32 l3mdev_fib_table(const struct net_device *dev)
{
	u32 tb_id;

	rcu_read_lock();
	tb_id = l3mdev_fib_table_rcu(dev);
	rcu_read_unlock();

	return tb_id;
}
|
|
|
|
static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
|
|
{
|
|
struct net_device *dev;
|
|
bool rc = false;
|
|
|
|
if (ifindex == 0)
|
|
return false;
|
|
|
|
rcu_read_lock();
|
|
|
|
dev = dev_get_by_index_rcu(net, ifindex);
|
|
if (dev)
|
|
rc = netif_is_l3_master(dev);
|
|
|
|
rcu_read_unlock();
|
|
|
|
return rc;
|
|
}
|
|
|
|
/*
 * Out-of-line helper; its body is not in this header.
 * NOTE(review): presumably dispatches to the l3mdev_link_scope_lookup op
 * (IPv6 lookup for linklocal and mcast destinations) -- confirm. The stub
 * used without CONFIG_NET_L3_MASTER_DEV returns NULL.
 */
struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6);
|
|
|
|
static inline
|
|
struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto)
|
|
{
|
|
struct net_device *master = NULL;
|
|
|
|
if (netif_is_l3_slave(skb->dev))
|
|
master = netdev_master_upper_dev_get_rcu(skb->dev);
|
|
else if (netif_is_l3_master(skb->dev) ||
|
|
netif_has_l3_rx_handler(skb->dev))
|
|
master = skb->dev;
|
|
|
|
if (master && master->l3mdev_ops->l3mdev_l3_rcv)
|
|
skb = master->l3mdev_ops->l3mdev_l3_rcv(master, skb, proto);
|
|
|
|
return skb;
|
|
}
|
|
|
|
static inline
|
|
struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb)
|
|
{
|
|
return l3mdev_l3_rcv(skb, AF_INET);
|
|
}
|
|
|
|
static inline
|
|
struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb)
|
|
{
|
|
return l3mdev_l3_rcv(skb, AF_INET6);
|
|
}
|
|
|
|
static inline
|
|
struct sk_buff *l3mdev_l3_out(struct sock *sk, struct sk_buff *skb, u16 proto)
|
|
{
|
|
struct net_device *dev = skb_dst(skb)->dev;
|
|
|
|
if (netif_is_l3_slave(dev)) {
|
|
struct net_device *master;
|
|
|
|
rcu_read_lock();
|
|
master = netdev_master_upper_dev_get_rcu(dev);
|
|
if (master && master->l3mdev_ops->l3mdev_l3_out)
|
|
skb = master->l3mdev_ops->l3mdev_l3_out(master, sk,
|
|
skb, proto);
|
|
rcu_read_unlock();
|
|
}
|
|
|
|
return skb;
|
|
}
|
|
|
|
static inline
|
|
struct sk_buff *l3mdev_ip_out(struct sock *sk, struct sk_buff *skb)
|
|
{
|
|
return l3mdev_l3_out(sk, skb, AF_INET);
|
|
}
|
|
|
|
static inline
|
|
struct sk_buff *l3mdev_ip6_out(struct sock *sk, struct sk_buff *skb)
|
|
{
|
|
return l3mdev_l3_out(sk, skb, AF_INET6);
|
|
}
|
|
#else
|
|
|
|
/*
 * Stubs used when CONFIG_NET_L3_MASTER_DEV is not set: both always
 * return 0.
 */
static inline int l3mdev_master_ifindex_rcu(const struct net_device *dev)
{
	return 0;
}
static inline int l3mdev_master_ifindex(struct net_device *dev)
{
	return 0;
}
|
|
|
|
/* Stub used when CONFIG_NET_L3_MASTER_DEV is not set: always returns 0. */
static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex)
{
	return 0;
}
|
|
|
|
/*
 * Stubs used when CONFIG_NET_L3_MASTER_DEV is not set: both always
 * return 0, whatever @ifindex is.
 */
static inline
int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex)
{
	return 0;
}
static inline
int l3mdev_master_upper_ifindex_by_index(struct net *net, int ifindex)
{
	return 0;
}
|
|
|
|
/* Stub used when CONFIG_NET_L3_MASTER_DEV is not set: always returns NULL. */
static inline
struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev)
{
	return NULL;
}
|
|
|
|
/*
 * Stubs used when CONFIG_NET_L3_MASTER_DEV is not set: all three always
 * return table id 0.
 */
static inline u32 l3mdev_fib_table_rcu(const struct net_device *dev)
{
	return 0;
}
static inline u32 l3mdev_fib_table(const struct net_device *dev)
{
	return 0;
}
static inline u32 l3mdev_fib_table_by_index(struct net *net, int ifindex)
{
	return 0;
}
|
|
|
|
/* Stub used when CONFIG_NET_L3_MASTER_DEV is not set: always returns false. */
static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
{
	return false;
}
|
|
|
|
/* Stub used when CONFIG_NET_L3_MASTER_DEV is not set: always returns NULL. */
static inline
struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6)
{
	return NULL;
}
|
|
|
|
/*
 * Stub used when CONFIG_NET_L3_MASTER_DEV is not set: returns @skb
 * unchanged.
 */
static inline
struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb)
{
	return skb;
}
|
|
|
|
/*
 * Stub used when CONFIG_NET_L3_MASTER_DEV is not set: returns @skb
 * unchanged.
 */
static inline
struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb)
{
	return skb;
}
|
|
|
|
/*
 * Stub used when CONFIG_NET_L3_MASTER_DEV is not set: ignores @sk and
 * returns @skb unchanged.
 */
static inline
struct sk_buff *l3mdev_ip_out(struct sock *sk, struct sk_buff *skb)
{
	return skb;
}
|
|
|
|
/*
 * Stub used when CONFIG_NET_L3_MASTER_DEV is not set: ignores @sk and
 * returns @skb unchanged.
 */
static inline
struct sk_buff *l3mdev_ip6_out(struct sock *sk, struct sk_buff *skb)
{
	return skb;
}
|
|
|
|
static inline
|
|
int l3mdev_table_lookup_register(enum l3mdev_type l3type,
|
|
lookup_by_table_id_t fn)
|
|
{
|
|
return -EOPNOTSUPP;
|
|
}
|
|
|
|
static inline
|
|
void l3mdev_table_lookup_unregister(enum l3mdev_type l3type,
|
|
lookup_by_table_id_t fn)
|
|
{
|
|
}
|
|
|
|
static inline
|
|
int l3mdev_ifindex_lookup_by_table_id(enum l3mdev_type l3type, struct net *net,
|
|
u32 table_id)
|
|
{
|
|
return -ENODEV;
|
|
}
|
|
|
|
/* Stub used when CONFIG_NET_L3_MASTER_DEV is not set: always returns 1. */
static inline
int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
			  struct fib_lookup_arg *arg)
{
	return 1;
}
|
|
|
|
/*
 * Stub used when CONFIG_NET_L3_MASTER_DEV is not set: an iif rule never
 * matches an L3 domain, so this always returns false.
 */
static inline
bool l3mdev_fib_rule_iif_match(const struct flowi *fl, int iifindex)
{
	return false;
}
|
|
|
|
/*
 * Stub used when CONFIG_NET_L3_MASTER_DEV is not set: an oif rule never
 * matches an L3 domain, so this always returns false.
 */
static inline
bool l3mdev_fib_rule_oif_match(const struct flowi *fl, int oifindex)
{
	return false;
}
|
|
|
|
/*
 * Stub used when CONFIG_NET_L3_MASTER_DEV is not set: leaves @fl
 * untouched.
 */
static inline
void l3mdev_update_flow(struct net *net, struct flowi *fl)
{
}
|
|
#endif
|
|
|
|
#endif /* _NET_L3MDEV_H_ */
|