mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/
synced 2025-04-19 20:58:31 +09:00
bpf-fixes
-----BEGIN PGP SIGNATURE-----

iQIzBAABCAAdFiEE+soXsSLHKoYyzcli6rmadz2vbToFAmf6sD8ACgkQ6rmadz2v
bTq86w//bbg2S1ZhSXXQvgRSbxfecvJ0r6XGDOaMsKxPXcqpbaMoSCYx2D8puO+b
xm0vc+5qXlzuTHq9I8flDKrWdA+/sHxLQhXjcBA796vaY6IgJEnapf3kENyzZ3Vp
agpNPlZe9FLaANDRivTFPVgzVjr07/3eL7VKItASksb/3yjBSa+vrIJVfGF1krQT
slxTMzVMzB+p0MdKVjmeGn5EodWXp8TdVzQBPb8vnCn7U1h1HULSh4j1+nZ/Z1yr
zC4/pVPmdDJe1H8ghBGm4f0nY+EwXPtZiVbXnYS2FhgjvthRKFYIyxN9F6kg7AD7
NG0T6xw/QYNfPTR40PSiV/WHhH5qa2zRVtlepVU7tqqmsyRXi+0Eq/MfJyiuNzgN
WWmJec0O/Ax4r2Xs/QgX3mFlRnLNi5gmc7fuOARmayAlqElZ9QdB2x6ebW5Fk4Qx
9oyQACpcu6/oUKgeMSo52MDa82wUPPxpC6qdsefmQYaAcOKM5MD4SNd+eEnfX03E
RAaItTW9az57a2BL9C/ejJO/SwY4Er+O8B3PO7GaKiURMSZa5nVlY+2QB2fJy6TA
7IvSYjFD5E4risMbZgPFCqWkQ0yHbY7zEn/tbcNC5AFZoKv70jELPQTLPXq7UPLe
BuKoL9VJyeXF7E1MQqQH33q3tfcwlIL++piCNHvTQoPadEba2dM=
=Mezb
-----END PGP SIGNATURE-----

Merge tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Pull bpf fixes from Alexei Starovoitov:

- Followup fixes for resilient spinlock (Kumar Kartikeya Dwivedi):
  - Make res_spin_lock test less verbose, since it was spamming BPF CI
    on failure, and make the check for AA deadlock stronger
  - Fix rebasing mistake and use architecture provided
    res_smp_cond_load_acquire
  - Convert BPF maps (queue_stack and ringbuf) to resilient spinlock
    to address long standing syzbot reports

- Make sure that classic BPF load instruction from SKF_[NET|LL]_OFF
  offsets works when skb is fragmented (Willem de Bruijn)

* tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  bpf: Convert ringbuf map to rqspinlock
  bpf: Convert queue_stack map to rqspinlock
  bpf: Use architecture provided res_smp_cond_load_acquire
  selftests/bpf: Make res_spin_lock AA test condition stronger
  selftests/net: test sk_filter support for SKF_NET_OFF on frags
  bpf: support SKF_NET_OFF and SKF_LL_OFF on skb frags
  selftests/bpf: Make res_spin_lock test less verbose
This commit is contained in commit b676ac484f.
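The queue_stack and ringbuf conversions in the hunks below follow one pattern: the NMI-aware raw_spin_trylock_irqsave()/raw_spin_lock_irqsave() split is replaced by a single resilient-lock acquisition whose failure is reported to the caller instead of risking a deadlock. A minimal sketch of that pattern, using a hypothetical toy_map structure (not part of the patch) and only the rqspinlock API visible in the diff:

#include <linux/errno.h>
#include <linux/types.h>
#include <asm/rqspinlock.h>

/* Hypothetical structure for illustration only; the real ones below are
 * struct bpf_queue_stack and struct bpf_ringbuf.
 */
struct toy_map {
        rqspinlock_t lock;      /* was: raw_spinlock_t lock; */
        u32 head, tail;
};

static void toy_map_init(struct toy_map *m)
{
        raw_res_spin_lock_init(&m->lock);       /* was: raw_spin_lock_init() */
}

static long toy_map_op(struct toy_map *m)
{
        unsigned long flags;

        /* The resilient lock can itself fail (e.g. AA/ABBA deadlock or
         * timeout), so one call replaces the old in_nmi() trylock/lock
         * split and the error is simply propagated to the caller.
         */
        if (raw_res_spin_lock_irqsave(&m->lock, flags))
                return -EBUSY;

        /* ... critical section over head/tail ... */

        raw_res_spin_unlock_irqrestore(&m->lock, flags);
        return 0;
}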
@@ -86,7 +86,7 @@
 
 #endif
 
-#define res_smp_cond_load_acquire_timewait(v, c) smp_cond_load_acquire_timewait(v, c, 0, 1)
+#define res_smp_cond_load_acquire(v, c) smp_cond_load_acquire_timewait(v, c, 0, 1)
 
 #include <asm-generic/rqspinlock.h>
@@ -9,13 +9,14 @@
 #include <linux/slab.h>
 #include <linux/btf_ids.h>
 #include "percpu_freelist.h"
+#include <asm/rqspinlock.h>
 
 #define QUEUE_STACK_CREATE_FLAG_MASK \
         (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)
 
 struct bpf_queue_stack {
         struct bpf_map map;
-        raw_spinlock_t lock;
+        rqspinlock_t lock;
         u32 head, tail;
         u32 size; /* max_entries + 1 */
@@ -78,7 +79,7 @@ static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
 
         qs->size = size;
 
-        raw_spin_lock_init(&qs->lock);
+        raw_res_spin_lock_init(&qs->lock);
 
         return &qs->map;
 }
@@ -98,12 +99,8 @@ static long __queue_map_get(struct bpf_map *map, void *value, bool delete)
         int err = 0;
         void *ptr;
 
-        if (in_nmi()) {
-                if (!raw_spin_trylock_irqsave(&qs->lock, flags))
-                        return -EBUSY;
-        } else {
-                raw_spin_lock_irqsave(&qs->lock, flags);
-        }
+        if (raw_res_spin_lock_irqsave(&qs->lock, flags))
+                return -EBUSY;
 
         if (queue_stack_map_is_empty(qs)) {
                 memset(value, 0, qs->map.value_size);
@@ -120,7 +117,7 @@ static long __queue_map_get(struct bpf_map *map, void *value, bool delete)
         }
 
 out:
-        raw_spin_unlock_irqrestore(&qs->lock, flags);
+        raw_res_spin_unlock_irqrestore(&qs->lock, flags);
         return err;
 }
@@ -133,12 +130,8 @@ static long __stack_map_get(struct bpf_map *map, void *value, bool delete)
         void *ptr;
         u32 index;
 
-        if (in_nmi()) {
-                if (!raw_spin_trylock_irqsave(&qs->lock, flags))
-                        return -EBUSY;
-        } else {
-                raw_spin_lock_irqsave(&qs->lock, flags);
-        }
+        if (raw_res_spin_lock_irqsave(&qs->lock, flags))
+                return -EBUSY;
 
         if (queue_stack_map_is_empty(qs)) {
                 memset(value, 0, qs->map.value_size);
@@ -157,7 +150,7 @@ static long __stack_map_get(struct bpf_map *map, void *value, bool delete)
         qs->head = index;
 
 out:
-        raw_spin_unlock_irqrestore(&qs->lock, flags);
+        raw_res_spin_unlock_irqrestore(&qs->lock, flags);
         return err;
 }
@@ -203,12 +196,8 @@ static long queue_stack_map_push_elem(struct bpf_map *map, void *value,
         if (flags & BPF_NOEXIST || flags > BPF_EXIST)
                 return -EINVAL;
 
-        if (in_nmi()) {
-                if (!raw_spin_trylock_irqsave(&qs->lock, irq_flags))
-                        return -EBUSY;
-        } else {
-                raw_spin_lock_irqsave(&qs->lock, irq_flags);
-        }
+        if (raw_res_spin_lock_irqsave(&qs->lock, irq_flags))
+                return -EBUSY;
 
         if (queue_stack_map_is_full(qs)) {
                 if (!replace) {
@@ -227,7 +216,7 @@ static long queue_stack_map_push_elem(struct bpf_map *map, void *value,
         qs->head = 0;
 
 out:
-        raw_spin_unlock_irqrestore(&qs->lock, irq_flags);
+        raw_res_spin_unlock_irqrestore(&qs->lock, irq_flags);
         return err;
 }
@@ -11,6 +11,7 @@
 #include <linux/kmemleak.h>
 #include <uapi/linux/btf.h>
 #include <linux/btf_ids.h>
+#include <asm/rqspinlock.h>
 
 #define RINGBUF_CREATE_FLAG_MASK (BPF_F_NUMA_NODE)
 
@@ -29,7 +30,7 @@ struct bpf_ringbuf {
         u64 mask;
         struct page **pages;
         int nr_pages;
-        raw_spinlock_t spinlock ____cacheline_aligned_in_smp;
+        rqspinlock_t spinlock ____cacheline_aligned_in_smp;
         /* For user-space producer ring buffers, an atomic_t busy bit is used
          * to synchronize access to the ring buffers in the kernel, rather than
          * the spinlock that is used for kernel-producer ring buffers. This is
@@ -173,7 +174,7 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node)
         if (!rb)
                 return NULL;
 
-        raw_spin_lock_init(&rb->spinlock);
+        raw_res_spin_lock_init(&rb->spinlock);
         atomic_set(&rb->busy, 0);
         init_waitqueue_head(&rb->waitq);
         init_irq_work(&rb->work, bpf_ringbuf_notify);
@@ -416,12 +417,8 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
 
         cons_pos = smp_load_acquire(&rb->consumer_pos);
 
-        if (in_nmi()) {
-                if (!raw_spin_trylock_irqsave(&rb->spinlock, flags))
-                        return NULL;
-        } else {
-                raw_spin_lock_irqsave(&rb->spinlock, flags);
-        }
+        if (raw_res_spin_lock_irqsave(&rb->spinlock, flags))
+                return NULL;
 
         pend_pos = rb->pending_pos;
         prod_pos = rb->producer_pos;
@@ -446,7 +443,7 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
          */
         if (new_prod_pos - cons_pos > rb->mask ||
             new_prod_pos - pend_pos > rb->mask) {
-                raw_spin_unlock_irqrestore(&rb->spinlock, flags);
+                raw_res_spin_unlock_irqrestore(&rb->spinlock, flags);
                 return NULL;
         }
 
@@ -458,7 +455,7 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
         /* pairs with consumer's smp_load_acquire() */
         smp_store_release(&rb->producer_pos, new_prod_pos);
 
-        raw_spin_unlock_irqrestore(&rb->spinlock, flags);
+        raw_res_spin_unlock_irqrestore(&rb->spinlock, flags);
 
         return (void *)hdr + BPF_RINGBUF_HDR_SZ;
 }
@@ -253,7 +253,7 @@ static noinline int check_timeout(rqspinlock_t *lock, u32 mask,
         })
 #else
 #define RES_CHECK_TIMEOUT(ts, ret, mask)        \
-        ({ (ret) = check_timeout(&(ts)); })
+        ({ (ret) = check_timeout((lock), (mask), &(ts)); })
 #endif
 
 /*
@@ -218,24 +218,36 @@ BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
         return 0;
 }
 
+static int bpf_skb_load_helper_convert_offset(const struct sk_buff *skb, int offset)
+{
+        if (likely(offset >= 0))
+                return offset;
+
+        if (offset >= SKF_NET_OFF)
+                return offset - SKF_NET_OFF + skb_network_offset(skb);
+
+        if (offset >= SKF_LL_OFF && skb_mac_header_was_set(skb))
+                return offset - SKF_LL_OFF + skb_mac_offset(skb);
+
+        return INT_MIN;
+}
+
 BPF_CALL_4(bpf_skb_load_helper_8, const struct sk_buff *, skb, const void *,
            data, int, headlen, int, offset)
 {
-        u8 tmp, *ptr;
+        u8 tmp;
         const int len = sizeof(tmp);
 
-        if (offset >= 0) {
-                if (headlen - offset >= len)
-                        return *(u8 *)(data + offset);
-                if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
-                        return tmp;
-        } else {
-                ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
-                if (likely(ptr))
-                        return *(u8 *)ptr;
-        }
+        offset = bpf_skb_load_helper_convert_offset(skb, offset);
+        if (offset == INT_MIN)
+                return -EFAULT;
 
-        return -EFAULT;
+        if (headlen - offset >= len)
+                return *(u8 *)(data + offset);
+        if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
+                return tmp;
+        else
+                return -EFAULT;
 }
 
 BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb,
@@ -248,21 +260,19 @@ BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb,
 BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *,
            data, int, headlen, int, offset)
 {
-        __be16 tmp, *ptr;
+        __be16 tmp;
         const int len = sizeof(tmp);
 
-        if (offset >= 0) {
-                if (headlen - offset >= len)
-                        return get_unaligned_be16(data + offset);
-                if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
-                        return be16_to_cpu(tmp);
-        } else {
-                ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
-                if (likely(ptr))
-                        return get_unaligned_be16(ptr);
-        }
+        offset = bpf_skb_load_helper_convert_offset(skb, offset);
+        if (offset == INT_MIN)
+                return -EFAULT;
 
-        return -EFAULT;
+        if (headlen - offset >= len)
+                return get_unaligned_be16(data + offset);
+        if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
+                return be16_to_cpu(tmp);
+        else
+                return -EFAULT;
 }
 
 BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb,
@@ -275,21 +285,19 @@ BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb,
 BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *,
            data, int, headlen, int, offset)
 {
-        __be32 tmp, *ptr;
+        __be32 tmp;
         const int len = sizeof(tmp);
 
-        if (likely(offset >= 0)) {
-                if (headlen - offset >= len)
-                        return get_unaligned_be32(data + offset);
-                if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
-                        return be32_to_cpu(tmp);
-        } else {
-                ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
-                if (likely(ptr))
-                        return get_unaligned_be32(ptr);
-        }
+        offset = bpf_skb_load_helper_convert_offset(skb, offset);
+        if (offset == INT_MIN)
+                return -EFAULT;
 
-        return -EFAULT;
+        if (headlen - offset >= len)
+                return get_unaligned_be32(data + offset);
+        if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
+                return be32_to_cpu(tmp);
+        else
+                return -EFAULT;
 }
 
 BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb,
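With bpf_skb_load_helper_convert_offset() resolving the SKF_NET_OFF/SKF_LL_OFF bases and skb_copy_bits() handling non-linear data, classic BPF relative loads now also work when the referenced bytes sit in skb frags. As a rough, self-contained illustration (the attach_net_off_filter() helper is hypothetical, not from the patch), a socket filter of this shape is what benefits; the new selftest added further below exercises the same SKF_NET_OFF load end to end over a tap device in NAPI frags mode:

#include <stddef.h>
#include <netinet/in.h>
#include <netinet/ip6.h>
#include <linux/filter.h>
#include <sys/socket.h>

/* Attach a classic BPF filter that loads the IPv6 next-header byte through
 * a SKF_NET_OFF relative offset and accepts only UDP packets. Returns the
 * plain setsockopt() result: 0 on success, -1 with errno set on failure.
 */
static int attach_net_off_filter(int fd)
{
        struct sock_filter code[] = {
                /* A = byte at network header + offset of ip6_nxt */
                BPF_STMT(BPF_LD + BPF_B + BPF_ABS,
                         SKF_NET_OFF + offsetof(struct ip6_hdr, ip6_nxt)),
                BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 0, 1),
                BPF_STMT(BPF_RET + BPF_K, 0xFFFF),      /* accept */
                BPF_STMT(BPF_RET + BPF_K, 0),           /* drop */
        };
        struct sock_fprog prog = {
                .len = sizeof(code) / sizeof(code[0]),
                .filter = code,
        };

        return setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog));
}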
@@ -25,8 +25,11 @@ static void *spin_lock_thread(void *arg)
 
         while (!READ_ONCE(skip)) {
                 err = bpf_prog_test_run_opts(prog_fd, &topts);
-                ASSERT_OK(err, "test_run");
-                ASSERT_OK(topts.retval, "test_run retval");
+                if (err || topts.retval) {
+                        ASSERT_OK(err, "test_run");
+                        ASSERT_OK(topts.retval, "test_run retval");
+                        break;
+                }
         }
         pthread_exit(arg);
 }
@@ -38,13 +38,14 @@ int res_spin_lock_test(struct __sk_buff *ctx)
         r = bpf_res_spin_lock(&elem1->lock);
         if (r)
                 return r;
-        if (!bpf_res_spin_lock(&elem2->lock)) {
+        r = bpf_res_spin_lock(&elem2->lock);
+        if (!r) {
                 bpf_res_spin_unlock(&elem2->lock);
                 bpf_res_spin_unlock(&elem1->lock);
                 return -1;
         }
         bpf_res_spin_unlock(&elem1->lock);
-        return 0;
+        return r != -EDEADLK;
 }
 
 SEC("tc")
@@ -124,12 +125,15 @@ int res_spin_lock_test_held_lock_max(struct __sk_buff *ctx)
         /* Trigger AA, after exhausting entries in the held lock table. This
          * time, only the timeout can save us, as AA detection won't succeed.
          */
-        if (!bpf_res_spin_lock(locks[34])) {
+        ret = bpf_res_spin_lock(locks[34]);
+        if (!ret) {
                 bpf_res_spin_unlock(locks[34]);
                 ret = 1;
                 goto end;
         }
 
+        ret = ret != -ETIMEDOUT ? 2 : 0;
+
 end:
         for (i = i - 1; i >= 0; i--)
                 bpf_res_spin_unlock(locks[i]);
tools/testing/selftests/net/.gitignore (vendored, 1 line changed)
@@ -39,6 +39,7 @@ scm_rights
 sk_bind_sendto_listen
 sk_connect_zero_addr
 sk_so_peek_off
+skf_net_off
 socket
 so_incoming_cpu
 so_netns_cookie
@@ -106,6 +106,8 @@ TEST_PROGS += ipv6_route_update_soft_lockup.sh
 TEST_PROGS += busy_poll_test.sh
 TEST_GEN_PROGS += proc_net_pktgen
 TEST_PROGS += lwt_dst_cache_ref_loop.sh
+TEST_PROGS += skf_net_off.sh
+TEST_GEN_FILES += skf_net_off
 
 # YNL files, must be before "include ..lib.mk"
 YNL_GEN_FILES := busy_poller netlink-dumps
tools/testing/selftests/net/skf_net_off.c (new file, 244 lines)
@@ -0,0 +1,244 @@
// SPDX-License-Identifier: GPL-2.0

/* Open a tun device.
 *
 * [modifications: use IFF_NAPI_FRAGS, add sk filter]
 *
 * Expects the device to have been configured previously, e.g.:
 * sudo ip tuntap add name tap1 mode tap
 * sudo ip link set tap1 up
 * sudo ip link set dev tap1 addr 02:00:00:00:00:01
 * sudo ip -6 addr add fdab::1 peer fdab::2 dev tap1 nodad
 *
 * And to avoid premature pskb_may_pull:
 *
 * sudo ethtool -K tap1 gro off
 * sudo bash -c 'echo 0 > /proc/sys/net/ipv4/ip_early_demux'
 */

#define _GNU_SOURCE

#include <arpa/inet.h>
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <getopt.h>
#include <linux/filter.h>
#include <linux/if.h>
#include <linux/if_packet.h>
#include <linux/if_tun.h>
#include <linux/ipv6.h>
#include <netinet/if_ether.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/udp.h>
#include <poll.h>
#include <signal.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/poll.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <unistd.h>

static bool cfg_do_filter;
static bool cfg_do_frags;
static int cfg_dst_port = 8000;
static char *cfg_ifname;

static int tun_open(const char *tun_name)
{
        struct ifreq ifr = {0};
        int fd, ret;

        fd = open("/dev/net/tun", O_RDWR);
        if (fd == -1)
                error(1, errno, "open /dev/net/tun");

        ifr.ifr_flags = IFF_TAP;
        if (cfg_do_frags)
                ifr.ifr_flags |= IFF_NAPI | IFF_NAPI_FRAGS;

        strncpy(ifr.ifr_name, tun_name, IFNAMSIZ - 1);

        ret = ioctl(fd, TUNSETIFF, &ifr);
        if (ret)
                error(1, ret, "ioctl TUNSETIFF");

        return fd;
}

static void sk_set_filter(int fd)
{
        const int offset_proto = offsetof(struct ip6_hdr, ip6_nxt);
        const int offset_dport = sizeof(struct ip6_hdr) + offsetof(struct udphdr, dest);

        /* Filter UDP packets with destination port cfg_dst_port */
        struct sock_filter filter_code[] = {
                BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
                BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),
                BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_NET_OFF + offset_proto),
                BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 0, 2),
                BPF_STMT(BPF_LD + BPF_H + BPF_ABS, SKF_NET_OFF + offset_dport),
                BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dst_port, 1, 0),
                BPF_STMT(BPF_RET + BPF_K, 0),
                BPF_STMT(BPF_RET + BPF_K, 0xFFFF),
        };

        struct sock_fprog filter = {
                sizeof(filter_code) / sizeof(filter_code[0]),
                filter_code,
        };

        if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &filter, sizeof(filter)))
                error(1, errno, "setsockopt attach filter");
}

static int raw_open(void)
{
        int fd;

        fd = socket(PF_INET6, SOCK_RAW, IPPROTO_UDP);
        if (fd == -1)
                error(1, errno, "socket raw (udp)");

        if (cfg_do_filter)
                sk_set_filter(fd);

        return fd;
}

static void tun_write(int fd)
{
        const char eth_src[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x02 };
        const char eth_dst[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
        struct tun_pi pi = {0};
        struct ipv6hdr ip6h = {0};
        struct udphdr uh = {0};
        struct ethhdr eth = {0};
        uint32_t payload;
        struct iovec iov[5];
        int ret;

        pi.proto = htons(ETH_P_IPV6);

        memcpy(eth.h_source, eth_src, sizeof(eth_src));
        memcpy(eth.h_dest, eth_dst, sizeof(eth_dst));
        eth.h_proto = htons(ETH_P_IPV6);

        ip6h.version = 6;
        ip6h.payload_len = htons(sizeof(uh) + sizeof(uint32_t));
        ip6h.nexthdr = IPPROTO_UDP;
        ip6h.hop_limit = 8;
        if (inet_pton(AF_INET6, "fdab::2", &ip6h.saddr) != 1)
                error(1, errno, "inet_pton src");
        if (inet_pton(AF_INET6, "fdab::1", &ip6h.daddr) != 1)
                error(1, errno, "inet_pton src");

        uh.source = htons(8000);
        uh.dest = htons(cfg_dst_port);
        uh.len = ip6h.payload_len;
        uh.check = 0;

        payload = htonl(0xABABABAB);            /* Covered in IPv6 length */

        iov[0].iov_base = &pi;
        iov[0].iov_len = sizeof(pi);
        iov[1].iov_base = &eth;
        iov[1].iov_len = sizeof(eth);
        iov[2].iov_base = &ip6h;
        iov[2].iov_len = sizeof(ip6h);
        iov[3].iov_base = &uh;
        iov[3].iov_len = sizeof(uh);
        iov[4].iov_base = &payload;
        iov[4].iov_len = sizeof(payload);

        ret = writev(fd, iov, sizeof(iov) / sizeof(iov[0]));
        if (ret <= 0)
                error(1, errno, "writev");
}

static void raw_read(int fd)
{
        struct timeval tv = { .tv_usec = 100 * 1000 };
        struct msghdr msg = {0};
        struct iovec iov[2];
        struct udphdr uh;
        uint32_t payload[2];
        int ret;

        if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
                error(1, errno, "setsockopt rcvtimeo udp");

        iov[0].iov_base = &uh;
        iov[0].iov_len = sizeof(uh);

        iov[1].iov_base = payload;
        iov[1].iov_len = sizeof(payload);

        msg.msg_iov = iov;
        msg.msg_iovlen = sizeof(iov) / sizeof(iov[0]);

        ret = recvmsg(fd, &msg, 0);
        if (ret <= 0)
                error(1, errno, "read raw");
        if (ret != sizeof(uh) + sizeof(payload[0]))
                error(1, errno, "read raw: len=%d\n", ret);

        fprintf(stderr, "raw recv: 0x%x\n", payload[0]);
}

static void parse_opts(int argc, char **argv)
{
        int c;

        while ((c = getopt(argc, argv, "fFi:")) != -1) {
                switch (c) {
                case 'f':
                        cfg_do_filter = true;
                        printf("bpf filter enabled\n");
                        break;
                case 'F':
                        cfg_do_frags = true;
                        printf("napi frags mode enabled\n");
                        break;
                case 'i':
                        cfg_ifname = optarg;
                        break;
                default:
                        error(1, 0, "unknown option %c", optopt);
                        break;
                }
        }

        if (!cfg_ifname)
                error(1, 0, "must specify tap interface name (-i)");
}

int main(int argc, char **argv)
{
        int fdt, fdr;

        parse_opts(argc, argv);

        fdr = raw_open();
        fdt = tun_open(cfg_ifname);

        tun_write(fdt);
        raw_read(fdr);

        if (close(fdt))
                error(1, errno, "close tun");
        if (close(fdr))
                error(1, errno, "close udp");

        fprintf(stderr, "OK\n");
        return 0;
}
tools/testing/selftests/net/skf_net_off.sh (new executable file, 30 lines)
@@ -0,0 +1,30 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0

readonly NS="ns-$(mktemp -u XXXXXX)"

cleanup() {
        ip netns del $NS
}

ip netns add $NS
trap cleanup EXIT

ip -netns $NS link set lo up
ip -netns $NS tuntap add name tap1 mode tap
ip -netns $NS link set tap1 up
ip -netns $NS link set dev tap1 addr 02:00:00:00:00:01
ip -netns $NS -6 addr add fdab::1 peer fdab::2 dev tap1 nodad
ip netns exec $NS ethtool -K tap1 gro off

# disable early demux, else udp_v6_early_demux pulls udp header into linear
ip netns exec $NS sysctl -w net.ipv4.ip_early_demux=0

echo "no filter"
ip netns exec $NS ./skf_net_off -i tap1

echo "filter, linear skb (-f)"
ip netns exec $NS ./skf_net_off -i tap1 -f

echo "filter, fragmented skb (-f) (-F)"
ip netns exec $NS ./skf_net_off -i tap1 -f -F