Tizian Maxime Weigt aef70ec801 Adding vLan forwarding and offloading Support
Adding VLAN forwarding and offloading support for XDP offloading.
2025-10-27 21:43:40 +00:00

260 lines
6.3 KiB
C

#define KBUILD_MODNAME "xdp_l3fwd"
#include <linux/bpf.h>
#include <linux/in.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/udp.h>
#include <linux/tcp.h>
#include <linux/types.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
/*
 * Address-family values written to bpf_fib_lookup.family below. They are
 * defined here only when the included headers did not already provide them.
 */
#ifndef AF_INET
#define AF_INET 2
#endif
#ifndef AF_INET6
#define AF_INET6 10
#endif
/*
 * Mask applied to the first 32-bit word of the IPv6 header to build
 * bpf_fib_lookup.flowinfo. The constant is converted with bpf_htonl() so it
 * can be AND-ed directly with the word as read from the packet; it clears the
 * top four bits (the IP version field) and keeps the remaining 28 bits.
 */
#define IPV6_FLOWINFO_MASK bpf_htonl(0x0FFFFFFF)
/* Maximum number of stacked VLAN tags parse_vlan() skips over. */
#define VLAN_MAX_DEPTH 2 /* Support double-tagged VLANs */
/*
 * Forwarding ports.
 *
 * Device map consulted after a successful FIB lookup: the egress ifindex
 * returned by bpf_fib_lookup() is used as the key, and the packet is only
 * redirected (via bpf_redirect_map) when an entry for that key exists.
 * Nothing in this file populates the map.
 * NOTE(review): presumably the user-space loader fills it — confirm.
 */
struct {
__uint(type, BPF_MAP_TYPE_DEVMAP);
__uint(key_size, sizeof(int));
__uint(value_size, sizeof(int));
__uint(max_entries, 512);
} xdp_l3fwd_ports SEC(".maps");
/*
 * Stats key — identifies a connection or flow.
 *
 * Used as the key of the xdp_flow_stats hash map, so every byte takes part
 * in the lookup. xdp_l3fwd_flags() zero-initialises the key before filling
 * it; fields that are not written for a given packet (ports of non-TCP/UDP
 * traffic, the unused tail of the address unions for IPv4) stay zero.
 *
 * Addresses and ports are copied from the packet headers without byte
 * swapping; vlan_id is converted with bpf_ntohs() by parse_vlan().
 */
struct flow_key {
__u8 proto; /* iph->protocol for IPv4, ip6h->nexthdr for IPv6 */
__u8 pad[3]; /* explicit padding, always zero */
__u16 vlan_id; /* VLAN ID of the outermost tag (0 if untagged) */
__u16 pad2; /* explicit padding, always zero */
/* Source address: ipv4_src for ETH_P_IP, ipv6_src for ETH_P_IPV6. */
union {
__u32 ipv4_src;
__u8 ipv6_src[16];
};
/* Destination address, same layout as the source address. */
union {
__u32 ipv4_dst;
__u8 ipv6_dst[16];
};
__u16 sport; /* TCP/UDP source port as read from the header, else 0 */
__u16 dport; /* TCP/UDP destination port as read from the header, else 0 */
};
/*
 * Stats value — counts packets and bytes.
 *
 * One instance per flow_key in xdp_flow_stats. record_stats() adds 1 to
 * packets and the frame length (data_end - data) to bytes for every packet
 * it is called for.
 */
struct flow_stats {
__u64 packets; /* number of packets recorded for the flow */
__u64 bytes; /* sum of frame lengths recorded for the flow */
};
/*
 * Hash map to track stats per flow.
 *
 * Key: struct flow_key, value: struct flow_stats, at most 65536 flows.
 * Entries are created and updated by record_stats(); no code in this file
 * deletes them.
 */
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(key_size, sizeof(struct flow_key));
__uint(value_size, sizeof(struct flow_stats));
__uint(max_entries, 65536);
} xdp_flow_stats SEC(".maps");
/*
 * Decrement the IPv4 TTL by one and patch the header checksum in place.
 *
 * Instead of recomputing the checksum, the stored value is adjusted
 * incrementally: bpf_htons(0x0100) is added to it and the end-around carry
 * is folded back in.
 *
 * Returns the new TTL value.
 */
static __always_inline int ip_decrease_ttl(struct iphdr *iph)
{
	__u32 sum = (__u32)iph->check + (__u32)bpf_htons(0x0100);

	/* Fold the carry of the one's-complement addition. */
	if (sum >= 0xFFFF)
		sum += 1;

	iph->check = (__sum16)sum;
	iph->ttl -= 1;
	return iph->ttl;
}
/* Record stats in the xdp_flow_stats map */
static __always_inline void record_stats(struct xdp_md *ctx, struct flow_key *key, __u64 bytes)
{
struct flow_stats *stats;
stats = bpf_map_lookup_elem(&xdp_flow_stats, key);
if (stats) {
__sync_fetch_and_add(&stats->packets, 1);
__sync_fetch_and_add(&stats->bytes, bytes);
} else {
struct flow_stats new_stats = {
.packets = 1,
.bytes = bytes,
};
bpf_map_update_elem(&xdp_flow_stats, key, &new_stats, BPF_ANY);
}
}
/*
 * Skip up to VLAN_MAX_DEPTH VLAN tags (802.1Q or 802.1ad) that follow the
 * Ethernet header.
 *
 * data, data_end - packet bounds.
 * nh_off         - in/out: offset of the next header; advanced past each tag.
 * h_proto        - in/out: EtherType in network byte order; replaced by the
 *                  encapsulated protocol of each tag that is skipped.
 * vlan_id        - out: VLAN ID (host byte order) of the outermost tag; left
 *                  untouched when the frame carries no tag.
 *
 * Returns 0 on success and -1 when a tag would extend past data_end.
 */
static __always_inline int parse_vlan(void *data, void *data_end, __u64 *nh_off, __u16 *h_proto, __u16 *vlan_id)
{
	struct vlan_hdr {
		__be16 h_vlan_TCI;
		__be16 h_vlan_encapsulated_proto;
	};
	const __u16 tpid_8021q = bpf_htons(ETH_P_8021Q);
	const __u16 tpid_8021ad = bpf_htons(ETH_P_8021AD);
	int depth;

#pragma unroll
	for (depth = 0; depth < VLAN_MAX_DEPTH; depth++) {
		struct vlan_hdr *tag;
		__u16 proto = *h_proto;

		/* Stop as soon as the current EtherType is not a VLAN TPID. */
		if (!(proto == tpid_8021q || proto == tpid_8021ad))
			break;

		tag = data + *nh_off;
		if ((void *)(tag + 1) > data_end)
			return -1;

		/* Only the outermost tag contributes the reported VLAN ID. */
		if (depth == 0)
			*vlan_id = bpf_ntohs(tag->h_vlan_TCI) & 0x0FFF;

		*h_proto = tag->h_vlan_encapsulated_proto;
		*nh_off += sizeof(*tag);
	}

	return 0;
}
static __always_inline int xdp_l3fwd_flags(struct xdp_md *ctx, __u32 flags)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
struct bpf_fib_lookup fib_params;
struct ethhdr *eth = data;
struct ipv6hdr *ip6h;
struct iphdr *iph;
__u16 h_proto;
__u64 nh_off;
int rc;
__u16 vlan_id = 0;
nh_off = sizeof(*eth);
if (data + nh_off > data_end)
return XDP_DROP;
__builtin_memset(&fib_params, 0, sizeof(fib_params));
h_proto = eth->h_proto;
/* Parse VLAN headers if present */
if (parse_vlan(data, data_end, &nh_off, &h_proto, &vlan_id) < 0)
return XDP_DROP;
struct flow_key key = {};
key.vlan_id = vlan_id;
__u64 bytes = data_end - data;
if (h_proto == bpf_htons(ETH_P_IP)) {
iph = data + nh_off;
if ((void *)(iph + 1) > data_end)
return XDP_DROP;
key.proto = iph->protocol;
key.ipv4_src = iph->saddr;
key.ipv4_dst = iph->daddr;
void *l4_hdr = (void *)iph + (iph->ihl * 4);
if (l4_hdr + sizeof(struct udphdr) <= data_end) {
if (iph->protocol == IPPROTO_TCP) {
struct tcphdr *tcph = l4_hdr;
key.sport = tcph->source;
key.dport = tcph->dest;
} else if (iph->protocol == IPPROTO_UDP) {
struct udphdr *udph = l4_hdr;
key.sport = udph->source;
key.dport = udph->dest;
}
}
/* Record stats before forwarding */
record_stats(ctx, &key, bytes);
if (iph->ttl <= 1)
return XDP_PASS;
fib_params.family = AF_INET;
fib_params.tos = iph->tos;
fib_params.l4_protocol = iph->protocol;
fib_params.tot_len = bpf_ntohs(iph->tot_len);
fib_params.ipv4_src = iph->saddr;
fib_params.ipv4_dst = iph->daddr;
} else if (h_proto == bpf_htons(ETH_P_IPV6)) {
ip6h = data + nh_off;
if ((void *)(ip6h + 1) > data_end)
return XDP_DROP;
key.proto = ip6h->nexthdr;
__builtin_memcpy(key.ipv6_src, &ip6h->saddr, 16);
__builtin_memcpy(key.ipv6_dst, &ip6h->daddr, 16);
void *l4_hdr = (void *)(ip6h + 1);
if (l4_hdr + sizeof(struct udphdr) <= data_end) {
if (ip6h->nexthdr == IPPROTO_TCP) {
struct tcphdr *tcph = l4_hdr;
key.sport = tcph->source;
key.dport = tcph->dest;
} else if (ip6h->nexthdr == IPPROTO_UDP) {
struct udphdr *udph = l4_hdr;
key.sport = udph->source;
key.dport = udph->dest;
}
}
/* Record stats before forwarding */
record_stats(ctx, &key, bytes);
if (ip6h->hop_limit <= 1)
return XDP_PASS;
fib_params.family = AF_INET6;
fib_params.flowinfo = *(__be32 *)ip6h & IPV6_FLOWINFO_MASK;
fib_params.l4_protocol = ip6h->nexthdr;
fib_params.tot_len = bpf_ntohs(ip6h->payload_len);
__builtin_memcpy(fib_params.ipv6_src, &ip6h->saddr, 16);
__builtin_memcpy(fib_params.ipv6_dst, &ip6h->daddr, 16);
} else {
return XDP_PASS;
}
fib_params.ifindex = ctx->ingress_ifindex;
rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags);
if (rc == BPF_FIB_LKUP_RET_SUCCESS) {
if (!bpf_map_lookup_elem(&xdp_l3fwd_ports, &fib_params.ifindex))
return XDP_PASS;
if (h_proto == bpf_htons(ETH_P_IP))
ip_decrease_ttl(iph);
else if (h_proto == bpf_htons(ETH_P_IPV6))
ip6h->hop_limit--;
__builtin_memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
__builtin_memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
return bpf_redirect_map(&xdp_l3fwd_ports, fib_params.ifindex, 0);
}
return XDP_PASS;
}
/* XDP entry point that runs the forwarder with no bpf_fib_lookup() flags set. */
SEC("xdp_l3fwd")
int xdp_l3fwd_prog(struct xdp_md *ctx)
{
	const __u32 fib_flags = 0;

	return xdp_l3fwd_flags(ctx, fib_flags);
}
/* XDP entry point that runs the forwarder with BPF_FIB_LOOKUP_DIRECT set. */
SEC("xdp_l3fwd_direct")
int xdp_l3fwd_direct_prog(struct xdp_md *ctx)
{
	const __u32 fib_flags = BPF_FIB_LOOKUP_DIRECT;

	return xdp_l3fwd_flags(ctx, fib_flags);
}
/*
 * License string emitted into the "license" ELF section of the object.
 * NOTE(review): the kernel checks it when deciding whether GPL-only helpers
 * may be used — confirm before changing.
 */
char _license[] SEC("license") = "GPL";