/* xdp_l3fwd: XDP programs that forward IPv4/IPv6 packets via the kernel FIB and keep per-flow packet/byte counters. */
#define KBUILD_MODNAME "xdp_l3fwd"
|
|
#include <linux/bpf.h>
|
|
#include <linux/in.h>
|
|
#include <linux/if_ether.h>
|
|
#include <linux/if_packet.h>
|
|
#include <linux/if_vlan.h>
|
|
#include <linux/ip.h>
|
|
#include <linux/ipv6.h>
|
|
#include <linux/udp.h>
|
|
#include <linux/tcp.h>
|
|
#include <linux/types.h>
|
|
|
|
#include <bpf/bpf_helpers.h>
|
|
#include <bpf/bpf_endian.h>
|
|
|
|
/*
 * Address-family numbers written into bpf_fib_lookup.family. They are only
 * defined here when the headers included above have not provided them.
 */
#if !defined(AF_INET)
#define AF_INET 2
#endif

#if !defined(AF_INET6)
#define AF_INET6 10
#endif

/*
 * Applied to the first 32-bit word of the IPv6 header (still in network
 * byte order) to produce the value stored in bpf_fib_lookup.flowinfo.
 */
#define IPV6_FLOWINFO_MASK bpf_htonl(0x0FFFFFFF)

/* Maximum number of stacked VLAN tags parse_vlan() walks. */
#define VLAN_MAX_DEPTH 2

/* Maximum number of IPv6 extension headers skip_ip6hdrext() walks. */
#define IPV6_EXT_MAX_CHAIN 6
|
|
|
|
struct vlan_hdr {
|
|
__be16 h_vlan_TCI;
|
|
__be16 h_vlan_encapsulated_proto;
|
|
};
|
|
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_DEVMAP);
|
|
__type(key, __u32);
|
|
__type(value, __u32);
|
|
__uint(max_entries, 512);
|
|
} xdp_l3fwd_ports SEC(".maps");
|
|
|
|
struct flow_key {
|
|
__u8 proto;
|
|
__u8 pad[3];
|
|
__u16 vlan_id;
|
|
__u16 pad2;
|
|
|
|
union {
|
|
__u32 ipv4_src;
|
|
__u8 ipv6_src[16];
|
|
};
|
|
union {
|
|
__u32 ipv4_dst;
|
|
__u8 ipv6_dst[16];
|
|
};
|
|
|
|
__u16 sport;
|
|
__u16 dport;
|
|
};
|
|
|
|
struct flow_stats {
|
|
__u64 packets;
|
|
__u64 bytes;
|
|
};
|
|
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
|
|
__uint(key_size, sizeof(struct flow_key));
|
|
__uint(value_size, sizeof(struct flow_stats));
|
|
__uint(max_entries, 65536);
|
|
} xdp_flow_stats SEC(".maps");
|
|
|
|
static __always_inline int ip_decrease_ttl(struct iphdr *iph)
|
|
{
|
|
__u32 check = (__u32)iph->check;
|
|
check += (__u32)bpf_htons(0x0100);
|
|
iph->check = (__sum16)(check + (check >= 0xFFFF));
|
|
return --iph->ttl;
|
|
}
|
|
|
|
/*
 * Account one packet of @bytes bytes to the flow identified by @key in
 * xdp_flow_stats.
 *
 * An existing entry is bumped in place; otherwise a fresh entry holding
 * exactly this packet is inserted. The result of the insert is not checked.
 */
static __always_inline void record_stats(struct flow_key *key, __u64 bytes)
{
	struct flow_stats *entry = bpf_map_lookup_elem(&xdp_flow_stats, key);

	if (!entry) {
		struct flow_stats first = {
			.packets = 1,
			.bytes = bytes,
		};

		bpf_map_update_elem(&xdp_flow_stats, key, &first, BPF_ANY);
		return;
	}

	entry->packets += 1;
	entry->bytes += bytes;
}
|
|
|
|
/*
 * Step over up to VLAN_MAX_DEPTH 802.1Q / 802.1ad tags.
 *
 * @data, @data_end: packet bounds.
 * @nh_off:  in/out offset from @data; advanced past every tag consumed.
 * @h_proto: in/out EtherType (network byte order); replaced by the
 *           encapsulated EtherType of each tag consumed.
 * @vlan_id: set to the 12-bit VLAN ID (host byte order) of the first tag
 *           only; left untouched when the frame carries no tag.
 *
 * Returns 0 on success and -1 when a tag would extend past @data_end.
 */
static __always_inline int parse_vlan(void *data, void *data_end, __u64 *nh_off, __u16 *h_proto, __u16 *vlan_id)
{
	int depth;

#pragma unroll
	for (depth = 0; depth < VLAN_MAX_DEPTH; depth++) {
		struct vlan_hdr *tag;
		int is_tagged = *h_proto == bpf_htons(ETH_P_8021Q) ||
				*h_proto == bpf_htons(ETH_P_8021AD);

		if (!is_tagged)
			break;

		tag = (void *)((char *)data + *nh_off);
		if ((void *)(tag + 1) > data_end)
			return -1;

		/* Only the first tag contributes the VLAN ID. */
		if (depth == 0)
			*vlan_id = bpf_ntohs(tag->h_vlan_TCI) & 0x0FFF;

		*nh_off += sizeof(*tag);
		*h_proto = tag->h_vlan_encapsulated_proto;
	}

	return 0;
}
|
|
|
|
/* Fragment-offset bits of the 16-bit field at byte 2 of an IPv6 Fragment
 * header (host byte order); the low three bits are reserved/M flag. */
#define XDP_L3FWD_IP6_FRAG_OFFSET 0xFFF8

/*
 * Walk the IPv6 extension-header chain, at most IPV6_EXT_MAX_CHAIN headers.
 *
 * @data, @data_end: packet bounds.
 * @nh_off: in/out offset from @data of the header identified by @next;
 *          advanced past every extension header that is skipped.
 * @next:   next-header value taken from the fixed IPv6 header.
 *
 * Returns the first next-header value that is not one of the extension
 * headers handled here, with *@nh_off pointing at that header. For a
 * fragment whose offset is non-zero the walk stops at the Fragment header
 * and IPPROTO_FRAGMENT is returned, because such a packet carries no
 * upper-layer header to parse. Returns -1 when an extension header is
 * truncated or the chain is longer than IPV6_EXT_MAX_CHAIN.
 */
static __always_inline int skip_ip6hdrext(void *data, void *data_end, __u64 *nh_off, __u8 next)
{
	struct ipv6_opt_hdr *hdr;

#pragma unroll
	for (int i = 0; i < IPV6_EXT_MAX_CHAIN; i++) {
		/*
		 * Decide whether @next is an extension header before touching
		 * packet memory: a terminal protocol needs no bytes at
		 * *nh_off, so it must not fail the bounds check below.
		 */
		switch (next) {
		case IPPROTO_HOPOPTS:
		case IPPROTO_DSTOPTS:
		case IPPROTO_ROUTING:
		case IPPROTO_MH:
		case IPPROTO_AH:
		case IPPROTO_FRAGMENT:
			break;
		default:
			return next;
		}

		hdr = (void *)((char *)data + *nh_off);
		if ((void *)(hdr + 1) > data_end)
			return -1;

		if (next == IPPROTO_FRAGMENT) {
			/* 16-bit offset/flags field follows nexthdr + reserved. */
			__be16 *frag_off = (void *)((char *)hdr + 2);

			if ((void *)(frag_off + 1) > data_end)
				return -1;

			/* Non-first fragment: what follows is payload, not L4. */
			if (*frag_off & bpf_htons(XDP_L3FWD_IP6_FRAG_OFFSET))
				return next;

			/* Fragment header has a fixed size of 8 bytes. */
			*nh_off += 8;
		} else if (next == IPPROTO_AH) {
			/* AH length is in 4-byte units, not counting the first 8 bytes. */
			*nh_off += (hdr->hdrlen + 2) * 4;
		} else {
			/* Length is in 8-byte units, not counting the first 8 bytes. */
			*nh_off += (hdr->hdrlen + 1) * 8;
		}

		next = hdr->nexthdr;
	}

	return -1;
}
|
|
|
|
/* Fragment-offset bits of iphdr.frag_off (host byte order). A non-zero
 * offset means the packet is a later fragment with no L4 header in it. */
#define XDP_L3FWD_IP_FRAG_OFFSET 0x1FFF

/*
 * Common body of the XDP forwarding programs.
 *
 * Parses Ethernet (plus VLAN tags), then IPv4 or IPv6, asks the kernel FIB
 * for a next hop and, on success, rewrites the packet and redirects it
 * through xdp_l3fwd_ports.
 *
 * @ctx:   XDP context of the packet.
 * @flags: passed unchanged to bpf_fib_lookup().
 *
 * Returns
 *   XDP_DROP  for a truncated Ethernet/VLAN/IP header or an IPv4 ihl < 5;
 *   XDP_PASS  for non-IP traffic, TTL / hop limit <= 1, any FIB result
 *             other than success, or an egress ifindex that is not present
 *             in xdp_l3fwd_ports;
 *   otherwise the return value of bpf_redirect_map().
 *
 * NOTE(review): VLAN tags are parsed but left on the frame, and the FIB
 * lookup uses the physical ingress ifindex - confirm this is intended.
 */
static __always_inline int xdp_l3fwd_flags(struct xdp_md *ctx, __u32 flags)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;

	struct ethhdr *eth = data;
	__u64 nh_off = sizeof(*eth);
	if ((void *)((char *)data + nh_off) > data_end)
		return XDP_DROP;

	struct bpf_fib_lookup fib_params = {};
	__u16 h_proto = eth->h_proto;
	__u16 vlan_id = 0;

	if (parse_vlan(data, data_end, &nh_off, &h_proto, &vlan_id) < 0)
		return XDP_DROP;

	struct flow_key key = {};
	key.vlan_id = vlan_id;
	/* Full frame length as seen by XDP, used for the byte counter. */
	__u64 bytes = (char *)data_end - (char *)data;

	if (h_proto == bpf_htons(ETH_P_IP)) {
		struct iphdr *iph = (void *)((char *)data + nh_off);
		if ((void *)(iph + 1) > data_end)
			return XDP_DROP;

		/* Leave expiring packets to the regular stack. */
		if (iph->ttl <= 1)
			return XDP_PASS;

		key.proto = iph->protocol;
		key.ipv4_src = iph->saddr;
		key.ipv4_dst = iph->daddr;

		__u8 ihl = iph->ihl;
		if (ihl < 5)
			return XDP_DROP;

		__u64 l4_off = nh_off + (ihl * 4);

		/*
		 * Parse L4 ports - exactly 4 bytes (sport + dport). Later
		 * fragments (non-zero fragment offset) carry no L4 header, so
		 * their first payload bytes must not be read as ports.
		 */
		int has_l4_hdr = !(iph->frag_off & bpf_htons(XDP_L3FWD_IP_FRAG_OFFSET));
		void *l4_hdr = (void *)((char *)data + l4_off);
		if (has_l4_hdr && (void *)((char *)l4_hdr + 4) <= data_end) {
			if (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP) {
				__u16 *ports = l4_hdr;
				key.sport = ports[0];
				key.dport = ports[1];
				fib_params.sport = ports[0];
				fib_params.dport = ports[1];
			}
		}

		fib_params.family = AF_INET;
		fib_params.tos = iph->tos;
		fib_params.l4_protocol = iph->protocol;
		fib_params.tot_len = bpf_ntohs(iph->tot_len);
		fib_params.ipv4_src = iph->saddr;
		fib_params.ipv4_dst = iph->daddr;

	} else if (h_proto == bpf_htons(ETH_P_IPV6)) {
		struct ipv6hdr *ip6h = (void *)((char *)data + nh_off);
		if ((void *)(ip6h + 1) > data_end)
			return XDP_DROP;

		/* Leave expiring packets to the regular stack. */
		if (ip6h->hop_limit <= 1)
			return XDP_PASS;

		__builtin_memcpy(key.ipv6_src, &ip6h->saddr, 16);
		__builtin_memcpy(key.ipv6_dst, &ip6h->daddr, 16);

		__u64 l4_off = nh_off + sizeof(*ip6h);
		int l4_proto = skip_ip6hdrext(data, data_end, &l4_off, ip6h->nexthdr);
		/* Extension chain could not be walked: fall back to the fixed header's value. */
		if (l4_proto < 0)
			l4_proto = ip6h->nexthdr;

		key.proto = l4_proto;

		/* Parse L4 ports - check exactly 4 bytes */
		void *l4_hdr = (void *)((char *)data + l4_off);
		if ((void *)((char *)l4_hdr + 4) <= data_end) {
			if (l4_proto == IPPROTO_TCP || l4_proto == IPPROTO_UDP) {
				__u16 *ports = l4_hdr;
				key.sport = ports[0];
				key.dport = ports[1];
				fib_params.sport = ports[0];
				fib_params.dport = ports[1];
			}
		}

		fib_params.family = AF_INET6;
		/* First 32-bit word of the header, masked down to flowinfo. */
		__be32 flow = *(__be32 *)ip6h & IPV6_FLOWINFO_MASK;
		fib_params.flowinfo = flow;
		fib_params.l4_protocol = l4_proto;
		fib_params.tot_len = bpf_ntohs(ip6h->payload_len);
		__builtin_memcpy(fib_params.ipv6_src, &ip6h->saddr, 16);
		__builtin_memcpy(fib_params.ipv6_dst, &ip6h->daddr, 16);
	} else {
		return XDP_PASS;
	}

	fib_params.ifindex = ctx->ingress_ifindex;

	int rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags);
	/* Anything but 0 (lookup success) is left to the regular stack. */
	if (rc != 0)
		return XDP_PASS;

	record_stats(&key, bytes);

	/*
	 * Make sure the egress ifindex is a configured port BEFORE the packet
	 * is modified. Otherwise the XDP_PASS fallback of bpf_redirect_map()
	 * would hand the stack a frame whose TTL and MAC addresses have
	 * already been rewritten for the next hop.
	 */
	if (!bpf_map_lookup_elem(&xdp_l3fwd_ports, &fib_params.ifindex))
		return XDP_PASS;

	if (h_proto == bpf_htons(ETH_P_IP)) {
		struct iphdr *iph = (void *)((char *)data + nh_off);
		ip_decrease_ttl(iph);
	} else if (h_proto == bpf_htons(ETH_P_IPV6)) {
		struct ipv6hdr *ip6h = (void *)((char *)data + nh_off);
		ip6h->hop_limit--;
	}

	/* Next-hop and egress MAC addresses filled in by bpf_fib_lookup(). */
	__builtin_memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
	__builtin_memcpy(eth->h_source, fib_params.smac, ETH_ALEN);

	return bpf_redirect_map(&xdp_l3fwd_ports, fib_params.ifindex, XDP_PASS);
}
|
|
|
|
/*
 * XDP entry point: runs xdp_l3fwd_flags() with no bpf_fib_lookup() flags
 * set and returns its verdict.
 */
SEC("xdp")
int xdp_l3fwd_prog(struct xdp_md *ctx)
{
	const __u32 lookup_flags = 0;

	return xdp_l3fwd_flags(ctx, lookup_flags);
}
|
|
|
|
/*
 * XDP entry point: same as xdp_l3fwd_prog() but passes
 * BPF_FIB_LOOKUP_DIRECT to bpf_fib_lookup() via xdp_l3fwd_flags().
 */
SEC("xdp")
int xdp_l3fwd_direct_prog(struct xdp_md *ctx)
{
	const __u32 lookup_flags = BPF_FIB_LOOKUP_DIRECT;

	return xdp_l3fwd_flags(ctx, lookup_flags);
}
|
|
|
|
/* License string placed in the "license" ELF section of the object. */
char _license[] SEC("license") = "GPL";