#define KBUILD_MODNAME "xdp_l3fwd" #include #include #include #include #include #include #include #include #include #include #define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF) /* Forwarding ports */ struct bpf_map_def SEC("maps") xdp_l3fwd_ports = { .type = BPF_MAP_TYPE_DEVMAP, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = 512, }; /* Stats key — identifies a connection or flow */ struct flow_key { __u8 proto; __u8 pad[3]; /* alignment */ union { __u32 ipv4_src; __u8 ipv6_src[16]; }; union { __u32 ipv4_dst; __u8 ipv6_dst[16]; }; __u16 sport; __u16 dport; }; /* Stats value — counts packets and bytes */ struct flow_stats { __u64 packets; __u64 bytes; }; /* Hash map to track stats per flow */ struct bpf_map_def SEC("maps") xdp_flow_stats = { .type = BPF_MAP_TYPE_HASH, .key_size = sizeof(struct flow_key), .value_size = sizeof(struct flow_stats), .max_entries = 65536, }; /* Decrease IPv4 TTL helper */ static __always_inline int ip_decrease_ttl(struct iphdr *iph) { u32 check = (__force u32)iph->check; check += (__force u32)htons(0x0100); iph->check = (__force __sum16)(check + (check >= 0xFFFF)); return --iph->ttl; } /* Record stats in the xdp_flow_stats map */ static __always_inline void record_stats(struct xdp_md *ctx, struct flow_key *key, __u64 bytes) { struct flow_stats *stats; stats = bpf_map_lookup_elem(&xdp_flow_stats, key); if (stats) { __sync_fetch_and_add(&stats->packets, 1); __sync_fetch_and_add(&stats->bytes, bytes); } else { struct flow_stats new_stats = { .packets = 1, .bytes = bytes, }; bpf_map_update_elem(&xdp_flow_stats, key, &new_stats, BPF_ANY); } } static __always_inline int xdp_l3fwd_flags(struct xdp_md *ctx, u32 flags) { void *data_end = (void *)(long)ctx->data_end; void *data = (void *)(long)ctx->data; struct bpf_fib_lookup fib_params; struct ethhdr *eth = data; struct ipv6hdr *ip6h; struct iphdr *iph; u16 h_proto; u64 nh_off; int rc; nh_off = sizeof(*eth); if (data + nh_off > data_end) return XDP_DROP; __builtin_memset(&fib_params, 0, sizeof(fib_params)); h_proto = eth->h_proto; struct flow_key key = {}; __u64 bytes = data_end - data; if (h_proto == htons(ETH_P_IP)) { iph = data + nh_off; if ((void *)(iph + 1) > data_end) return XDP_DROP; key.proto = iph->protocol; key.ipv4_src = iph->saddr; key.ipv4_dst = iph->daddr; void *l4_hdr = (void *)iph + (iph->ihl * 4); if (l4_hdr + sizeof(struct udphdr) <= data_end) { if (iph->protocol == IPPROTO_TCP) { struct tcphdr *tcph = l4_hdr; key.sport = tcph->source; key.dport = tcph->dest; } else if (iph->protocol == IPPROTO_UDP) { struct udphdr *udph = l4_hdr; key.sport = udph->source; key.dport = udph->dest; } } /* Record stats before forwarding */ record_stats(ctx, &key, bytes); if (iph->ttl <= 1) return XDP_PASS; fib_params.family = AF_INET; fib_params.tos = iph->tos; fib_params.l4_protocol = iph->protocol; fib_params.tot_len = ntohs(iph->tot_len); fib_params.ipv4_src = iph->saddr; fib_params.ipv4_dst = iph->daddr; } else if (h_proto == htons(ETH_P_IPV6)) { ip6h = data + nh_off; if ((void *)(ip6h + 1) > data_end) return XDP_DROP; key.proto = ip6h->nexthdr; __builtin_memcpy(key.ipv6_src, &ip6h->saddr, 16); __builtin_memcpy(key.ipv6_dst, &ip6h->daddr, 16); void *l4_hdr = (void *)(ip6h + 1); if (l4_hdr + sizeof(struct udphdr) <= data_end) { if (ip6h->nexthdr == IPPROTO_TCP) { struct tcphdr *tcph = l4_hdr; key.sport = tcph->source; key.dport = tcph->dest; } else if (ip6h->nexthdr == IPPROTO_UDP) { struct udphdr *udph = l4_hdr; key.sport = udph->source; key.dport = udph->dest; } } /* Record stats before forwarding */ record_stats(ctx, &key, bytes); if (ip6h->hop_limit <= 1) return XDP_PASS; fib_params.family = AF_INET6; fib_params.flowinfo = *(__be32 *)ip6h & IPV6_FLOWINFO_MASK; fib_params.l4_protocol = ip6h->nexthdr; fib_params.tot_len = ntohs(ip6h->payload_len); __builtin_memcpy(fib_params.ipv6_src, &ip6h->saddr, 16); __builtin_memcpy(fib_params.ipv6_dst, &ip6h->daddr, 16); } else { return XDP_PASS; } fib_params.ifindex = ctx->ingress_ifindex; rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags); if (rc == BPF_FIB_LKUP_RET_SUCCESS) { if (!bpf_map_lookup_elem(&xdp_l3fwd_ports, &fib_params.ifindex)) return XDP_PASS; if (h_proto == htons(ETH_P_IP)) ip_decrease_ttl(iph); else if (h_proto == htons(ETH_P_IPV6)) ip6h->hop_limit--; __builtin_memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN); __builtin_memcpy(eth->h_source, fib_params.smac, ETH_ALEN); return bpf_redirect_map(&xdp_l3fwd_ports, fib_params.ifindex, 0); } return XDP_PASS; } SEC("xdp_l3fwd") int xdp_l3fwd_prog(struct xdp_md *ctx) { return xdp_l3fwd_flags(ctx, 0); } SEC("xdp_l3fwd_direct") int xdp_l3fwd_direct_prog(struct xdp_md *ctx) { return xdp_l3fwd_flags(ctx, BPF_FIB_LOOKUP_DIRECT); } char _license[] SEC("license") = "GPL";