diff --git a/main.c b/main.c index 1a17f08..252c79c 100644 --- a/main.c +++ b/main.c @@ -25,479 +25,287 @@ #define VLAN_MAX_DEPTH 2 #define IPV6_EXT_MAX_CHAIN 6 -struct vlan_hdr { - __be16 h_vlan_TCI; - __be16 h_vlan_encapsulated_proto; -}; - -/* Auto-learned VLAN info */ -struct vlan_learning_entry { - __u16 vlan_id; - __u16 confidence; - __u32 last_seen; -}; - struct { - __uint(type, BPF_MAP_TYPE_HASH); - __type(key, __u32); - __type(value, struct vlan_learning_entry); - __uint(max_entries, 512); -} xdp_vlan_learning SEC(".maps"); - -struct vlan_parent_info { - __u32 parent_ifindex; - __u16 vlan_id; - __u16 pad; -}; - -struct { - __uint(type, BPF_MAP_TYPE_HASH); - __type(key, __u32); - __type(value, struct vlan_parent_info); - __uint(max_entries, 512); -} xdp_vlan_parents SEC(".maps"); + __uint(type, BPF_MAP_TYPE_DEVMAP); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + __uint(max_entries, 512); +} xdp_l3fwd_ports SEC(".maps"); struct flow_key { - __u8 proto; - __u8 pad[3]; - __u16 vlan_id; - __u16 pad2; + __u8 proto; + __u8 pad[3]; + __u16 vlan_id; + __u16 pad2; - union { - __u32 ipv4_src; - __u8 ipv6_src[16]; - }; - union { - __u32 ipv4_dst; - __u8 ipv6_dst[16]; - }; + union { + __u32 ipv4_src; + __u8 ipv6_src[16]; + }; - __u16 sport; - __u16 dport; + union { + __u32 ipv4_dst; + __u8 ipv6_dst[16]; + }; + + __u16 sport; + __u16 dport; }; struct flow_stats { - __u64 packets; - __u64 bytes; + __u64 packets; + __u64 bytes; }; struct { - __uint(type, BPF_MAP_TYPE_PERCPU_HASH); - __uint(key_size, sizeof(struct flow_key)); - __uint(value_size, sizeof(struct flow_stats)); - __uint(max_entries, 65536); + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(key_size, sizeof(struct flow_key)); + __uint(value_size, sizeof(struct flow_stats)); + __uint(max_entries, 65536); } xdp_flow_stats SEC(".maps"); +struct vlan_hdr { + __be16 h_vlan_TCI; + __be16 h_vlan_encapsulated_proto; +}; + static __always_inline int ip_decrease_ttl(struct iphdr *iph) { - __u32 check = (__u32)iph->check; - check += (__u32)bpf_htons(0x0100); - iph->check = (__sum16)(check + (check >= 0xFFFF)); - return --iph->ttl; + __u32 check = (__u32)iph->check; + check += (__u32)bpf_htons(0x0100); + iph->check = (__sum16)(check + (check >= 0xFFFF)); + return --iph->ttl; } -static __always_inline void record_stats(struct flow_key *key, __u64 bytes) +static __always_inline void record_stats(struct xdp_md *ctx, struct flow_key *key, __u64 bytes) { - struct flow_stats *stats = bpf_map_lookup_elem(&xdp_flow_stats, key); - if (stats) { - stats->packets++; - stats->bytes += bytes; - } else { - struct flow_stats new_stats = { - .packets = 1, - .bytes = bytes, - }; - bpf_map_update_elem(&xdp_flow_stats, key, &new_stats, BPF_ANY); - } -} + struct flow_stats *stats; -static __always_inline void learn_vlan(struct xdp_md *ctx, __u16 vlan_id) -{ - __u32 ifindex = ctx->ingress_ifindex; - struct vlan_learning_entry *entry = bpf_map_lookup_elem(&xdp_vlan_learning, &ifindex); - - if (entry) { - if (vlan_id > 0) { - if (entry->vlan_id == vlan_id) { - if (entry->confidence < 65535) - entry->confidence++; - } else if (entry->confidence > 0) { - entry->confidence--; - if (entry->confidence == 0) { - entry->vlan_id = vlan_id; - entry->confidence = 1; - } - } - } - } else if (vlan_id > 0) { - struct vlan_learning_entry new_entry = { - .vlan_id = vlan_id, - .confidence = 1, - .last_seen = 0, - }; - bpf_map_update_elem(&xdp_vlan_learning, &ifindex, &new_entry, BPF_ANY); - } -} - -static __always_inline __u16 get_interface_vlan(struct xdp_md *ctx, __u32 ifindex) -{ - struct vlan_parent_info *parent_info = bpf_map_lookup_elem(&xdp_vlan_parents, &ifindex); - if (parent_info && parent_info->vlan_id > 0) { - return parent_info->vlan_id; - } - - struct vlan_learning_entry *learned = bpf_map_lookup_elem(&xdp_vlan_learning, &ifindex); - if (learned && learned->confidence > 5) { - return learned->vlan_id; - } - - __u32 ingress_idx = ctx->ingress_ifindex; - if (ingress_idx != ifindex) { - struct vlan_learning_entry *ingress_learned = bpf_map_lookup_elem(&xdp_vlan_learning, &ingress_idx); - if (ingress_learned && ingress_learned->confidence > 10) { - struct vlan_learning_entry *egress_learned = bpf_map_lookup_elem(&xdp_vlan_learning, &ifindex); - if (!egress_learned || egress_learned->confidence < 3) { - return 0; - } - } - } - - return 0; + stats = bpf_map_lookup_elem(&xdp_flow_stats, key); + if (stats) { + stats->packets++; + stats->bytes += bytes; + } else { + struct flow_stats new_stats = { + .packets = 1, + .bytes = bytes, + }; + bpf_map_update_elem(&xdp_flow_stats, key, &new_stats, BPF_ANY); + } } static __always_inline int parse_vlan(void *data, void *data_end, __u64 *nh_off, __u16 *h_proto, __u16 *vlan_id) { - struct vlan_hdr *vh; -#pragma unroll - for (int i = 0; i < VLAN_MAX_DEPTH; i++) { - if (*h_proto != bpf_htons(ETH_P_8021Q) && *h_proto != bpf_htons(ETH_P_8021AD)) - break; + struct vlan_hdr *vhdr; + int i, vlan_count = 0; - vh = (void *)((char *)data + *nh_off); - if ((void *)(vh + 1) > data_end) - return -1; + #pragma unroll + for (i = 0; i < VLAN_MAX_DEPTH; i++) { + if (*h_proto != bpf_htons(ETH_P_8021Q) && *h_proto != bpf_htons(ETH_P_8021AD)) + break; - if (i == 0) - *vlan_id = bpf_ntohs(vh->h_vlan_TCI) & 0x0FFF; + vhdr = data + *nh_off; + if ((void *)(vhdr + 1) > data_end) + return -1; - *nh_off += sizeof(*vh); - *h_proto = vh->h_vlan_encapsulated_proto; - } - return 0; + if (i == 0) + *vlan_id = bpf_ntohs(vhdr->h_vlan_TCI) & 0x0FFF; + + *nh_off += sizeof(*vhdr); + *h_proto = vhdr->h_vlan_encapsulated_proto; + vlan_count++; + } + + return vlan_count; } -static __always_inline int skip_ip6hdrext(void *data, void *data_end, __u64 *nh_off, __u8 next) +static __always_inline int skip_ip6hdrext(void *data, void *data_end, __u64 *nh_off, __u8 next_hdr_type) { - struct ipv6_opt_hdr *hdr; -#pragma unroll - for (int i = 0; i < IPV6_EXT_MAX_CHAIN; i++) { - hdr = (void *)((char *)data + *nh_off); - if ((void *)(hdr + 1) > data_end) - return -1; + struct ipv6_opt_hdr { + __u8 nexthdr; + __u8 hdrlen; + } *hdr; + int i; - switch (next) { - case IPPROTO_HOPOPTS: - case IPPROTO_DSTOPTS: - case IPPROTO_ROUTING: - case IPPROTO_MH: - *nh_off += (hdr->hdrlen + 1) * 8; - next = hdr->nexthdr; - break; - case IPPROTO_AH: - *nh_off += (hdr->hdrlen + 2) * 4; - next = hdr->nexthdr; - break; - case IPPROTO_FRAGMENT: - *nh_off += 8; - next = hdr->nexthdr; - break; - default: - return next; - } - } - return -1; -} + #pragma unroll + for (i = 0; i < IPV6_EXT_MAX_CHAIN; i++) { + hdr = data + *nh_off; + + if ((void *)(hdr + 1) > data_end) + return -1; -/* Insert VLAN tag using head adjustment */ -static __always_inline int insert_vlan_tag(struct xdp_md *ctx, __u16 vlan_id) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - - struct ethhdr *old_eth = data; - - if ((void *)(old_eth + 1) > data_end) - return -1; - - struct ethhdr orig_eth; - __builtin_memcpy(&orig_eth, old_eth, sizeof(orig_eth)); - - /* Expand headroom */ - if (bpf_xdp_adjust_head(ctx, -(int)sizeof(struct vlan_hdr))) - return -1; - - /* Re-read pointers after head adjustment */ - data = (void *)(long)ctx->data; - data_end = (void *)(long)ctx->data_end; - - struct ethhdr *new_eth = data; - struct vlan_hdr *vlan = (struct vlan_hdr *)(new_eth + 1); - - if ((void *)(vlan + 1) > data_end) - return -1; - - /* Copy ethernet header to new position */ - __builtin_memcpy(new_eth->h_dest, orig_eth.h_dest, ETH_ALEN); - __builtin_memcpy(new_eth->h_source, orig_eth.h_source, ETH_ALEN); - - /* Set up VLAN header */ - vlan->h_vlan_TCI = bpf_htons(vlan_id & 0x0FFF); - vlan->h_vlan_encapsulated_proto = orig_eth.h_proto; - - /* Update ethernet proto to VLAN */ - new_eth->h_proto = bpf_htons(ETH_P_8021Q); - - return 0; -} + switch (next_hdr_type) { + case IPPROTO_HOPOPTS: + case IPPROTO_DSTOPTS: + case IPPROTO_ROUTING: + case IPPROTO_MH: + *nh_off += (hdr->hdrlen + 1) * 8; + next_hdr_type = hdr->nexthdr; + break; + case IPPROTO_AH: + *nh_off += (hdr->hdrlen + 2) * 4; + next_hdr_type = hdr->nexthdr; + break; + case IPPROTO_FRAGMENT: + *nh_off += 8; + next_hdr_type = hdr->nexthdr; + break; + default: + return next_hdr_type; + } + } -/* Remove VLAN tag */ -static __always_inline int remove_vlan_tag(struct xdp_md *ctx) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - - struct ethhdr *eth = data; - struct vlan_hdr *vlan = (struct vlan_hdr *)(eth + 1); - - if ((void *)(vlan + 1) > data_end) - return -1; - - __be16 encap_proto = vlan->h_vlan_encapsulated_proto; - - struct ethhdr tmp_eth; - __builtin_memcpy(&tmp_eth, eth, sizeof(tmp_eth)); - - /* Adjust head to remove VLAN header */ - if (bpf_xdp_adjust_head(ctx, (int)sizeof(struct vlan_hdr))) - return -1; - - /* Re-read pointers after head adjustment */ - data = (void *)(long)ctx->data; - data_end = (void *)(long)ctx->data_end; - eth = data; - - if ((void *)(eth + 1) > data_end) - return -1; - - __builtin_memcpy(eth->h_dest, tmp_eth.h_dest, ETH_ALEN); - __builtin_memcpy(eth->h_source, tmp_eth.h_source, ETH_ALEN); - eth->h_proto = encap_proto; - - return 0; + return -1; } static __always_inline int xdp_l3fwd_flags(struct xdp_md *ctx, __u32 flags) { - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct bpf_fib_lookup fib_params; + struct ethhdr *eth = data; + struct ipv6hdr *ip6h; + struct iphdr *iph; + __u16 h_proto; + __u64 nh_off; + int rc, vlan_count; + __u16 vlan_id = 0; - struct ethhdr *eth = data; - __u64 nh_off = sizeof(*eth); - if ((void *)((char *)data + nh_off) > data_end) - return XDP_DROP; + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return XDP_DROP; - struct bpf_fib_lookup fib_params = {}; - __u16 h_proto = eth->h_proto; - __u16 vlan_id = 0; - __u16 orig_vlan_id = 0; - int had_vlan = 0; + __builtin_memset(&fib_params, 0, sizeof(fib_params)); + h_proto = eth->h_proto; - if (h_proto == bpf_htons(ETH_P_8021Q) || h_proto == bpf_htons(ETH_P_8021AD)) - had_vlan = 1; + vlan_count = parse_vlan(data, data_end, &nh_off, &h_proto, &vlan_id); + if (vlan_count < 0) + return XDP_DROP; - if (parse_vlan(data, data_end, &nh_off, &h_proto, &vlan_id) < 0) - return XDP_DROP; - - orig_vlan_id = vlan_id; - - if (vlan_id > 0) - learn_vlan(ctx, vlan_id); + struct flow_key key = {}; + key.vlan_id = vlan_id; + __u64 bytes = data_end - data; - struct flow_key key = {}; - key.vlan_id = vlan_id; - __u64 bytes = (char *)data_end - (char *)data; + if (h_proto == bpf_htons(ETH_P_IP)) { + iph = data + nh_off; + if ((void *)(iph + 1) > data_end) + return XDP_DROP; - if (h_proto == bpf_htons(ETH_P_IP)) { - struct iphdr *iph = (void *)((char *)data + nh_off); - if ((void *)(iph + 1) > data_end) - return XDP_DROP; + if (iph->ttl <= 1) + return XDP_PASS; - if (iph->ttl <= 1) - return XDP_PASS; + key.proto = iph->protocol; + key.ipv4_src = iph->saddr; + key.ipv4_dst = iph->daddr; - key.proto = iph->protocol; - key.ipv4_src = iph->saddr; - key.ipv4_dst = iph->daddr; + /* Calculate L4 offset - use pointer arithmetic from iph */ + __u8 ihl = iph->ihl; + if (ihl < 5) + return XDP_DROP; - __u8 ihl = iph->ihl; - if (ihl < 5) - return XDP_DROP; - - __u64 l4_off = nh_off + (ihl * 4); - - void *l4_hdr = (void *)((char *)data + l4_off); - if ((void *)((char *)l4_hdr + 4) <= data_end) { - if (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP) { - __u16 *ports = l4_hdr; - key.sport = ports[0]; - key.dport = ports[1]; - fib_params.sport = ports[0]; - fib_params.dport = ports[1]; - } - } + void *l4ptr = (void *)iph + (ihl * 4); + + if (iph->protocol == IPPROTO_TCP) { + struct tcphdr *tcph = l4ptr; + if ((void *)(tcph + 1) > data_end) + goto skip_v4_ports; + key.sport = tcph->source; + key.dport = tcph->dest; + } else if (iph->protocol == IPPROTO_UDP) { + struct udphdr *udph = l4ptr; + if ((void *)(udph + 1) > data_end) + goto skip_v4_ports; + key.sport = udph->source; + key.dport = udph->dest; + } - fib_params.family = AF_INET; - fib_params.tos = iph->tos; - fib_params.l4_protocol = iph->protocol; - fib_params.tot_len = bpf_ntohs(iph->tot_len); - fib_params.ipv4_src = iph->saddr; - fib_params.ipv4_dst = iph->daddr; +skip_v4_ports: + fib_params.family = AF_INET; + fib_params.tos = iph->tos; + fib_params.l4_protocol = iph->protocol; + fib_params.tot_len = bpf_ntohs(iph->tot_len); + fib_params.ipv4_src = iph->saddr; + fib_params.ipv4_dst = iph->daddr; - } else if (h_proto == bpf_htons(ETH_P_IPV6)) { - struct ipv6hdr *ip6h = (void *)((char *)data + nh_off); - if ((void *)(ip6h + 1) > data_end) - return XDP_DROP; + } else if (h_proto == bpf_htons(ETH_P_IPV6)) { + ip6h = data + nh_off; + if ((void *)(ip6h + 1) > data_end) + return XDP_DROP; - if (ip6h->hop_limit <= 1) - return XDP_PASS; + if (ip6h->hop_limit <= 1) + return XDP_PASS; - __builtin_memcpy(key.ipv6_src, &ip6h->saddr, 16); - __builtin_memcpy(key.ipv6_dst, &ip6h->daddr, 16); + __builtin_memcpy(key.ipv6_src, &ip6h->saddr, 16); + __builtin_memcpy(key.ipv6_dst, &ip6h->daddr, 16); - __u64 l4_off = nh_off + sizeof(*ip6h); - int l4_proto = skip_ip6hdrext(data, data_end, &l4_off, ip6h->nexthdr); - if (l4_proto < 0) - l4_proto = ip6h->nexthdr; + __u64 l4_off = nh_off + sizeof(*ip6h); + int l4_proto = skip_ip6hdrext(data, data_end, &l4_off, ip6h->nexthdr); + + if (l4_proto < 0) + l4_proto = ip6h->nexthdr; - key.proto = l4_proto; + key.proto = l4_proto; - void *l4_hdr = (void *)((char *)data + l4_off); - if ((void *)((char *)l4_hdr + 4) <= data_end) { - if (l4_proto == IPPROTO_TCP || l4_proto == IPPROTO_UDP) { - __u16 *ports = l4_hdr; - key.sport = ports[0]; - key.dport = ports[1]; - fib_params.sport = ports[0]; - fib_params.dport = ports[1]; - } - } + void *l4ptr = data + l4_off; - fib_params.family = AF_INET6; - __be32 flow = *(__be32 *)ip6h & IPV6_FLOWINFO_MASK; - fib_params.flowinfo = flow; - fib_params.l4_protocol = l4_proto; - fib_params.tot_len = bpf_ntohs(ip6h->payload_len); - __builtin_memcpy(fib_params.ipv6_src, &ip6h->saddr, 16); - __builtin_memcpy(fib_params.ipv6_dst, &ip6h->daddr, 16); - } else { - return XDP_PASS; - } + if (l4_proto == IPPROTO_TCP) { + struct tcphdr *tcph = l4ptr; + if ((void *)(tcph + 1) > data_end) + goto skip_v6_ports; + key.sport = tcph->source; + key.dport = tcph->dest; + } else if (l4_proto == IPPROTO_UDP) { + struct udphdr *udph = l4ptr; + if ((void *)(udph + 1) > data_end) + goto skip_v6_ports; + key.sport = udph->source; + key.dport = udph->dest; + } - fib_params.ifindex = ctx->ingress_ifindex; +skip_v6_ports: + fib_params.family = AF_INET6; + fib_params.flowinfo = *(__be32 *)ip6h & IPV6_FLOWINFO_MASK; + fib_params.l4_protocol = l4_proto; + fib_params.tot_len = bpf_ntohs(ip6h->payload_len); + __builtin_memcpy(fib_params.ipv6_src, &ip6h->saddr, 16); + __builtin_memcpy(fib_params.ipv6_dst, &ip6h->daddr, 16); + } else { + return XDP_PASS; + } - int rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags); - if (rc == 0) { - record_stats(&key, bytes); + fib_params.ifindex = ctx->ingress_ifindex; + rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags); + + if (rc == BPF_FIB_LKUP_RET_SUCCESS) { + if (!bpf_map_lookup_elem(&xdp_l3fwd_ports, &fib_params.ifindex)) + return XDP_PASS; - __u16 egress_vlan = get_interface_vlan(ctx, fib_params.ifindex); - - if (egress_vlan > 0 && !had_vlan) { - /* Need to add VLAN tag */ - if (insert_vlan_tag(ctx, egress_vlan) < 0) - return XDP_DROP; - - } else if (egress_vlan == 0 && had_vlan) { - /* Need to remove VLAN tag */ - if (remove_vlan_tag(ctx) < 0) { - /* Keep VLAN if removal fails */ - } - - } else if (egress_vlan > 0 && had_vlan && egress_vlan != orig_vlan_id) { - /* Need to change VLAN ID - reload pointers first */ - data = (void *)(long)ctx->data; - data_end = (void *)(long)ctx->data_end; - eth = data; - - if ((void *)(eth + 1) > data_end) - return XDP_DROP; - - if (eth->h_proto == bpf_htons(ETH_P_8021Q) || - eth->h_proto == bpf_htons(ETH_P_8021AD)) { - struct vlan_hdr *vlan = (struct vlan_hdr *)(eth + 1); - if ((void *)(vlan + 1) > data_end) - return XDP_DROP; - - vlan->h_vlan_TCI = bpf_htons(egress_vlan & 0x0FFF); - } - } - - /* CRITICAL: Always reload pointers after FIB lookup to satisfy verifier */ - data = (void *)(long)ctx->data; - data_end = (void *)(long)ctx->data_end; - eth = data; - - /* Re-establish packet bounds for verifier */ - if ((void *)(eth + 1) > data_end) - return XDP_DROP; - - nh_off = sizeof(*eth); - - /* Skip VLAN header if present */ - if (eth->h_proto == bpf_htons(ETH_P_8021Q) || - eth->h_proto == bpf_htons(ETH_P_8021AD)) { - nh_off += sizeof(struct vlan_hdr); - } - - /* Verify nh_off is within bounds */ - if ((void *)((char *)data + nh_off) > data_end) - return XDP_DROP; - - /* Decrease TTL/hop_limit */ - if (h_proto == bpf_htons(ETH_P_IP)) { - struct iphdr *iph = (void *)((char *)data + nh_off); - if ((void *)(iph + 1) > data_end) - return XDP_DROP; - ip_decrease_ttl(iph); - } else if (h_proto == bpf_htons(ETH_P_IPV6)) { - struct ipv6hdr *ip6h = (void *)((char *)data + nh_off); - if ((void *)(ip6h + 1) > data_end) - return XDP_DROP; - ip6h->hop_limit--; - } + record_stats(ctx, &key, bytes); - /* Update MAC addresses - verify eth is still valid */ - if ((void *)(eth + 1) > data_end) - return XDP_DROP; - - __builtin_memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN); - __builtin_memcpy(eth->h_source, fib_params.smac, ETH_ALEN); + if (h_proto == bpf_htons(ETH_P_IP)) + ip_decrease_ttl(iph); + else if (h_proto == bpf_htons(ETH_P_IPV6)) + ip6h->hop_limit--; - return bpf_redirect(fib_params.ifindex, 0); - } + __builtin_memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN); + __builtin_memcpy(eth->h_source, fib_params.smac, ETH_ALEN); + + return bpf_redirect_map(&xdp_l3fwd_ports, fib_params.ifindex, 0); + } - return XDP_PASS; + return XDP_PASS; } SEC("xdp") int xdp_l3fwd_prog(struct xdp_md *ctx) { - return xdp_l3fwd_flags(ctx, 0); + return xdp_l3fwd_flags(ctx, 0); } SEC("xdp") int xdp_l3fwd_direct_prog(struct xdp_md *ctx) { - return xdp_l3fwd_flags(ctx, BPF_FIB_LOOKUP_DIRECT); + return xdp_l3fwd_flags(ctx, BPF_FIB_LOOKUP_DIRECT); } -char _license[] SEC("license") = "GPL"; \ No newline at end of file +char _license[] SEC("license") = "GPL";