我就两周以来遇到的错误寻求您的帮助。
我正在使用 BCC 在 XDP 中实现 NAT64。节点之间的转发是通的:发送方节点发送 ping,接收方节点能收到 ping;两个节点之间还有一个名为 router 的节点,XDP 程序就附加在它上面。它在一个接口上把 IPv6 转换为 IPv4,在另一个接口上把 IPv4 转换为 IPv6。
节点是命名空间。
问题发生在目标节点收到数据包时,我从 tcpdump 看到的 ICMP 校验和是错误的(没有检查 ip 校验和,所以我不知道那个是否正确)。
我尝试了网上流传的许多种校验和计算方法,但都不起作用,于是我开始怀疑错误可能出在别的地方。这两周我一直在反复检查代码,但没有发现任何错误。
这是 tcpdump -i veth1 -vvv 的输出:
16:10:45.883038 IP (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto ICMP (1), length 84)
192.168.9.1 > archiz-linux: ICMP echo request, id 47244, seq 1, length 64 (wrong icmp cksum 3f72 (->6fc9)!)
我尝试使用以下方法禁用校验和卸载:
ethtool -K veth rx off tx off
我对每个命名空间中的每个 veth 都执行了该命令,但输出与之前相同。也许它不起作用,是因为正如那篇帖子所说,这种方法并不涉及 ICMP 校验和。
校验和计算函数 csum_fold_helper():
/*
 * Fold a one's-complement checksum accumulator down to 16 bits and return
 * its bitwise complement, i.e. the value to store in a checksum field.
 *
 * csum: running sum; the full 64-bit width is honoured, so several partial
 *       sums may be added together before folding.
 *
 * The folds are written out explicitly (no loop). Each step adds the high
 * half onto the low half, which keeps the value congruent modulo 0xffff:
 *   64 -> at most 33 bits -> at most 32 bits -> at most 17 bits -> 16 bits.
 */
static inline __u16 csum_fold_helper(__u64 csum)
{
    // Collapse the upper 32 bits first so nothing above bit 31 is lost.
    csum = (csum >> 32) + (csum & 0xffffffffULL);
    csum = (csum >> 32) + (csum & 0xffffffffULL);
    // Classic 32 -> 16 bit fold; the second pass absorbs the carry.
    csum = (csum >> 16) + (csum & 0xffff);
    csum = (csum >> 16) + (csum & 0xffff);
    return (__u16)~csum;
}
这是我的实现,为了简单起见,我省略了部分内容:
/*
 * XDP entry point of the NAT64 router.
 *
 * For an IPv6 frame it builds an IPv4 header from the IPv6 one, maps an
 * ICMPv6 header to an ICMP header via write_icmp(), shrinks the packet head
 * by the IPv6/IPv4 header size difference, rewrites the Ethernet header and
 * then asks the kernel FIB where to send the packet.
 *
 * Returns XDP_DROP when a bounds check, a head adjustment or the ICMP
 * translation fails; the result of bpf_redirect() when the FIB lookup
 * succeeds; XDP_PASS otherwise.
 *
 * Lines consisting of "..." are parts the author left out of the listing.
 */
int xdp_router_func(struct xdp_md *ctx){
    void *data = (void *)(long)ctx->data;
    void *data_end = (void *)(long)ctx->data_end;
    struct bpf_fib_lookup fib_params = {0};
    struct ethhdr eth_cpy;
    struct ethhdr *eth = data;
    struct iphdr *iph = {0};
    __u64 nh_off = sizeof(*eth);
    // The Ethernet header must lie fully inside the packet.
    if (data + nh_off > data_end)
        return XDP_DROP;
    if (eth->h_proto == bpf_htons(ETH_P_IPV6))
    {
        // Keep a copy of the Ethernet header: the head of the packet is moved below.
        memcpy(&eth_cpy, eth, sizeof(eth_cpy));
        struct ipv6hdr *ip6h = data + nh_off;
        if (ip6h + 1 > data_end)
            return XDP_DROP;
        ...
        // IPv4 header under construction; fields not named here start at zero,
        // including .check, which is filled in once the header is complete.
        struct iphdr dst_hdr = {
            .version = 4,
            .ihl = 5,
            .frag_off = bpf_htons(1 << 14),
        };
        __u32 assigned_ipv4 = 0;
        // search inside the natting_table the ipv6_addr associated to the ipv4
        ...
        dst_hdr.saddr = bpf_htonl((__be32)assigned_ipv4);
        // The IPv4 destination is taken from the last 32 bits of the IPv6 destination.
        dst_hdr.daddr = ip6h->daddr.s6_addr32[3];
        dst_hdr.protocol = ip6h->nexthdr;
        dst_hdr.ttl = ip6h->hop_limit;
        // TOS is rebuilt from the priority nibble and the top nibble of the flow label.
        dst_hdr.tos = ip6h->priority << 4 | (ip6h->flow_lbl[0] >> 4);
        dst_hdr.tot_len = bpf_htons(bpf_ntohs(ip6h->payload_len) + sizeof(dst_hdr));
        // check if the packet is a icmpv6
        if (dst_hdr.protocol == IPPROTO_ICMPV6)
        {
            struct icmp6hdr *icmp6h = (void *)ip6h + sizeof(*ip6h);
            if (icmp6h + 1 > data_end)
                return XDP_DROP;
            // ready to parse the icmpv6 header in icmp
            struct icmphdr tmp_icmp;
            struct icmphdr *icmp;
            // set the right type to icmp, id field and sequence field
            if (write_icmp(&tmp_icmp, icmp6h) == -1)
            {
                bpf_trace_printk("[ERR]: error during icpmv6 parse in icmp");
                return XDP_DROP;
            }
            // Move the head by the size difference between the two ICMP headers.
            if (bpf_xdp_adjust_head(ctx, (int)sizeof(*icmp6h) - (int)sizeof(tmp_icmp)))
                return XDP_DROP;
            // after the adjust head I have to reassign the pointers
            data = (void *)(long)ctx->data;
            data_end = (void *)(long)ctx->data_end;
            icmp = (void *)(data + sizeof(struct ethhdr) + sizeof(struct ipv6hdr));
            if (icmp + 1 > data_end)
                return XDP_DROP;
            *icmp = tmp_icmp;
            // set the checksum
            icmp->checksum = 0x0000;
            // sizeof(*icmp) is the size of the header struct, not of the pointer.
            // NOTE(review): only the ICMP header bytes are summed here. The ICMP
            // checksum is defined over the whole ICMP message (header + data), so
            // the payload after the header still has to be accumulated, otherwise
            // receivers will report a wrong ICMP checksum.
            icmp->checksum = csum_fold_helper(bpf_csum_diff((__be32 *)icmp, 0, (__be32 *)icmp, sizeof(*icmp), 0));
            dst_hdr.protocol = IPPROTO_ICMP;
        } // icmpv6
        // IPv4 header checksum, computed over dst_hdr while .check is still zero.
        dst_hdr.check = csum_fold_helper(bpf_csum_diff((__be32 *)&dst_hdr, 0, (__be32 *)&dst_hdr, sizeof(dst_hdr), 0));
        // Drop the bytes by which the IPv6 header is larger than the IPv4 header.
        if (bpf_xdp_adjust_head(ctx, (int)sizeof(struct ipv6hdr) - (int)sizeof(struct iphdr)))
            return XDP_DROP;
        // after the adjust head I have to reassign the pointers
        eth = (void *)(long)ctx->data;
        data = (void *)(long)ctx->data;
        data_end = (void *)(long)ctx->data_end;
        if (eth + 1 > data_end)
            return XDP_DROP;
        // Restore the saved Ethernet header at the new head and mark the frame as IPv4.
        memcpy(eth, &eth_cpy, sizeof(*eth));
        eth->h_proto = bpf_htons(ETH_P_IP);
        iph = (void *)(data + sizeof(*eth));
        if (iph + 1 > data_end)
        {
            bpf_trace_printk("iph out of boundary");
            return XDP_DROP;
        }
        *iph = dst_hdr;
        // start forwarding
        // setting the fib_params
        fib_params.family = AF_INET;
        fib_params.tos = iph->tos;
        fib_params.tot_len = bpf_ntohs(iph->tot_len);
        fib_params.ipv4_dst = iph->daddr;
        fib_params.ipv4_src = iph->saddr;
        fib_params.ifindex = ctx->ingress_ifindex;
        fib_params.sport = 0;
        fib_params.dport = 0;
    } // end ipv6
    // forwarding
    int rc;
    rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), 0);
    switch (rc)
    {
    case BPF_FIB_LKUP_RET_SUCCESS: /* lookup successful */
        // Use the MAC addresses returned by the lookup and redirect to the egress interface.
        memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
        memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
        int action;
        action = bpf_redirect(fib_params.ifindex, 0);
        return action;
    // other cases
    ...
    }
    return XDP_PASS;
}
Cilium 实现了这样的校验和计算,所以我会直接使用它(需注明出处,并遵守其 Copyleft 许可证):
// Usage fragment: csum, icmp, icmp_payload, icmp_payload_end and ctx are
// declared by the surrounding code, which is not shown here.
// Start with the sum over the ICMP payload.
// NOTE(review): icmp_wsum_accumulate() and ctx_full_len() are defined elsewhere;
// presumably the former returns a partial sum over
// [icmp_payload, icmp_payload_end) - confirm against their definitions.
csum = icmp_wsum_accumulate(icmp_payload, icmp_payload_end, ctx_full_len(ctx));
// Add the sum of the ICMP header itself.
// NOTE(review): assumes icmp->checksum was zeroed beforehand - confirm in the caller.
csum += csum_diff(icmp, 0, icmp, sizeof(struct icmphdr), 0);
// Fold the accumulated sum to 16 bits, complement it and store it in the header.
icmp->checksum = csum_fold(csum);
/*
 * Reduce a 32-bit one's-complement sum to 16 bits and return its complement.
 *
 * csum: the accumulated sum.
 * Returns the inverted 16-bit fold, ready to be written to a checksum field.
 */
static __always_inline __sum16 csum_fold(__wsum csum)
{
    // First pass: add the upper half-word onto the lower one.
    __wsum upper = csum >> 16;
    __wsum lower = csum & 0xffff;
    __wsum folded = lower + upper;

    // Second pass: absorb the carry the first addition may have produced.
    folded = (folded & 0xffff) + (folded >> 16);

    return (__sum16)~folded;
}
icmp_wsum_accumulate 在 Cilium 源码中定义(我认为使用它需要注明出处并采用与 GPL 兼容的许可证)。icmp_payload 和 icmp_payload_end 应分别指向 ICMP 消息负载(通常是原始 IP 数据包)的开始和结束。