calico

先设置变量:

NS=cali
VETH=v-cali

创建 netns 和 veth, 把 veth 的一端塞进 netns, 并设置 ip:

ip netns add $NS

ip l add $VETH type veth peer name $VETH-peer

ip l set $VETH-peer up
ip l set $VETH netns $NS
ip netns exec $NS ip l set $VETH up

ip netns exec $NS ip a add 10.2.0.1/32 dev $VETH

然后在宿主机直接路由 ip 到 veth:

ip r add 10.2.0.1/32 dev $VETH-peer

netns 里设置 default gw 到 veth:

ip netns exec $NS ip r add default dev $VETH
[root@bogon ~]# ping 10.2.0.1
PING 10.2.0.1 (10.2.0.1) 56(84) bytes of data.
64 bytes from 10.2.0.1: icmp_seq=1 ttl=64 time=0.107 ms
64 bytes from 10.2.0.1: icmp_seq=2 ttl=64 time=0.024 ms
^C
--- 10.2.0.1 ping statistics ---
2 packets transmitted, 2 received, 0% packet loss, time 1021ms
rtt min/avg/max/mdev = 0.024/0.065/0.107/0.042 ms
[root@bogon ~]# route -n
Kernel IP routing table
Destination     Gateway         Genmask         Flags Metric Ref    Use Iface
0.0.0.0         10.10.16.254    0.0.0.0         UG    0      0        0 enahisic2i0
10.2.0.1        0.0.0.0         255.255.255.255 UH    0      0        0 v-cali-peer
[root@bogon ~]# ip netns exec cali  ip a
1: lo: <LOOPBACK> mtu 65536 qdisc noop state DOWN group default qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
98: v-cali@if97: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
    link/ether 16:95:64:35:e3:17 brd ff:ff:ff:ff:ff:ff link-netnsid 0
    inet 10.2.0.1/32 scope global v-cali
       valid_lft forever preferred_lft forever
    inet6 fe80::1495:64ff:fe35:e317/64 scope link 
       valid_lft forever preferred_lft forever
[root@bogon ~]# ip netns exec cali  tcpdump -i v-cali icmp -nnvv
tcpdump: listening on v-cali, link-type EN10MB (Ethernet), capture size 262144 bytes
11:48:00.249026 IP (tos 0x0, ttl 64, id 43401, offset 0, flags [DF], proto ICMP (1), length 84)
    10.10.16.81 > 10.2.0.1: ICMP echo request, id 46022, seq 1, length 64
11:48:00.249052 IP (tos 0x0, ttl 64, id 18221, offset 0, flags [none], proto ICMP (1), length 84)
    10.2.0.1 > 10.10.16.81: ICMP echo reply, id 46022, seq 1, length 64
11:48:01.252474 IP (tos 0x0, ttl 64, id 43423, offset 0, flags [DF], proto ICMP (1), length 84)
    10.10.16.81 > 10.2.0.1: ICMP echo request, id 46022, seq 2, length 64
11:48:01.252490 IP (tos 0x0, ttl 64, id 18254, offset 0, flags [none], proto ICMP (1), length 84)
    10.2.0.1 > 10.10.16.81: ICMP echo reply, id 46022, seq 2, length 64

这时候 host 和 netns 之间已经可以互 ping 了, 但是从 netns 访问外部地址 (比如 8.8.8.8) 还不通 (抓包发现是因为 arp 解析不到 mac 地址, 后面会加上 arp proxy 解决):

[root@bogon ~]# ip netns exec cali  ping 10.10.16.81
PING 10.10.16.81 (10.10.16.81) 56(84) bytes of data.
64 bytes from 10.10.16.81: icmp_seq=1 ttl=64 time=0.067 ms
64 bytes from 10.10.16.81: icmp_seq=2 ttl=64 time=0.036 ms
64 bytes from 10.10.16.81: icmp_seq=3 ttl=64 time=0.033 ms
64 bytes from 10.10.16.81: icmp_seq=4 ttl=64 time=0.024 ms
64 bytes from 10.10.16.81: icmp_seq=5 ttl=64 time=0.027 ms
^C^C
--- 10.10.16.81 ping statistics ---
5 packets transmitted, 5 received, 0% packet loss, time 4187ms
rtt min/avg/max/mdev = 0.024/0.037/0.067/0.016 ms
[root@bogon ~]# ip netns exec cali  ping 8.8.8.8
PING 8.8.8.8 (8.8.8.8) 56(84) bytes of data.
^C
--- 8.8.8.8 ping statistics ---
2 packets transmitted, 0 received, 100% packet loss, time 1047ms

[root@bogon ~]# 

添加snat,还是无法访问

[root@bogon ~]# iptables -t nat -A POSTROUTING -s 10.2.0.1/32 -j MASQUERADE
[root@bogon ~]# ip netns exec cali  ping 8.8.8.8
PING 8.8.8.8 (8.8.8.8) 56(84) bytes of data.
^C
--- 8.8.8.8 ping statistics ---
2 packets transmitted, 0 received, 100% packet loss, time 1027ms

[root@bogon ~]# 

host上抓包

[root@bogon ~]# tcpdump -i v-cali-peer arp -nv
tcpdump: listening on v-cali-peer, link-type EN10MB (Ethernet), capture size 262144 bytes
11:50:57.812451 ARP, Ethernet (len 6), IPv4 (len 4), Request who-has 8.8.8.8 tell 10.2.0.1, length 28
11:50:58.852454 ARP, Ethernet (len 6), IPv4 (len 4), Request who-has 8.8.8.8 tell 10.2.0.1, length 28
11:50:59.892511 ARP, Ethernet (len 6), IPv4 (len 4), Request who-has 8.8.8.8 tell 10.2.0.1, length 28
11:51:00.932453 ARP, Ethernet (len 6), IPv4 (len 4), Request who-has 8.8.8.8 tell 10.2.0.1, length 28
11:51:01.972453 ARP, Ethernet (len 6), IPv4 (len 4), Request who-has 8.8.8.8 tell 10.2.0.1, length 28
11:51:03.012519 ARP, Ethernet (len 6), IPv4 (len 4), Request who-has 8.8.8.8 tell 10.2.0.1, length 28

抓包发现 netns 一直在发 ARP 请求查询 8.8.8.8 的 mac 地址, 但没有任何应答, 所以在宿主机端的 veth 上开启 arp proxy:

[root@bogon ~]# echo 1 > /proc/sys/net/ipv4/conf/$VETH-peer/proxy_arp
[root@bogon ~]# sysctl -p
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_nonlocal_bind = 1
[root@bogon ~]# 
[root@bogon ~]# ip netns exec cali  ip n
8.8.8.8 dev v-cali lladdr 92:07:52:14:06:42 STALE
10.10.16.81 dev v-cali lladdr 92:07:52:14:06:42 STALE
[root@bogon ~]# 

可以访问通了

这下可以和 host 互 ping 了, google.com 也没问题了, 功能上没问题.

不过还有可以优化的地方: arp proxy 有一些副作用, 比如这里会导致 netns 里的 arp cache 不断膨胀, 每一个 outbound ip 都会产生一条 arp entry.

[root@bogon ~]# ip netns exec cali  ip n
8.8.8.8 dev v-cali lladdr 92:07:52:14:06:42 STALE
10.10.16.81 dev v-cali lladdr 92:07:52:14:06:42 STALE
114.114.114.114 dev v-cali lladdr 92:07:52:14:06:42 REACHABLE
[root@bogon ~]# 

为了解决这个问题, 我们用一个假的 ip 169.254.1.1 作为 link-local address, 绕一下:

先删掉原来的默认路由, 再加一条 scope link 的路由让 169.254.1.1 直连可达, 最后把默认网关指向它:
[root@bogon ~]# ip netns exec $NS ip r del default dev $VETH
[root@bogon ~]# ip netns exec $NS ip r add 169.254.1.1 dev $VETH  scope link
[root@bogon ~]# ip netns exec $NS ip r add default via 169.254.1.1 dev $VETH
[root@bogon ~]# ip netns exec $NS ip r 
default via 169.254.1.1 dev v-cali 
169.254.1.1 dev v-cali scope link 
[root@bogon ~]# ip netns exec $NS ip a
1: lo: <LOOPBACK> mtu 65536 qdisc noop state DOWN group default qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
98: v-cali@if97: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
    link/ether 16:95:64:35:e3:17 brd ff:ff:ff:ff:ff:ff link-netnsid 0
    inet 10.2.0.1/32 scope global v-cali
       valid_lft forever preferred_lft forever
    inet6 fe80::1495:64ff:fe35:e317/64 scope link 
       valid_lft forever preferred_lft forever
[root@bogon ~]# 
 [root@bogon ~]# ip netns exec cali  ip n
8.8.8.8 dev v-cali lladdr 92:07:52:14:06:42 STALE
10.10.16.81 dev v-cali lladdr 92:07:52:14:06:42 STALE
114.114.114.114 dev v-cali lladdr 92:07:52:14:06:42 STALE
[root@bogon ~]# ip netns exec cali  ip n del 8.8.8.8 dev v-cali
[root@bogon ~]# ip netns exec cali  ip n del 10.10.16.81 dev v-cali
[root@bogon ~]# ip netns exec cali  ip n del 114.114.114.114  dev v-cali
[root@bogon ~]# ip netns exec cali  ip n
[root@bogon ~]# ip netns exec cali  ping 8.8.8.8
PING 8.8.8.8 (8.8.8.8) 56(84) bytes of data.
64 bytes from 8.8.8.8: icmp_seq=1 ttl=103 time=279 ms
64 bytes from 8.8.8.8: icmp_seq=2 ttl=103 time=11.1 ms
^C
--- 8.8.8.8 ping statistics ---
2 packets transmitted, 2 received, 0% packet loss, time 1000ms
rtt min/avg/max/mdev = 11.161/145.151/279.141/133.990 ms
[root@bogon ~]# ip netns exec cali  ping 114.114.114.114
PING 114.114.114.114 (114.114.114.114) 56(84) bytes of data.
64 bytes from 114.114.
[root@bogon ~]# ip netns exec cali  ip n
10.10.16.81 dev v-cali lladdr 92:07:52:14:06:42 STALE
169.254.1.1 dev v-cali lladdr 92:07:52:14:06:42 REACHABLE
[root@bogon ~]# ip netns exec cali  ip a
1: lo: <LOOPBACK> mtu 65536 qdisc noop state DOWN group default qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
98: v-cali@if97: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
    link/ether 16:95:64:35:e3:17 brd ff:ff:ff:ff:ff:ff link-netnsid 0
    inet 10.2.0.1/32 scope global v-cali
       valid_lft forever preferred_lft forever
    inet6 fe80::1495:64ff:fe35:e317/64 scope link 
       valid_lft forever preferred_lft forever
[root@bogon ~]# ip a | grep cali
97: v-cali-peer@if98: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
[root@bogon ~]# ip a sh v-cali-peer
97: v-cali-peer@if98: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
    link/ether 92:07:52:14:06:42 brd ff:ff:ff:ff:ff:ff link-netnsid 5
    inet6 fe80::9007:52ff:fe14:642/64 scope link 
       valid_lft forever preferred_lft forever
[root@bogon ~]# 

在 host 上抓到的 arp 请求/应答报文:

[root@bogon ~]# tcpdump -i v-cali-peer arp -nv
tcpdump: listening on v-cali-peer, link-type EN10MB (Ethernet), capture size 262144 bytes
12:02:08.852461 ARP, Ethernet (len 6), IPv4 (len 4), Request who-has 10.2.0.1 tell 10.10.16.81, length 28
12:02:08.852487 ARP, Ethernet (len 6), IPv4 (len 4), Request who-has 169.254.1.1 tell 10.2.0.1, length 28
12:02:08.852499 ARP, Ethernet (len 6), IPv4 (len 4), Reply 169.254.1.1 is-at 92:07:52:14:06:42, length 28
12:02:08.852695 ARP, Ethernet (len 6), IPv4 (len 4), Reply 10.2.0.1 is-at 16:95:64:35:e3:17, length 28
[root@bogon ~]# iptables -t nat -A POSTROUTING -s 10.2.0.1/32 -j MASQUERADE
[root@bogon ~]# ip netns exec cali  ping 8.8.8.8
PING 8.8.8.8 (8.8.8.8) 56(84) bytes of data.
64 bytes from 8.8.8.8: icmp_seq=1 ttl=103 time=11.5 ms
64 bytes from 8.8.8.8: icmp_seq=2 ttl=103 time=11.0 ms
64 bytes from 8.8.8.8: icmp_seq=3 ttl=103 time=11.0 ms
^C
--- 8.8.8.8 ping statistics ---
3 packets transmitted, 3 received, 0% packet loss, time 2002ms
rtt min/avg/max/mdev = 11.082/11.229/11.513/0.234 ms
[root@bogon ~]# 
[root@bogon ~]# NS=cali
[root@bogon ~]# VETH=v-cali
[root@bogon ~]# ip netns add $NS
[root@bogon ~]# ip l add $VETH type veth peer name $VETH-peer
[root@bogon ~]# ip l set $VETH-peer up
[root@bogon ~]# ip l set $VETH netns $NS
[root@bogon ~]# ip netns exec $NS ip l set $VETH up
[root@bogon ~]# ip netns exec $NS ip a add 10.2.0.1/32 dev $VETH
[root@bogon ~]# ip r add 10.2.0.1/32 dev $VETH-peer
[root@bogon ~]# ip netns exec $NS ip r add default dev $VETH
[root@bogon ~]# 

calico plugin源码解析

func Main(version string) {
    // ...
    err := flagSet.Parse(os.Args[1:])
    // ...
    // 注册 `ADD` 和 `DEL` 命令
    skel.PluginMain(cmdAdd, nil, cmdDel,
        cniSpecVersion.PluginSupports("0.1.0", "0.2.0", "0.3.0", "0.3.1"),
        "Calico CNI plugin "+version)
}

ADD 命令里,主要做了三个逻辑:

  • 查询calico datastore里有没有WorkloadEndpoint对象和当前的pod名字匹配,没有匹配,则会创建新的WorkloadEndpoint对象,该对象内主要保存该pod在host network namespace内的网卡名字和pod ip地址,以及container network namespace的网卡名字等等信息,对象示例如下。
  • 创建一个veth pair,并把其中一个网卡置于宿主机端网络命名空间,另一个置于容器端网络命名空间。在container network namespace内创建网卡如eth0,并通过调用calico-ipam获得的IP地址赋值给该eth0网卡;在host network namespace内创建网卡,网卡名格式为 "cali" + sha1(namespace.pod)[:11] ,并设置MAC地址"ee:ee:ee:ee:ee:ee"。
  • 在容器端和宿主机端创建路由。在容器端,设置默认网关为 169.254.1.1 ,该网关地址代码写死的;在宿主机端,添加路由如 10.217.120.85 dev calid0bda9976d5 scope link ,其中 10.217.120.85 是pod ip地址,calid0bda9976d5 是该pod在宿主机端的网卡,也就是veth pair在宿主机这端的virtual ethernet interface虚拟网络设备。
原文地址:https://www.cnblogs.com/dream397/p/14862268.html