kube-proxy ipvs

IPVS用法#

IPVS可以通过ipvsadm 命令进行配置,如-L列举,-A添加,-D删除。

如下命令创建一个service实例172.17.0.1:32016,其中-t指定监听的为TCP端口,-s指定调度算法为轮询算法rr(Round Robin)。ipvs支持简单轮询(rr)、加权轮询(wrr)、最少连接(lc)、源地址或者目标地址散列(sh、dh)等10种调度算法。

ipvsadm -A -t 172.17.0.1:32016 -s rr

创建service之后,还需要用-a为其添加后端server(real server):-r指定server地址,-w指定权值,-m指定转发模式为masquerading,即NAT模式(-g为gatewaying,即直连路由模式),如下所示:

ipvsadm -a -t 172.17.0.1:32016 -r 10.244.1.2:8080 -m -w 1
ipvsadm -a -t 172.17.0.1:32016 -r 10.244.1.3:8080 -m -w 1
ipvsadm -a -t 172.17.0.1:32016 -r 10.244.3.2:8080 -m -w 1
root@cloud:~# ipvsadm -S -n | grep 30091
-A -t 10.10.16.47:30091 -s rr
-a -t 10.10.16.47:30091 -r 10.244.41.7:80 -m -w 1
-a -t 10.10.16.47:30091 -r 10.244.129.131:80 -m -w 1
-A -t 10.244.2.1:30091 -s rr
-a -t 10.244.2.1:30091 -r 10.244.41.7:80 -m -w 1
-a -t 10.244.2.1:30091 -r 10.244.129.131:80 -m -w 1
-A -t 127.0.0.1:30091 -s rr
-a -t 127.0.0.1:30091 -r 10.244.41.7:80 -m -w 1
-a -t 127.0.0.1:30091 -r 10.244.129.131:80 -m -w 1
-A -t 172.17.0.1:30091 -s rr
-a -t 172.17.0.1:30091 -r 10.244.41.7:80 -m -w 1
-a -t 172.17.0.1:30091 -r 10.244.129.131:80 -m -w 1

以 ipvs 模式 运行kube-proxy

前提条件

ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
nf_conntrack_ipv4
grep -e ipvs -e nf_conntrack_ipv4 /lib/modules/$(uname -r)/modules.builtin

确保IPVS需要的内核模块已加载

root@cloud:~# grep -e ipvs -e nf_conntrack_ipv4 /lib/modules/$(uname -r)/modules.builtin
root@cloud:~# modprobe -- ip_vs
root@cloud:~# modprobe -- ip_vs_rr
root@cloud:~# modprobe -- ip_vs_wrr
root@cloud:~# modprobe -- ip_vs_sh
root@cloud:~# modprobe -- nf_conntrack_ipv4
modprobe: FATAL: Module nf_conntrack_ipv4 not found in directory /lib/modules/5.5.19-050519-generic
root@cloud:~# 
root@cloud:~# modprobe -- nf_conntrack
root@cloud:~# grep  -e nf_conntrack /lib/modules/$(uname -r)/modules.builtin
root@cloud:~# 
从 linux kernel 4.19 版本开始,nf_conntrack_ipv4 已经合并进 nf_conntrack(因此上面 modprobe nf_conntrack_ipv4 会报 not found,改为加载 nf_conntrack 即可),而 kube-proxy 1.13 以下版本强依赖 nf_conntrack_ipv4。

 
modprobe br_netfilter

cat > /etc/sysconfig/modules/ipvs.modules <<EOF
#!/bin/bash
modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack
EOF

chmod 755 /etc/sysconfig/modules/ipvs.modules && bash /etc/sysconfig/modules/ipvs.modules && lsmod | grep -e ip_vs -e nf_conntrack
root@ubuntu:~# mkdir -p /etc/sysconfig/modules/
root@ubuntu:~# cat > /etc/sysconfig/modules/ipvs.modules <<EOF
> #!/bin/bash
> modprobe -- ip_vs
> modprobe -- ip_vs_rr
> modprobe -- ip_vs_wrr
> modprobe -- ip_vs_sh
> modprobe -- nf_conntrack
> EOF
root@ubuntu:~# chmod 755 /etc/sysconfig/modules/ipvs.modules && bash /etc/sysconfig/modules/ipvs.modules && lsmod | grep -e ip_vs -e nf_conntrack
ip_vs_sh               16384  0
ip_vs_wrr              16384  0
ip_vs_rr               16384  0
ip_vs                 167936  6 ip_vs_rr,ip_vs_sh,ip_vs_wrr
nf_conntrack_netlink    53248  0
nfnetlink              20480  3 nf_conntrack_netlink,ip_set
nf_conntrack          155648  8 xt_conntrack,nf_nat,nf_nat_ipv6,ipt_MASQUERADE,nf_nat_ipv4,xt_nat,nf_conntrack_netlink,ip_vs
nf_defrag_ipv6         24576  2 nf_conntrack,ip_vs
nf_defrag_ipv4         16384  1 nf_conntrack
libcrc32c              16384  5 nf_conntrack,nf_nat,btrfs,raid456,ip_vs
root@ubuntu:~# 
安装ipvs相关软件包


yum -y install ipvsadm ipset
 kubectl edit configmap kube-proxy -n kube-system
mode: "ipvs"
root@ubuntu:~# kubectl get pods -n kube-system -o wide
NAME                                       READY   STATUS    RESTARTS   AGE     IP               NODE      NOMINATED NODE   READINESS GATES
calico-kube-controllers-5978c5f6b5-tk6pg   1/1     Running   0          6d16h   10.244.243.194   ubuntu    <none>           <none>
calico-node-2fp7r                          1/1     Running   0          42h     10.10.16.251     centos7   <none>           <none>
calico-node-j4g4h                          1/1     Running   0          24h     10.10.16.82      ubuntu    <none>           <none>
calico-node-knqxw                          1/1     Running   0          42h     10.10.16.81      bogon     <none>           <none>
calico-node-sl4sz                          1/1     Running   0          42h     10.10.16.47      cloud     <none>           <none>
coredns-66bff467f8-hlbzk                   1/1     Running   0          3d21h   10.244.29.1      bogon     <none>           <none>
coredns-66bff467f8-zx85v                   1/1     Running   0          3d21h   10.244.41.1      cloud     <none>           <none>
etcd-ubuntu                                1/1     Running   4          6d16h   10.10.16.82      ubuntu    <none>           <none>
kube-apiserver-ubuntu                      1/1     Running   7          6d16h   10.10.16.82      ubuntu    <none>           <none>
kube-controller-manager-ubuntu             1/1     Running   5          6d16h   10.10.16.82      ubuntu    <none>           <none>
kube-proxy-798sq                           1/1     Running   0          6d16h   10.10.16.47      cloud     <none>           <none>
kube-proxy-8hh62                           1/1     Running   0          6d16h   10.10.16.82      ubuntu    <none>           <none>
kube-proxy-kwcdg                           1/1     Running   0          44h     10.10.16.251     centos7   <none>           <none>
kube-proxy-l268b                           1/1     Running   0          6d16h   10.10.16.81      bogon     <none>           <none>
kube-scheduler-ubuntu                      1/1     Running   7          6d16h   10.10.16.82      ubuntu    <none>           <none>
root@ubuntu:~# kubectl get pod  -n kube-system|grep kube-proxy|awk '{print "kubectl delete po "$1" -n kube-system"}'|sh
pod "kube-proxy-798sq" deleted
pod "kube-proxy-8hh62" deleted
pod "kube-proxy-kwcdg" deleted
pod "kube-proxy-l268b" deleted
root@ubuntu:~# kubectl logs kube-proxy -n kube-system
Error from server (NotFound): pods "kube-proxy" not found
root@ubuntu:~# kubectl get pods -n kube-system -o wide
NAME                                       READY   STATUS    RESTARTS   AGE     IP               NODE      NOMINATED NODE   READINESS GATES
calico-kube-controllers-5978c5f6b5-tk6pg   1/1     Running   0          6d16h   10.244.243.194   ubuntu    <none>           <none>
calico-node-2fp7r                          1/1     Running   0          42h     10.10.16.251     centos7   <none>           <none>
calico-node-j4g4h                          1/1     Running   0          24h     10.10.16.82      ubuntu    <none>           <none>
calico-node-knqxw                          1/1     Running   0          42h     10.10.16.81      bogon     <none>           <none>
calico-node-sl4sz                          1/1     Running   0          42h     10.10.16.47      cloud     <none>           <none>
coredns-66bff467f8-hlbzk                   1/1     Running   0          3d21h   10.244.29.1      bogon     <none>           <none>
coredns-66bff467f8-zx85v                   1/1     Running   0          3d21h   10.244.41.1      cloud     <none>           <none>
etcd-ubuntu                                1/1     Running   4          6d16h   10.10.16.82      ubuntu    <none>           <none>
kube-apiserver-ubuntu                      1/1     Running   7          6d16h   10.10.16.82      ubuntu    <none>           <none>
kube-controller-manager-ubuntu             1/1     Running   5          6d16h   10.10.16.82      ubuntu    <none>           <none>
kube-proxy-5w89t                           1/1     Running   0          99s     10.10.16.47      cloud     <none>           <none>
kube-proxy-96qlg                           1/1     Running   0          97s     10.10.16.82      ubuntu    <none>           <none>
kube-proxy-cqn7b                           1/1     Running   0          87s     10.10.16.81      bogon     <none>           <none>
kube-proxy-xrqsb                           1/1     Running   0          94s     10.10.16.251     centos7   <none>           <none>
kube-scheduler-ubuntu                      1/1     Running   7          6d16h   10.10.16.82      ubuntu    <none>           <none>
root@ubuntu:~# kubectl logs kube-proxy-5w89t   -n kube-system 
I0625 03:47:54.614160       1 node.go:136] Successfully retrieved node IP: 10.10.16.47
I0625 03:47:54.614248       1 server_others.go:259] Using ipvs Proxier.
W0625 03:47:54.614794       1 proxier.go:429] IPVS scheduler not specified, use rr by default
I0625 03:47:54.615248       1 server.go:583] Version: v1.18.1
I0625 03:47:54.616438       1 conntrack.go:52] Setting nf_conntrack_max to 2097152
I0625 03:47:54.616952       1 config.go:315] Starting service config controller
I0625 03:47:54.616987       1 config.go:133] Starting endpoints config controller
I0625 03:47:54.617008       1 shared_informer.go:223] Waiting for caches to sync for service config
I0625 03:47:54.617041       1 shared_informer.go:223] Waiting for caches to sync for endpoints config
I0625 03:47:54.717171       1 shared_informer.go:230] Caches are synced for service config 
I0625 03:47:54.717256       1 shared_informer.go:230] Caches are synced for endpoints config 

创建pod

root@ubuntu:~# cat web-anti-affinity-two.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: web-ipvs
spec:
  selector:
    matchLabels:
      app: web-ipvs
  replicas: 2
  template:
    metadata:
      labels:
        app: web-ipvs
    spec:
      affinity:
              #pod 反亲和性, 打散 web-ipvs 各个副本
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
          - labelSelector:
              matchExpressions:
              - key: app
                operator: In
                values:
                - web-ipvs
            topologyKey: "kubernetes.io/hostname"
      containers:
      - image: nginx
        imagePullPolicy: IfNotPresent
        name: web2-worker
        ports:
        - containerPort: 80
          protocol: TCP
      nodeSelector:
        rr-group: rr2
root@ubuntu:~# kubectl get svc
NAME                TYPE        CLUSTER-IP      EXTERNAL-IP   PORT(S)           AGE
ipvs-nodeport-svc   NodePort    10.111.249.68   <none>        30001:30091/TCP   6m53s
kubernetes          ClusterIP   10.96.0.1       <none>        443/TCP           6d19h
nodeport-svc        NodePort    10.102.82.74    <none>        3000:30090/TCP    3d3h
root@ubuntu:~# kubectl get pods  -o wide
NAME                         READY   STATUS    RESTARTS   AGE   IP               NODE      NOMINATED NODE   READINESS GATES
web-ipvs-777f69dbf8-qc27c    1/1     Running   0          15m   10.244.129.131   centos7   <none>           <none>
web-ipvs-777f69dbf8-xsscw    1/1     Running   0          15m   10.244.41.7      cloud     <none>           <none>

创建svc

root@ubuntu:~# cat web-ipvs-svc.yml 
apiVersion: v1
kind: Service
metadata: 
  name: ipvs-nodeport-svc
spec:
  type: NodePort
  selector:
    app: web-ipvs
  ports:
  - protocol: TCP
    port: 30001
    targetPort: 80
    nodePort: 30091
root@ubuntu:~# ipvsadm -ln

root@cloud:~# ipvsadm -ln --stats
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port               Conns   InPkts  OutPkts  InBytes OutBytes
  -> RemoteAddress:Port
TCP  172.17.0.1:30090                    0        0        0        0        0
  -> 10.244.29.6:80                      0        0        0        0        0
  -> 10.244.41.5:80                      0        0        0        0        0
  -> 10.244.129.129:80                   0        0        0        0        0
  -> 10.244.243.199:80                   0        0        0        0        0
TCP  10.10.16.47:30090                   0        0        0        0        0
  -> 10.244.29.6:80                      0        0        0        0        0
  -> 10.244.41.5:80                      0        0        0        0        0
  -> 10.244.129.129:80                   0        0        0        0        0
  -> 10.244.243.199:80                   0        0        0        0        0
TCP  10.10.16.47:30091                   2       12        8     1456     1992
  -> 10.244.41.7:80                      1        4        2      216      112
  -> 10.244.129.131:80                   1        8        6     1240     1880

dnat不经过iptables 

root@cloud:~# iptables -nvL -t nat | grep 30091
root@cloud:~# 

snat conntrack

root@cloud:~# conntrack -L -o ktimestamp  | grep 10.244.41.7
conntrack v1.4.4 (conntrack-tools): 101 flow entries have been shown.
tcp      6 110 TIME_WAIT src=192.168.117.51 dst=10.10.16.47 sport=57852 dport=30091 src=10.244.41.7 dst=10.10.16.47 sport=80 dport=49282 [ASSURED] mark=0 use=1
[root@centos7 ~]# conntrack -L -o ktimestamp  | grep 10.244.129.131
conntrack v1.4.4 (conntrack-tools): 53 flow entries have been shown.
tcp      6 85 TIME_WAIT src=10.10.16.47 dst=10.244.129.131 sport=57982 dport=80 src=10.244.129.131 dst=10.10.16.47 sport=80 dport=57982 [ASSURED] mark=0 secctx=system_u:object_r:unlabeled_t:s0 use=1
[root@centos7 ~]# 

cloud pod tcpdump

root@cloud:~# nsenter -n --target 989208
root@cloud:~#  tcpdump -i eth0 tcp and port 80 -eennvv
tcpdump: listening on eth0, link-type EN10MB (Ethernet), capture size 262144 bytes
14:42:28.436925 ee:ee:ee:ee:ee:ee > ee:14:08:88:a5:c1, ethertype IPv4 (0x0800), length 74: (tos 0x0, ttl 56, id 32767, offset 0, flags [DF], proto TCP (6), length 60)
    10.10.16.47.49282 > 10.244.41.7.80: Flags [S], cksum 0x9ad6 (correct), seq 1352807746, win 64240, options [mss 1460,nop,wscale 8,sackOK,TS val 279186601 ecr 0], length 0
14:42:28.437001 ee:14:08:88:a5:c1 > ee:ee:ee:ee:ee:ee, ethertype IPv4 (0x0800), length 74: (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto TCP (6), length 60)
    10.244.41.7.80 > 10.10.16.47.49282: Flags [S.], cksum 0x4e62 (incorrect -> 0xcb71), seq 394415451, ack 1352807747, win 65236, options [mss 1400,sackOK,TS val 1332352869 ecr 279186601,nop,wscale 7], length 0
14:42:28.438410 ee:ee:ee:ee:ee:ee > ee:14:08:88:a5:c1, ethertype IPv4 (0x0800), length 66: (tos 0x0, ttl 56, id 32769, offset 0, flags [DF], proto TCP (6), length 52)
    10.10.16.47.49282 > 10.244.41.7.80: Flags [.], cksum 0xf4d4 (correct), seq 1, ack 1, win 1024, options [nop,nop,TS val 279186603 ecr 1332352869], length 0
14:42:31.552422 ee:ee:ee:ee:ee:ee > ee:14:08:88:a5:c1, ethertype IPv4 (0x0800), length 66: (tos 0x0, ttl 56, id 32777, offset 0, flags [DF], proto TCP (6), length 52)
    10.10.16.47.49282 > 10.244.41.7.80: Flags [F.], cksum 0xe8a9 (correct), seq 1, ack 1, win 1024, options [nop,nop,TS val 279189717 ecr 1332352869], length 0
14:42:31.552579 ee:14:08:88:a5:c1 > ee:ee:ee:ee:ee:ee, ethertype IPv4 (0x0800), length 66: (tos 0x0, ttl 64, id 37122, offset 0, flags [DF], proto TCP (6), length 52)
    10.244.41.7.80 > 10.10.16.47.49282: Flags [F.], cksum 0x4e5a (incorrect -> 0xde7e), seq 1, ack 2, win 510, options [nop,nop,TS val 1332355985 ecr 279189717], length 0
14:42:31.555556 ee:ee:ee:ee:ee:ee > ee:14:08:88:a5:c1, ethertype IPv4 (0x0800), length 66: (tos 0x0, ttl 56, id 32780, offset 0, flags [DF], proto TCP (6), length 52)
    10.10.16.47.49282 > 10.244.41.7.80: Flags [.], cksum 0xdc79 (correct), seq 2, ack 2, win 1024, options [nop,nop,TS val 279189720 ecr 1332355985], length 0
root@cloud:~# route -n
Kernel IP routing table
Destination     Gateway         Genmask         Flags Metric Ref    Use Iface
0.0.0.0         10.10.16.254    0.0.0.0         UG    0      0        0 enahisic2i0
9.251.0.0       172.17.0.1      255.255.0.0     UG    0      0        0 docker0
10.10.16.0      0.0.0.0         255.255.255.0   U     0      0        0 enahisic2i0
10.99.1.231     10.10.16.82     255.255.255.255 UGH   0      0        0 enahisic2i0
10.110.79.116   10.10.16.82     255.255.255.255 UGH   0      0        0 enahisic2i0
10.110.171.213  10.10.16.82     255.255.255.255 UGH   0      0        0 enahisic2i0
10.244.2.0      0.0.0.0         255.255.255.0   U     0      0        0 cni0
10.244.41.0     0.0.0.0         255.255.255.192 U     0      0        0 *
10.244.41.1     0.0.0.0         255.255.255.255 UH    0      0        0 cali027a65c4a41
10.244.41.5     0.0.0.0         255.255.255.255 UH    0      0        0 cali4cba7a26a1f
10.244.41.7     0.0.0.0         255.255.255.255 UH    0      0        0 calid88772f1084
10.244.129.128  10.10.16.251    255.255.255.192 UG    0      0        0 enahisic2i0
31.31.31.31     10.10.16.254    255.255.255.255 UGH   0      0        0 enahisic2i0
172.17.0.0      0.0.0.0         255.255.0.0     U     0      0        0 docker0
root@cloud:~# ip  a | grep  10.244.41   ---------没有设备有10.244.41.xx段的ip
 root@cloud:~#

centos7 pod tcpdump

没有设备有10.244.129.xx段的ip

[root@centos7 ~]# ip a | grep 10.244.41
[root@centos7 ~]# ip a | grep 10.244.129
[root@centos7 ~]#

源ip是10.10.16.47

[root@centos7 ~]# nsenter -n --target  120293
[root@centos7 ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
       valid_lft forever preferred_lft forever
    inet6 ::1/128 scope host 
       valid_lft forever preferred_lft forever
2: tunl0@NONE: <NOARP> mtu 1480 qdisc noop state DOWN group default qlen 1000
    link/ipip 0.0.0.0 brd 0.0.0.0
4: eth0@if24: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1480 qdisc noqueue state UP group default 
    link/ether ae:ef:a2:57:fc:f2 brd ff:ff:ff:ff:ff:ff link-netnsid 0
    inet 10.244.129.131/32 brd 10.244.129.131 scope global eth0
       valid_lft forever preferred_lft forever
    inet6 fe80::acef:a2ff:fe57:fcf2/64 scope link 
       valid_lft forever preferred_lft forever
[root@centos7 ~]# tcpdump -i eth0 tcp and port 80 -eennvv
tcpdump: listening on eth0, link-type EN10MB (Ethernet), capture size 262144 bytes
02:42:28.443016 ee:ee:ee:ee:ee:ee > ae:ef:a2:57:fc:f2, ethertype IPv4 (0x0800), length 74: (tos 0x0, ttl 55, id 32768, offset 0, flags [DF], proto TCP (6), length 60)
    10.10.16.47.57982 > 10.244.129.131.80: Flags [S], cksum 0x8413 (correct), seq 3100666206, win 64240, options [mss 1460,nop,wscale 8,sackOK,TS val 279186601 ecr 0], length 0
02:42:28.443050 ae:ef:a2:57:fc:f2 > ee:ee:ee:ee:ee:ee, ethertype IPv4 (0x0800), length 74: (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto TCP (6), length 60)
    10.244.129.131.80 > 10.10.16.47.57982: Flags [S.], cksum 0xa6de (incorrect -> 0x0b87), seq 1401284533, ack 3100666207, win 28560, options [mss 1440,sackOK,TS val 1717461591 ecr 279186601,nop,wscale 7], length 0
02:42:28.444316 ee:ee:ee:ee:ee:ee > ae:ef:a2:57:fc:f2, ethertype IPv4 (0x0800), length 66: (tos 0x0, ttl 55, id 32771, offset 0, flags [DF], proto TCP (6), length 52)
    10.10.16.47.57982 > 10.244.129.131.80: Flags [.], cksum 0xa5cb (correct), seq 1, ack 1, win 1026, options [nop,nop,TS val 279186603 ecr 1717461591], length 0
02:42:28.454514 ee:ee:ee:ee:ee:ee > ae:ef:a2:57:fc:f2, ethertype IPv4 (0x0800), length 501: (tos 0x0, ttl 55, id 32772, offset 0, flags [DF], proto TCP (6), length 487)
    10.10.16.47.57982 > 10.244.129.131.80: Flags [P.], cksum 0xc92a (correct), seq 1:436, ack 1, win 1026, options [nop,nop,TS val 279186612 ecr 1717461591], length 435: HTTP, length: 435
        GET / HTTP/1.1
        Host: 10.10.16.47:30091
        Connection: keep-alive
        Upgrade-Insecure-Requests: 1
        User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36
        Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9
        Accept-Encoding: gzip, deflate
        Accept-Language: zh-CN,zh;q=0.9

02:42:28.454527 ae:ef:a2:57:fc:f2 > ee:ee:ee:ee:ee:ee, ethertype IPv4 (0x0800), length 66: (tos 0x0, ttl 64, id 398, offset 0, flags [DF], proto TCP (6), length 52)
    10.244.129.131.80 > 10.10.16.47.57982: Flags [.], cksum 0xa6d6 (incorrect -> 0xa71e), seq 1, ack 436, win 232, options [nop,nop,TS val 1717461602 ecr 279186612], length 0
02:42:28.455151 ae:ef:a2:57:fc:f2 > ee:ee:ee:ee:ee:ee, ethertype IPv4 (0x0800), length 304: (tos 0x0, ttl 64, id 399, offset 0, flags [DF], proto TCP (6), length 290)
    10.244.129.131.80 > 10.10.16.47.57982: Flags [P.], cksum 0xa7c4 (incorrect -> 0xdb6c), seq 1:239, ack 436, win 232, options [nop,nop,TS val 1717461603 ecr 279186612], length 238: HTTP, length: 238
        HTTP/1.1 200 OK
        Server: nginx/1.21.0
        Date: Fri, 25 Jun 2021 06:42:28 GMT
        Content-Type: text/html
        Content-Length: 612
        Last-Modified: Tue, 25 May 2021 12:28:56 GMT
        Connection: keep-alive
        ETag: "60aced88-264"
        Accept-Ranges: bytes

02:42:28.455420 ae:ef:a2:57:fc:f2 > ee:ee:ee:ee:ee:ee, ethertype IPv4 (0x0800), length 678: (tos 0x0, ttl 64, id 400, offset 0, flags [DF], proto TCP (6), length 664)
    10.244.129.131.80 > 10.10.16.47.57982: Flags [P.], cksum 0xa93a (incorrect -> 0xe7f7), seq 239:851, ack 436, win 232, options [nop,nop,TS val 1717461603 ecr 279186612], length 612: HTTP
02:42:28.458165 ee:ee:ee:ee:ee:ee > ae:ef:a2:57:fc:f2, ethertype IPv4 (0x0800), length 66: (tos 0x0, ttl 55, id 32773, offset 0, flags [DF], proto TCP (6), length 52)
    10.10.16.47.57982 > 10.244.129.131.80: Flags [.], cksum 0xa0af (correct), seq 436, ack 851, win 1023, options [nop,nop,TS val 279186617 ecr 1717461603], length 0
02:42:28.558161 ee:ee:ee:ee:ee:ee > ae:ef:a2:57:fc:f2, ethertype IPv4 (0x0800), length 447: (tos 0x0, ttl 55, id 32775, offset 0, flags [DF], proto TCP (6), length 433)
    10.10.16.47.57982 > 10.244.129.131.80: Flags [P.], cksum 0xa76c (correct), seq 436:817, ack 851, win 1023, options [nop,nop,TS val 279186716 ecr 1717461603], length 381: HTTP, length: 381
        GET /favicon.ico HTTP/1.1
        Host: 10.10.16.47:30091
        Connection: keep-alive
        User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36
        Accept: image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8
        Referer: http://10.10.16.47:30091/
        Accept-Encoding: gzip, deflate
        Accept-Language: zh-CN,zh;q=0.9

02:42:28.558313 ae:ef:a2:57:fc:f2 > ee:ee:ee:ee:ee:ee, ethertype IPv4 (0x0800), length 776: (tos 0x0, ttl 64, id 401, offset 0, flags [DF], proto TCP (6), length 762)
    10.244.129.131.80 > 10.10.16.47.57982: Flags [P.], cksum 0xa99c (incorrect -> 0x7286), seq 851:1561, ack 817, win 240, options [nop,nop,TS val 1717461706 ecr 279186716], length 710: HTTP, length: 710
        HTTP/1.1 404 Not Found
        Server: nginx/1.21.0
        Date: Fri, 25 Jun 2021 06:42:28 GMT
        Content-Type: text/html
        Content-Length: 555
        Connection: keep-alive

        <html>
        <head><title>404 Not Found</title></head>
        <body>
        <center><h1>404 Not Found</h1></center>
        <hr><center>nginx/1.21.0</center>
        </body>
        </html>
        <!-- a padding to disable MSIE and Chrome friendly error page -->
        <!-- a padding to disable MSIE and Chrome friendly error page -->
        <!-- a padding to disable MSIE and Chrome friendly error page -->
        <!-- a padding to disable MSIE and Chrome friendly error page -->
        <!-- a padding to disable MSIE and Chrome friendly error page -->
        <!-- a padding to disable MSIE and Chrome friendly error page -->
02:42:28.605575 ee:ee:ee:ee:ee:ee > ae:ef:a2:57:fc:f2, ethertype IPv4 (0x0800), length 66: (tos 0x0, ttl 55, id 32776, offset 0, flags [DF], proto TCP (6), length 52)
    10.10.16.47.57982 > 10.244.129.131.80: Flags [.], cksum 0x9b6f (correct), seq 817, ack 1561, win 1026, options [nop,nop,TS val 279186764 ecr 1717461706], length 0
02:42:31.558228 ee:ee:ee:ee:ee:ee > ae:ef:a2:57:fc:f2, ethertype IPv4 (0x0800), length 66: (tos 0x0, ttl 55, id 32778, offset 0, flags [DF], proto TCP (6), length 52)
    10.10.16.47.57982 > 10.244.129.131.80: Flags [F.], cksum 0x8fe5 (correct), seq 817, ack 1561, win 1026, options [nop,nop,TS val 279189717 ecr 1717461706], length 0
02:42:31.558342 ae:ef:a2:57:fc:f2 > ee:ee:ee:ee:ee:ee, ethertype IPv4 (0x0800), length 66: (tos 0x0, ttl 64, id 402, offset 0, flags [DF], proto TCP (6), length 52)
    10.244.129.131.80 > 10.10.16.47.57982: Flags [F.], cksum 0xa6d6 (incorrect -> 0x873e), seq 1561, ack 818, win 240, options [nop,nop,TS val 1717464706 ecr 279189717], length 0
02:42:31.561333 ee:ee:ee:ee:ee:ee > ae:ef:a2:57:fc:f2, ethertype IPv4 (0x0800), length 66: (tos 0x0, ttl 55, id 32779, offset 0, flags [DF], proto TCP (6), length 52)
    10.10.16.47.57982 > 10.244.129.131.80: Flags [.], cksum 0x8429 (correct), seq 818, ack 1562, win 1026, options [nop,nop,TS val 279189720 ecr 1717464706], length 0
root@cloud:~# iptables -nvL -t nat | grep 30091
root@cloud:~# iptables -t nat -nL KUBE-SERVICES
Chain KUBE-SERVICES (2 references)
target     prot opt source               destination         
KUBE-MARK-MASQ  all  -- !10.244.0.0/16        0.0.0.0/0            /* Kubernetes service cluster ip + port for masquerade purpose */ match-set KUBE-CLUSTER-IP dst,dst
KUBE-NODE-PORT  all  --  0.0.0.0/0            0.0.0.0/0            ADDRTYPE match dst-type LOCAL
ACCEPT     all  --  0.0.0.0/0            0.0.0.0/0            match-set KUBE-CLUSTER-IP dst,dst
root@cloud:~# iptables -t nat -nL KUBE-NODE-PORT
Chain KUBE-NODE-PORT (1 references)
target     prot opt source               destination         
KUBE-MARK-MASQ  tcp  --  0.0.0.0/0            0.0.0.0/0            /* Kubernetes nodeport TCP port for masquerade purpose */ match-set KUBE-NODE-PORT-TCP dst
root@cloud:~# iptables -t nat -nL KUBE-MARK-MASQ
Chain KUBE-MARK-MASQ (15 references)
target     prot opt source               destination         
MARK       all  --  0.0.0.0/0            0.0.0.0/0            MARK or 0x4000
root@cloud:~#  ipset list KUBE-NODE-PORT-TCP
Name: KUBE-NODE-PORT-TCP
Type: bitmap:port
Revision: 3
Header: range 0-65535
Size in memory: 8264
References: 1
Number of entries: 2
Members:
30090
30091
root@cloud:~# iptables -t nat -nL POSTROUTING
Chain POSTROUTING (policy ACCEPT)
target     prot opt source               destination         
cali-POSTROUTING  all  --  0.0.0.0/0            0.0.0.0/0            /* cali:O3lYWMrLQYEMJtB5 */
KUBE-POSTROUTING  all  --  0.0.0.0/0            0.0.0.0/0            /* kubernetes postrouting rules */
MASQUERADE  all  --  172.17.0.0/16        0.0.0.0/0           
ANTREA-POSTROUTING  all  --  0.0.0.0/0            0.0.0.0/0            /* Antrea: jump to Antrea postrouting rules */
RETURN     all  --  10.244.0.0/16        10.244.0.0/16       
MASQUERADE  all  --  10.244.0.0/16       !224.0.0.0/4         
RETURN     all  -- !10.244.0.0/16        10.244.2.0/24       
MASQUERADE  all  -- !10.244.0.0/16        10.244.0.0/16       
root@cloud:~# iptables -t nat -nL KUBE-POSTROUTING
Chain KUBE-POSTROUTING (1 references)
target     prot opt source               destination         
MASQUERADE  all  --  0.0.0.0/0            0.0.0.0/0            /* kubernetes service traffic requiring SNAT */ mark match 0x4000/0x4000
MASQUERADE  all  --  0.0.0.0/0            0.0.0.0/0            /* Kubernetes endpoints dst ip:port, source ip for solving hairpin purpose */ match-set KUBE-LOOP-BACK dst,dst,src
root@cloud:~# 

用 ipvs 手动实现 Kube Proxy 的功能

root@ubuntu:~# kubectl delete svc ipvs-nodeport-svc
service "ipvs-nodeport-svc" deleted
root@ubuntu:~# kubectl get svc
NAME           TYPE        CLUSTER-IP     EXTERNAL-IP   PORT(S)          AGE
kubernetes     ClusterIP   10.96.0.1      <none>        443/TCP          6d20h
nodeport-svc   NodePort    10.102.82.74   <none>        3000:30090/TCP   3d4h
root@ubuntu:~# 
root@ubuntu:~# ipvsadm -A -t 10.10.16.82:30091 -s rr

root@ubuntu:~# ipvsadm -a -t 10.10.16.82:30091  -r  10.244.41.7:80  -m
Memory allocation problem
root@ubuntu:~# ipvsadm -a -t 10.10.16.82:30091  -r  10.244.129.131:80  -m
Memory allocation problem
root@ubuntu:~# 
root@ubuntu:~# ipvsadm -D -t 10.10.16.82:30091
Memory allocation problem
root@ubuntu:~# 
root@cloud:~# ipvsadm -A -t 10.10.16.47:30091 -s rr
root@cloud:~# ipvsadm -a -t 10.10.16.47:30091  -r  10.244.41.7:80  -m
root@cloud:~# ipvsadm -a -t 10.10.16.47:30091  -r  10.244.129.131:80  -m
Memory allocation problem
root@cloud:~# 

一些解释:

  • 对于所有发往 10.10.16.82:30091 
    的流量,将负载均衡到 10.244.41.7:80
    和 10.244.129.131:80
  • 使用轮询 (rr) 算法实现负载均衡
  • 两个后端,每个后端的权重为 1(各 50%)
  • 使用 MASQ(增强型 SNAT)在 VIP 和 RealIP 之间进行流量转发

KubeProxy的IPVS模式

深入理解 Kubernetes 网络模型:自己实现 Kube Proxy 的功能

ipvsadm使用

原文地址:https://www.cnblogs.com/dream397/p/14930256.html