IP输入 之 ip_rcv && ip_rcv_finish

ip层收包流程概述:

(1) 在inet_init中注册了类型为ETH_P_IP协议的数据包的回调ip_rcv

(2) 当二层数据包接收完毕后,会调用netif_receive_skb,根据协议类型向上层分发

(3) 类型为ETH_P_IP的数据包被传递到三层,调用ip_rcv函数

(4) ip_rcv完成基本的校验和处理工作后,经过PRE_ROUTING钩子点

(5) 经过PRE_ROUTING钩子点之后,调用ip_rcv_finish完成数据包接收,包括选项处理,路由查询,并且根据路由决定数据包是发往本机还是转发

以下为源码分析:

1 static struct packet_type ip_packet_type __read_mostly = {    /* packet-type entry that binds ETH_P_IP frames to ip_rcv */
2     .type = cpu_to_be16(ETH_P_IP),    /* protocol type to match, converted to big-endian 16-bit */
3     .func = ip_rcv,                   /* receive callback invoked for matching packets */
4 };
  1 /*
  2  *     Main IP Receive routine. Drops PACKET_OTHERHOST packets, validates the IPv4 header (length, version, checksum, total length), trims the skb to tot_len and passes it to the NF_INET_PRE_ROUTING hook with ip_rcv_finish as the follow-up handler. Every failure path returns NET_RX_DROP.
  3  */
  4 int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
  5 {
  6     const struct iphdr *iph;
  7     struct net *net;
  8     u32 len;
  9 
 10     /* When the interface is in promisc. mode, drop all the crap
 11      * that it receives, do not try to analyse it.
 12      */
 13     /* Promiscuous mode: packet is not addressed to this host */
 14     if (skb->pkt_type == PACKET_OTHERHOST)
 15         goto drop;
 16 
 17 
 18     /* Get the struct net associated with the receiving device */
 19     net = dev_net(dev);
 20     __IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len);
 21 
 22     /* Check whether the skb is shared */
 23     skb = skb_share_check(skb, GFP_ATOMIC);
 24     if (!skb) {
 25         __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
 26         goto out;
 27     }
 28 
 29     /* Check that a basic (option-less) IP header can be pulled */
 30     if (!pskb_may_pull(skb, sizeof(struct iphdr)))
 31         goto inhdr_error;
 32 
 33     /* Get the IP header */
 34     iph = ip_hdr(skb);
 35 
 36     /*
 37      *    RFC1122: 3.2.1.2 MUST silently discard any IP frame that fails the checksum.
 38      *
 39      *    Is the datagram acceptable?
 40      *
 41      *    1.    Length at least the size of an ip header
 42      *    2.    Version of 4
 43      *    3.    Checksums correctly. [Speed optimisation for later, skip loopback checksums]
 44      *    4.    Doesn't have a bogus length
 45      */
 46 
 47     /* Header length is below 20 bytes, or the version is not 4 */
 48     if (iph->ihl < 5 || iph->version != 4)
 49         goto inhdr_error;
 50 
 51     BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_1);
 52     BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_0);
 53     BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE);
 54     __IP_ADD_STATS(net,
 55                IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK),
 56                max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
 57 
 58     /* Check that the full IP header (ihl*4 bytes, options included) can be pulled */
 59     if (!pskb_may_pull(skb, iph->ihl*4))
 60         goto inhdr_error;
 61 
 62     /* Get the IP header again after the pull */
 63     iph = ip_hdr(skb);
 64 
 65     /* Checksum error */
 66     if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
 67         goto csum_error;
 68 
 69     /* Get the total length from the header */
 70     len = ntohs(iph->tot_len);
 71 
 72     /* skb length is smaller than the IP total length */
 73     if (skb->len < len) {
 74         __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
 75         goto drop;
 76     } 
 77     /* Total length is even smaller than the header length */
 78     else if (len < (iph->ihl*4))
 79         goto inhdr_error;
 80 
 81     /* Our transport medium may have padded the buffer out. Now we know it
 82      * is IP we can trim to the true length of the frame.
 83      * Note this now means skb->len holds ntohs(iph->tot_len).
 84      */
 85     /* Trim the skb to the length of the IP packet */
 86     if (pskb_trim_rcsum(skb, len)) {
 87         __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
 88         goto drop;
 89     }
 90 
 91     /* Set the transport header offset: network header plus IP header length */
 92     skb->transport_header = skb->network_header + iph->ihl*4;
 93 
 94     /* Remove any debris in the socket control block */
 95     /* Reset the control block (cb) */
 96     memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
 97 
 98     /* Save the input interface information */
 99     IPCB(skb)->iif = skb->skb_iif;
100 
101     /* Must drop socket now because of tproxy. */
102     skb_orphan(skb);
103 
104     /* Pass through the PRE_ROUTING hook point */
105     return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
106                net, NULL, skb, dev, NULL,
107                ip_rcv_finish);
108 
109 csum_error:    /* falls through: a checksum error is also counted as a header error */
110     __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS);
111 inhdr_error:
112     __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
113 drop:
114     kfree_skb(skb);
115 out:    /* reached directly when skb_share_check() returned NULL, so there is no skb to free */
116     return NET_RX_DROP;
117 }
  1 static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
  2 {    /* Follow-up to ip_rcv after the PRE_ROUTING hook: optional early demux, route lookup when the skb has no valid dst, IP option handling, multicast/broadcast accounting, then dst_input(). On failure the skb is freed and NET_RX_DROP is returned. */
  3     const struct iphdr *iph = ip_hdr(skb);
  4     struct rtable *rt;
  5     struct net_device *dev = skb->dev;
  6     void (*edemux)(struct sk_buff *skb);
  7 
  8     /* if ingress device is enslaved to an L3 master device pass the
  9      * skb to its handler for processing
 10      */
 11     skb = l3mdev_ip_rcv(skb);
 12     if (!skb)
 13         return NET_RX_SUCCESS;
 14 
 15     /* 
 16         early_demux is enabled
 17         the skb has no cached route
 18         the skb has no sock attached
 19         the packet is not a fragment
 20     */
 21     if (net->ipv4.sysctl_ip_early_demux &&
 22         !skb_dst(skb) &&
 23         !skb->sk &&
 24         !ip_is_fragment(iph)) {
 25         const struct net_protocol *ipprot;
 26 
 27         /* Get the upper-layer protocol number */
 28         int protocol = iph->protocol;
 29 
 30         /* Look up the protocol handler registered for that protocol */
 31         ipprot = rcu_dereference(inet_protos[protocol]);
 32 
 33         /* Get its early_demux function, e.g. tcp_v4_early_demux */
 34         if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
 35 
 36             /* Call it to cache the route information in skb->refdst */
 37             edemux(skb);
 38             /* must reload iph, skb->head might have changed */
 39             /* Reload the IP header */
 40             iph = ip_hdr(skb);
 41         }
 42     }
 43 
 44     /*
 45      *    Initialise the virtual path cache for the packet. It describes
 46      *    how the packet travels inside Linux networking.
 47      */
 48     /* The skb carries no valid route */
 49     if (!skb_valid_dst(skb)) {
 50         /* Look up the route */
 51         int err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
 52                            iph->tos, dev);
 53         if (unlikely(err)) {
 54             if (err == -EXDEV)
 55                 __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
 56             goto drop;
 57         }
 58     }
 59 
 60 #ifdef CONFIG_IP_ROUTE_CLASSID
 61     if (unlikely(skb_dst(skb)->tclassid)) {
 62         struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct);
 63         u32 idx = skb_dst(skb)->tclassid;
 64         st[idx&0xFF].o_packets++;
 65         st[idx&0xFF].o_bytes += skb->len;
 66         st[(idx>>16)&0xFF].i_packets++;
 67         st[(idx>>16)&0xFF].i_bytes += skb->len;
 68     }
 69 #endif
 70 
 71     /* Process IP options (only present when ihl > 5) */
 72     if (iph->ihl > 5 && ip_rcv_options(skb))
 73         goto drop;
 74 
 75     /* Get the route cache entry attached to the skb */
 76     rt = skb_rtable(skb);
 77     if (rt->rt_type == RTN_MULTICAST) {
 78         __IP_UPD_PO_STATS(net, IPSTATS_MIB_INMCAST, skb->len);
 79     } else if (rt->rt_type == RTN_BROADCAST) {
 80         __IP_UPD_PO_STATS(net, IPSTATS_MIB_INBCAST, skb->len);
 81     } else if (skb->pkt_type == PACKET_BROADCAST ||
 82            skb->pkt_type == PACKET_MULTICAST) {
 83         struct in_device *in_dev = __in_dev_get_rcu(dev);
 84 
 85         /* RFC 1122 3.3.6:
 86          *
 87          *   When a host sends a datagram to a link-layer broadcast
 88          *   address, the IP destination address MUST be a legal IP
 89          *   broadcast or IP multicast address.
 90          *
 91          *   A host SHOULD silently discard a datagram that is received
 92          *   via a link-layer broadcast (see Section 2.4) but does not
 93          *   specify an IP multicast or broadcast destination address.
 94          *
 95          * This doesn't explicitly say L2 *broadcast*, but broadcast is
 96          * in a way a form of multicast and the most common use case for
 97          * this is 802.11 protecting against cross-station spoofing (the
 98          * so-called "hole-196" attack) so do it for both.
 99          */
100         if (in_dev &&
101             IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST))
102             goto drop;
103     }
104 
105     /* Call the route entry's input function, which may be ip_local_deliver or ip_forward */
106     return dst_input(skb);
107 
108 drop:
109     kfree_skb(skb);
110     return NET_RX_DROP;
111 }
原文地址:https://www.cnblogs.com/wanpengcoder/p/7577398.html