IP输入 之 分片重组ip_defrag

概述

在ip_local_deliver中,如果检测到是分片包,则需要进行分片重组;

其涉及的函数调用关系如下所示:

 1 /**
 2  * ip_local_deliver
 3  *  |-->ip_is_fragment //判断是否为分片包
 4  *  |-->ip_defrag //分片缓存&重组
 5  *    |-->ip_find //查找ipq
 6  *    |  |-->inet_frag_find //查找frag_queue
 7  *        |
 8  *    |-->ip_frag_queue //分片接收组合
 9  *      |-->ip_frag_reasm //接收完整的分片组成新的ip包
10  */
函数源码分析
ip_local_deliver
 1 /*
 2  *     Deliver IP Packets to the higher protocol layers.
 3  */
 4 int ip_local_deliver(struct sk_buff *skb)
 5 {
 6     /*
 7      *    Reassemble IP fragments.
 8      */
 9     struct net *net = dev_net(skb->dev);
10 
11     /* Fragment reassembly: a non-zero return from ip_defrag() ends processing here with 0 */
12     if (ip_is_fragment(ip_hdr(skb))) {
13         if (ip_defrag(net, skb, IP_DEFRAG_LOCAL_DELIVER))
14             return 0;
15     }
16 
17     /* Run the NF_INET_LOCAL_IN hook, continuing in ip_local_deliver_finish */
18     return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN,
19                net, NULL, skb, skb->dev, NULL,
20                ip_local_deliver_finish);
21 }
ip_is_fragment
1 /* Tell whether the given IP header belongs to a fragment */
2 static inline bool ip_is_fragment(const struct iphdr *iph)
3 {
4     /* 
5         Every fragment but the last carries MF; every fragment but the first has a non-zero offset
6         (e.g. multiples of 1480 bytes with a 1500-byte MTU and a 20-byte IP header), so either one set means "fragment"
7     */
8     return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0;
9 }
ip_defrag
 1 /* Process an incoming IP datagram fragment. */
 2 int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
 3 {
 4     struct net_device *dev = skb->dev ? : skb_dst(skb)->dev;
 5     int vif = l3mdev_master_ifindex_rcu(dev);
 6     struct ipq *qp;
 7 
 8     __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS);
 9     skb_orphan(skb);
10 
11     /* Lookup (or create) queue header */
12     /* The lookup is keyed by the IP header, user and vif */
13     qp = ip_find(net, ip_hdr(skb), user, vif);
14 
15     /* A fragment queue is available */
16     if (qp) {
17         int ret;
18 
19         spin_lock(&qp->q.lock);
20 
21         /* Queue the fragment; ip_frag_queue() reassembles once the datagram is complete */
22         ret = ip_frag_queue(qp, skb);
23 
24         spin_unlock(&qp->q.lock);
25         ipq_put(qp);
26         return ret;
27     }
28 
29     /* ip_find() returned NULL: count a reassembly failure, free the skb, report -ENOMEM */
30     __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
31     kfree_skb(skb);
32     return -ENOMEM;
33 }
ip_find
 1 /* Find the correct entry in the "incomplete datagrams" queue for
 2  * this IP datagram, and create new one, if nothing is found.
 3  */
 4 static struct ipq *ip_find(struct net *net, struct iphdr *iph,
 5                u32 user, int vif)
 6 {
 7     struct inet_frag_queue *q;
 8     struct ip4_create_arg arg;
 9     unsigned int hash;
10 
11     /* Bundle the IP header, user and vif as the lookup/creation key */
12     arg.iph = iph;
13     arg.user = user;
14     arg.vif = vif;
15 
16     /* Hash on id, source address, destination address and protocol */
17     hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
18 
19     /* Find the queue for this hash/key, or create one */
20     q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
21     if (IS_ERR_OR_NULL(q)) {
22         inet_frag_maybe_warn_overflow(q, pr_fmt());
23         return NULL;
24     }
25 
26     /* Return the ipq that embeds q */
27     return container_of(q, struct ipq, q);
28 }
inet_frag_find
 1 struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
 2                        struct inet_frags *f, void *key,
 3                        unsigned int hash)
 4 {
 5     struct inet_frag_bucket *hb;
 6     struct inet_frag_queue *q;
 7     int depth = 0;
 8 
 9     /* Fragment memory usage is above the low threshold */
10     if (frag_mem_limit(nf) > nf->low_thresh)
11         /* Schedule the worker (presumably inet_frag_worker, which evicts queues) */
12         inet_frag_schedule_worker(f);
13 
14     /* Select the hash bucket */
15     hash &= (INETFRAGS_HASHSZ - 1);
16     hb = &f->hash[hash];
17 
18     spin_lock(&hb->chain_lock);
19 
20     /* Walk the bucket's chain */
21     hlist_for_each_entry(q, &hb->chain, list) {
22 
23         /* Match: same netns_frags and the protocol's match() accepts the key */
24         if (q->net == nf && f->match(q, key)) {
25 
26             /* Take a reference for the caller */
27             atomic_inc(&q->refcnt);
28             spin_unlock(&hb->chain_lock);
29 
30             /* Return the existing queue */
31             return q;
32         }
33 
34         /* Count how deep the chain is */
35         depth++;
36     }
37     spin_unlock(&hb->chain_lock);
38 
39     /* No matching queue was found */
40 
41     /* Chain depth is within INETFRAGS_MAXDEPTH */
42     if (depth <= INETFRAGS_MAXDEPTH)
43         /* Create a new queue and return it */
44         return inet_frag_create(nf, f, key);
45 
46     /* Chain too deep: request a hash rebuild if inet_frag_may_rebuild() permits one now */
47     if (inet_frag_may_rebuild(f)) {
48         /* Set the rebuild flag */
49         if (!f->rebuild)
50             f->rebuild = true;
51         /* Schedule the worker */
52         inet_frag_schedule_worker(f);
53     }
54 
55     return ERR_PTR(-ENOBUFS);
56 }
inet_frag_worker
 1 static void inet_frag_worker(struct work_struct *work)
 2 {
 3 
 4     /*  Maximum number of buckets scanned in this run */
 5     unsigned int budget = INETFRAGS_EVICT_BUCKETS;
 6     unsigned int i, evicted = 0;
 7     struct inet_frags *f;
 8 
 9     /* Recover the inet_frags that embeds this work item */
10     f = container_of(work, struct inet_frags, frags_work);
11 
12     BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ);
13 
14     local_bh_disable();
15 
16     /* Resume eviction from the bucket index saved in next_bucket */
17     for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) {
18 
19         /* Evict from this bucket and accumulate the number of evicted queues */
20         evicted += inet_evict_bucket(f, &f->hash[i]);
21 
22         /* Advance to the next bucket, wrapping at INETFRAGS_HASHSZ */
23         i = (i + 1) & (INETFRAGS_HASHSZ - 1);
24 
25         /* Stop once more than INETFRAGS_EVICT_MAX queues have been evicted */
26         if (evicted > INETFRAGS_EVICT_MAX)
27             break;
28     }
29 
30     /* Remember which bucket to start from next time */
31     f->next_bucket = i;
32 
33     local_bh_enable();
34 
35     /* Rebuild the hash table if flagged and currently permitted */
36     if (f->rebuild && inet_frag_may_rebuild(f))
37         inet_frag_secret_rebuild(f);
38 }
inet_evict_bucket
 1 static unsigned int
 2 inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
 3 {
 4     struct inet_frag_queue *fq;
 5     struct hlist_node *n;
 6     unsigned int evicted = 0;
 7     HLIST_HEAD(expired);
 8 
 9     spin_lock(&hb->chain_lock);
10 
11     /* Walk the chain of this bucket */
12     hlist_for_each_entry_safe(fq, n, &hb->chain, list) {
13 
14         /* inet_fragq_should_evict() says this queue need not be evicted */
15         if (!inet_fragq_should_evict(fq))
16             continue;
17 
18         /* del_timer() returned 0 (timer could not be removed): skip this queue */
19         if (!del_timer(&fq->timer))
20             continue;
21 
22         /* Collect the evictable queue on the local "expired" list */
23         hlist_add_head(&fq->list_evictor, &expired);
24 
25         /* Count it */
26         ++evicted;
27     }
28 
29     spin_unlock(&hb->chain_lock);
30 
31     /* With the bucket lock released, run the expire handler on each collected queue */
32     hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
33         f->frag_expire((unsigned long) fq);
34 
35     /* Return how many queues were evicted */
36     return evicted;
37 }
inet_frag_secret_rebuild
 1 static void inet_frag_secret_rebuild(struct inet_frags *f)
 2 {
 3     int i;
 4 
 5     write_seqlock_bh(&f->rnd_seqlock);
 6 
 7     /* Rebuild not permitted right now: bail out */
 8     if (!inet_frag_may_rebuild(f))
 9         goto out;
10 
11     /* Draw a new random value into f->rnd (presumably the seed used by the hash function) */
12     get_random_bytes(&f->rnd, sizeof(u32));
13 
14     /* Walk every bucket of the hash table */
15     for (i = 0; i < INETFRAGS_HASHSZ; i++) {
16         struct inet_frag_bucket *hb;
17         struct inet_frag_queue *q;
18         struct hlist_node *n;
19 
20         /* Take the bucket and lock its chain */
21         hb = &f->hash[i];
22         spin_lock(&hb->chain_lock);
23 
24         /* Walk the chain of this bucket */
25         hlist_for_each_entry_safe(q, n, &hb->chain, list) {
26 
27             /* Recompute the queue's hash */
28             unsigned int hval = inet_frag_hashfn(f, q);
29 
30             /* The queue no longer belongs in this bucket */
31             if (hval != i) {
32                 struct inet_frag_bucket *hb_dest;
33 
34                 /* Unlink it from the current bucket */
35                 hlist_del(&q->list);
36 
37                 /* Relink to new hash chain. */
38                 /* Locate the destination bucket */
39                 hb_dest = &f->hash[hval];
40 
41                 /* This is the only place where we take
42                  * another chain_lock while already holding
43                  * one.  As this will not run concurrently,
44                  * we cannot deadlock on hb_dest lock below, if its
45                  * already locked it will be released soon since
46                  * other caller cannot be waiting for hb lock
47                  * that we've taken above.
48                  */
49                 spin_lock_nested(&hb_dest->chain_lock,
50                          SINGLE_DEPTH_NESTING);
51                 /* Add the queue to the destination bucket's chain */
52                 hlist_add_head(&q->list, &hb_dest->chain);
53                 spin_unlock(&hb_dest->chain_lock);
54             }
55         }
56         spin_unlock(&hb->chain_lock);
57     }
58 
59     /* Clear the rebuild flag and record when the rebuild happened */
60     f->rebuild = false;
61     f->last_rebuild_jiffies = jiffies;
62 out:
63     write_sequnlock_bh(&f->rnd_seqlock);
64 }
ip_frag_queue
  1 /* Add new segment to existing queue. Returns -EINPROGRESS when queued, the ip_frag_reasm() result when complete, else a negative errno after freeing skb. */
  2 static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
  3 {
  4     struct sk_buff *prev, *next;
  5     struct net_device *dev;
  6     unsigned int fragsize;
  7     int flags, offset;
  8     int ihl, end;
  9     int err = -ENOENT;
 10     u8 ecn;
 11 
 12     /* The queue is already marked complete */
 13     if (qp->q.flags & INET_FRAG_COMPLETE)
 14         goto err;
 15 
 16     /* 
 17         The skb is not flagged IPSKB_FRAG_COMPLETE
 18         and ip_frag_too_far() trips (presumably a DoS safeguard - confirm in its definition):
 19         reinitialise the queue;
 20         if that fails, kill the queue and drop the fragment
 21     */
 22     if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
 23         unlikely(ip_frag_too_far(qp)) &&
 24         unlikely(err = ip_frag_reinit(qp))) {
 25         ipq_kill(qp);
 26         goto err;
 27     }
 28 
 29     ecn = ip4_frag_ecn(ip_hdr(skb)->tos);
 30 
 31     /* Read the frag_off field in host byte order */
 32     offset = ntohs(ip_hdr(skb)->frag_off);
 33 
 34     /* Keep the flag bits (everything outside the offset mask) */
 35     flags = offset & ~IP_OFFSET;
 36 
 37     /* Keep the fragment offset bits */
 38     offset &= IP_OFFSET;
 39 
 40     /* Convert to a byte offset */
 41     offset <<= 3;        /* offset is in 8-byte chunks */
 42 
 43     /* IP header length */
 44     ihl = ip_hdrlen(skb);
 45 
 46     /* Determine the position of this fragment. */
 47 
 48     /* 
 49         End position of this fragment: its offset plus its payload length,
 50         i.e. skb length minus the network offset and the IP header
 51     */
 52     end = offset + skb->len - skb_network_offset(skb) - ihl;
 53     err = -EINVAL;
 54 
 55     /* Is this the final fragment? */
 56     /* MF clear: this is the last fragment */
 57     if ((flags & IP_MF) == 0) {
 58         /* If we already have some bits beyond end
 59          * or have different end, the segment is corrupted.
 60          */
 61         /* End is before the length already recorded: data beyond it exists */
 62         /* Or a last fragment was already seen and its end differs */
 63         if (end < qp->q.len ||
 64             ((qp->q.flags & INET_FRAG_LAST_IN) && end != qp->q.len))
 65             goto err;
 66 
 67         /* Mark the last fragment as received */
 68         qp->q.flags |= INET_FRAG_LAST_IN;
 69 
 70         /* The total length is this fragment's end */
 71         qp->q.len = end;
 72     }
 73     /* Not the last fragment */
 74     else {
 75         /* End is not 8-byte aligned */
 76         if (end&7) {
 77             /* Round it down to an 8-byte boundary */
 78             end &= ~7;
 79 
 80             /* Data will be truncated: reset checksum state unless CHECKSUM_UNNECESSARY */
 81             if (skb->ip_summed != CHECKSUM_UNNECESSARY)
 82                 skb->ip_summed = CHECKSUM_NONE;
 83         }
 84 
 85         /* The fragment extends past the length recorded so far */
 86         if (end > qp->q.len) {
 87             /* Some bits beyond end -> corruption. */
 88             /* The last fragment was already seen, so this data is past the end */
 89             if (qp->q.flags & INET_FRAG_LAST_IN)
 90                 goto err;
 91 
 92             /* Record the largest end seen so far */
 93             qp->q.len = end;
 94         }
 95     }
 96 
 97     /* Empty fragment */
 98     if (end == offset)
 99         goto err;
100 
101     err = -ENOMEM;
102 
103     /* Pull everything up to and including the IP header */
104     if (!pskb_pull(skb, skb_network_offset(skb) + ihl))
105         goto err;
106 
107     /* Trim the payload to end - offset bytes */
108     err = pskb_trim_rcsum(skb, end - offset);
109     if (err)
110         goto err;
111 
112     /* Find out which fragments are in front and at the back of us
113      * in the chain of fragments so far.  We must know where to put
114      * this fragment, right?
115      */
116     /* Start with the tail of the list as the candidate predecessor */
117     prev = qp->q.fragments_tail;
118 
119     /* List is empty, or the tail starts before this fragment: append at the tail */
120     if (!prev || FRAG_CB(prev)->offset < offset) {
121         next = NULL;
122         /* Position found */
123         goto found;
124     }
125 
126     /* Otherwise the fragment belongs earlier in the list */
127     
128     prev = NULL;
129     /* Walk the fragment list from the head */
130     for (next = qp->q.fragments; next != NULL; next = next->next) {
131         /* The first fragment at or after our offset becomes next */
132         if (FRAG_CB(next)->offset >= offset)
133             break;    /* bingo! */
134 
135         /* Remember the predecessor */
136         prev = next;
137     }
138 
139 found:
140     /* We found where to put this one.  Check for overlap with
141      * preceding fragment, and, if needed, align things so that
142      * any overlaps are eliminated.
143      */
144 
145     /* Overlap with the preceding fragment */
146 
147     /* There is a preceding fragment */
148     if (prev) {
149 
150         /* Number of bytes by which prev extends into this fragment */
151         int i = (FRAG_CB(prev)->offset + prev->len) - offset;
152 
153         /* They overlap */
154         if (i > 0) {
155             /* Skip the overlapped bytes */
156             offset += i;
157             err = -EINVAL;
158 
159             /* Nothing is left after removing the overlap */
160             if (end <= offset)
161                 goto err;
162             err = -ENOMEM;
163             /* Pull the overlapped bytes off the skb */
164             if (!pskb_pull(skb, i))
165                 goto err;
166             /* Data changed: reset checksum state unless CHECKSUM_UNNECESSARY */
167             if (skb->ip_summed != CHECKSUM_UNNECESSARY)
168                 skb->ip_summed = CHECKSUM_NONE;
169         }
170     }
171 
172     err = -ENOMEM;
173 
174     /* Overlap with the following fragments */
175 
176     /* While a next fragment exists and starts before our end */
177     while (next && FRAG_CB(next)->offset < end) {
178 
179         /*  Size of the overlap */
180         int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */
181 
182         /* The overlap is smaller than the next fragment */
183         if (i < next->len) {
184             /* Eat head of the next overlapped fragment
185              * and leave the loop. The next ones cannot overlap.
186              */
187             /* Pull the overlapped head off the next fragment */
188             if (!pskb_pull(next, i))
189                 goto err;
190             /* Move its offset forward accordingly */
191             FRAG_CB(next)->offset += i;
192 
193             /* Subtract the removed bytes from the received total */
194             qp->q.meat -= i;
195 
196             /* Data changed: reset checksum state unless CHECKSUM_UNNECESSARY */
197             if (next->ip_summed != CHECKSUM_UNNECESSARY)
198                 next->ip_summed = CHECKSUM_NONE;
199             break;
200         } 
201         /* The next fragment is entirely covered */
202         else {
203 
204             /* Remember it so that it can be freed */
205             struct sk_buff *free_it = next;
206 
207             /* Old fragment is completely overridden with
208              * new one drop it.
209              */
210             /* Advance to the fragment after it */
211             next = next->next;
212 
213             /* Unlink the covered fragment from the list */
214             if (prev)
215                 prev->next = next;
216             else
217                 qp->q.fragments = next;
218 
219             /* Subtract its length from the received total */
220             qp->q.meat -= free_it->len;
221 
222             /* Release its share of the fragment memory accounting */
223             sub_frag_mem_limit(qp->q.net, free_it->truesize);
224 
225             /* Free the fragment */
226             kfree_skb(free_it);
227         }
228 
229         /* Loop to check whether the new next fragment overlaps too */
230     }
231 
232     /* Store the (possibly adjusted) offset */
233     FRAG_CB(skb)->offset = offset;
234 
235     /* Insert this fragment in the chain of fragments. */
236 
237     /* Link the fragment between prev and next */
238     skb->next = next;
239     if (!next)
240         qp->q.fragments_tail = skb;
241     if (prev)
242         prev->next = skb;
243     else
244         qp->q.fragments = skb;
245 
246     /* Record the input interface index and detach the device from the skb */
247     dev = skb->dev;
248     if (dev) {
249         qp->iif = dev->ifindex;
250         skb->dev = NULL;
251     }
252 
253     /* Record the timestamp */
254     qp->q.stamp = skb->tstamp;
255     /* Add this fragment's bytes to the received total */
256     qp->q.meat += skb->len;
257     qp->ecn |= ecn;
258     /* Charge the fragment memory accounting */
259     add_frag_mem_limit(qp->q.net, skb->truesize);
260 
261     /* Offset 0: mark the first fragment as received */
262     if (offset == 0)
263         qp->q.flags |= INET_FRAG_FIRST_IN;
264 
265     /* Fragment size including the IP header */
266     fragsize = skb->len + ihl;
267 
268     /* Track the largest fragment size seen */
269     if (fragsize > qp->q.max_size)
270         qp->q.max_size = fragsize;
271 
272     /* Track the largest fragment that carried the DF flag */
273     if (ip_hdr(skb)->frag_off & htons(IP_DF) &&
274         fragsize > qp->max_df_size)
275         qp->max_df_size = fragsize;
276 
277     /* First and last fragments are both in and the received bytes equal the total length */
278     if (qp->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
279         qp->q.meat == qp->q.len) {
280         unsigned long orefdst = skb->_skb_refdst;
281 
282         skb->_skb_refdst = 0UL;
283         /* Reassemble the fragments into a new IP packet */
284         err = ip_frag_reasm(qp, prev, dev);
285         skb->_skb_refdst = orefdst;
286         return err;
287     }
288 
289     /* Drop the route (dst) reference */
290     skb_dst_drop(skb);
291     
292     /* The fragment was queued; the datagram is not complete yet */
293     return -EINPROGRESS;
294 
295 err:
296     kfree_skb(skb);
297     return err;
298 }
ip_frag_reasm
  1 /* Build a new IP datagram from all its fragments. Returns 0 on success, or -EINVAL / -ENOMEM / -E2BIG after counting a reassembly failure. */
  2 
  3 static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
  4              struct net_device *dev)
  5 {
  6     struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
  7     struct iphdr *iph;
  8     struct sk_buff *fp, *head = qp->q.fragments;
  9     int len;
 10     int ihlen;
 11     int err;
 12     u8 ecn;
 13 
 14     /* Kill the queue */
 15     ipq_kill(qp);
 16 
 17     ecn = ip_frag_ecn_table[qp->ecn];
 18     if (unlikely(ecn == 0xff)) {
 19         err = -EINVAL;
 20         goto out_fail;
 21     }
 22     /* Make the one we just received the head. */
 23 
 24     /* There is a fragment in front of the one just received */
 25     if (prev) {
 26         /* head is the fragment just received */
 27         head = prev->next;
 28 
 29         /* Clone it */
 30         fp = skb_clone(head, GFP_ATOMIC);
 31         if (!fp)
 32             goto out_nomem;
 33 
 34         /* The clone inherits the next pointer */
 35         fp->next = head->next;
 36 
 37         /* No successor: the clone becomes the tail */
 38         if (!fp->next)
 39             qp->q.fragments_tail = fp;
 40 
 41         //Link the clone in after prev, taking the current fragment's place
 42         prev->next = fp;
 43 
 44         /* Morph head into the old list head, release the old head skb, make head the list head */
 45         skb_morph(head, qp->q.fragments);
 46         head->next = qp->q.fragments->next;
 47 
 48         consume_skb(qp->q.fragments);
 49         qp->q.fragments = head;
 50     }
 51 
 52     WARN_ON(!head);
 53     WARN_ON(FRAG_CB(head)->offset != 0);
 54 
 55     /* Allocate a new buffer for the datagram. */
 56 
 57     /* Total datagram length: IP header plus reassembled payload */
 58     ihlen = ip_hdrlen(head);
 59     len = ihlen + qp->q.len;
 60 
 61     err = -E2BIG;
 62 
 63     /* Longer than the maximum IP datagram size */
 64     if (len > 65535)
 65         goto out_oversize;
 66 
 67     /* Head of list must not be cloned. */
 68     /* Unclone head if necessary */
 69     if (skb_unclone(head, GFP_ATOMIC))
 70         goto out_nomem;
 71 
 72     /* If the first fragment is fragmented itself, we split
 73      * it to two chunks: the first with data and paged part
 74      * and the second, holding only fragments. */
 75 
 76     /* If head carries a frag_list, move it to a separate skb so that head itself has none */
 77     if (skb_has_frag_list(head)) {
 78         struct sk_buff *clone;
 79         int i, plen = 0;
 80 
 81         clone = alloc_skb(0, GFP_ATOMIC);
 82         if (!clone)
 83             goto out_nomem;
 84 
 85         /* Insert the new skb right after head */
 86         clone->next = head->next;
 87         head->next = clone;
 88 
 89         /* Hand head's frag_list over to the new skb */
 90         skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
 91         skb_frag_list_init(head);
 92 
 93         /* Split the length accounting: head keeps its paged frags, the new skb gets the rest */
 94         for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
 95             plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
 96         clone->len = clone->data_len = head->data_len - plen;
 97         head->data_len -= clone->len;
 98         head->len -= clone->len;
 99         clone->csum = 0;
100         clone->ip_summed = head->ip_summed;
101         add_frag_mem_limit(qp->q.net, clone->truesize);
102     }
103 
104     /* Hang the remaining fragments on head's frag_list and expose the IP header again */
105     skb_shinfo(head)->frag_list = head->next;
106     skb_push(head, head->data - skb_network_header(head));
107 
108     /* Accumulate lengths, checksum state and truesize from the fragments */
109     for (fp=head->next; fp; fp = fp->next) {
110         head->data_len += fp->len;
111         head->len += fp->len;
112         if (head->ip_summed != fp->ip_summed)
113             head->ip_summed = CHECKSUM_NONE;
114         else if (head->ip_summed == CHECKSUM_COMPLETE)
115             head->csum = csum_add(head->csum, fp->csum);
116         head->truesize += fp->truesize;
117     }
118     sub_frag_mem_limit(qp->q.net, head->truesize);
119 
120     /* Fill in the fields of the reassembled packet */
121     head->next = NULL;
122     head->dev = dev;
123     head->tstamp = qp->q.stamp;
124     /* Record the largest fragment size seen (presumably the size limit if the packet is fragmented again - confirm in ip_fragment) */
125     IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
126 
127     iph = ip_hdr(head);
128     iph->tot_len = htons(len);
129     iph->tos |= ecn;
130 
131     /* When we set IP_DF on a refragmented skb we must also force a
132      * call to ip_fragment to avoid forwarding a DF-skb of size s while
133      * original sender only sent fragments of size f (where f < s).
134      *
135      * We only set DF/IPSKB_FRAG_PMTU if such DF fragment was the largest
136      * frag seen to avoid sending tiny DF-fragments in case skb was built
137      * from one very small df-fragment and one large non-df frag.
138      */
139      /*  
140        With DF set, output must be forced through the fragmentation path
141        so that DF fragments never exceed the size the sender originally used.
142 
143        DF/IPSKB_FRAG_PMTU is set only when the largest fragment seen was a DF fragment,
144        which avoids emitting tiny DF fragments next to large non-DF ones.
145     */
146     if (qp->max_df_size == qp->q.max_size) {
147         /* Flag the packet with IPSKB_FRAG_PMTU */
148         IPCB(head)->flags |= IPSKB_FRAG_PMTU;
149         /* Set DF */
150         iph->frag_off = htons(IP_DF);
151     } 
152     /* Otherwise clear frag_off */
153     else {
154         iph->frag_off = 0;
155     }
156 
157     /* Recompute the IP header checksum */
158     ip_send_check(iph);
159 
160     __IP_INC_STATS(net, IPSTATS_MIB_REASMOKS);
161 
162     /* Detach the fragment list from the queue */
163     qp->q.fragments = NULL;
164     qp->q.fragments_tail = NULL;
165     return 0;
166 
167 out_nomem:
168     net_dbg_ratelimited("queue_glue: no memory for gluing queue %p\n", qp);
169     err = -ENOMEM;
170     goto out_fail;
171 out_oversize:
172     net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->saddr);
173 out_fail:
174     __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
175     return err;
176 }
原文地址:https://www.cnblogs.com/wanpengcoder/p/7604715.html