TCP被动打开 之 第三次握手-接收ACK

假定客户端主动打开,发送SYN包到服务器;服务器创建连接请求控制块并加入请求队列,进入TCP_NEW_SYN_RECV状态,向客户端发送SYN+ACK,同时启动定时器,等待客户端回复握手的最后一个ACK;

tcp_v4_rcv上来的包,会判断连接状态,当状态为TCP_NEW_SYN_RECV时,期望得到对端发来的ack,以完成三次握手正式建立连接;函数通过调用tcp_check_req处理ack,成功会返回新建的子控制块,然后调用tcp_child_process进行进一步的处理,包括更新状态为已连接状态,通知正在等待的应用程序等;

 1 int tcp_v4_rcv(struct sk_buff *skb)
 2 {
 3         /* Unrelated code omitted */
 4 
 5     if (sk->sk_state == TCP_NEW_SYN_RECV) {
 6         struct request_sock *req = inet_reqsk(sk);
 7         struct sock *nsk;
 8 
 9         /* Switch sk to the listener recorded in the request sock */
10         sk = req->rsk_listener;
11         if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
12             sk_drops_add(sk, skb);
13             reqsk_put(req);
14             goto discard_it;
15         }
16 
17         /* The listener is no longer in the LISTEN state */
18         if (unlikely(sk->sk_state != TCP_LISTEN)) {
19             /* Drop the request from the listener's request queue and release it */
20             inet_csk_reqsk_queue_drop_and_put(sk, req);
21 
22             /* Look the socket up again for this skb (label is in the omitted code) */
23             goto lookup;
24         }
25         /* We own a reference on the listener, increase it again
26          * as we might lose it too soon.
27          */
28         sock_hold(sk);
29         refcounted = true;
30 
31         /* Handle the third-handshake ACK; result is NULL, sk itself, or a child socket */
32         nsk = tcp_check_req(sk, skb, req, false);
33 
34         /* NULL: release the request and drop the segment */
35         if (!nsk) {
36             reqsk_put(req);
37             goto discard_and_relse;
38         }
39 
40         /* The listener itself came back: no child was created, release req and fall through */
41         if (nsk == sk) {
42             reqsk_put(req);
43         } 
44         /* A child socket was returned: hand the segment to it */
45         else if (tcp_child_process(sk, nsk, skb)) {
46             /* Non-zero return from tcp_child_process: send a RST */
47             tcp_v4_send_reset(nsk, skb);
48             goto discard_and_relse;
49         } else {
50             sock_put(sk);
51             return 0;
52         }
53     }
54 
55     /* Unrelated code omitted */
56 }

tcp_check_req为处理ack的核心流程,除了各种状态的检查之外,最主要的是在状态检查通过之后(1)调用child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, req, &own_req);创建子控制块,这里需要注意,子控制块的状态为TCP_SYN_RECV,这与刚收到syn建立的控制块状态不一样,那时创建的控制块为TCP_NEW_SYN_RECV;然后(2)将请求控制块从未完成连接队列中删除,加入到已完成连接队列中;

  1 struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
  2                struct request_sock *req,
  3                bool fastopen)
  4 {
  5     struct tcp_options_received tmp_opt;
  6     struct sock *child;
  7     const struct tcphdr *th = tcp_hdr(skb);
  8     __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
  9     bool paws_reject = false;
 10     bool own_req;
 11 
 12     tmp_opt.saw_tstamp = 0;
 13 
 14     /* Header is longer than the base tcphdr, so TCP options are present */
 15     if (th->doff > (sizeof(struct tcphdr)>>2)) {
 16 
 17         /* Parse the options */
 18         tcp_parse_options(skb, &tmp_opt, 0, NULL);
 19 
 20         /* A timestamp option was seen */
 21         if (tmp_opt.saw_tstamp) {
 22             tmp_opt.ts_recent = req->ts_recent;
 23             if (tmp_opt.rcv_tsecr)
 24                 tmp_opt.rcv_tsecr -= tcp_rsk(req)->ts_off;
 25             /* We do not store true stamp, but it is not required,
 26              * it can be estimated (approximately)
 27              * from another data.
 28              */
 29             /* Estimate ts_recent_stamp, then run the PAWS check */
 30             tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->num_timeout);
 31             paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
 32         }
 33     }
 34 
 35     /* Check for pure retransmitted SYN. */
 36     /* SYN retransmitted by the client */
 37     if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn &&
 38         flg == TCP_FLAG_SYN &&
 39         !paws_reject) {
 40         /*
 41          * RFC793 draws (Incorrectly! It was fixed in RFC1122)
 42          * this case on figure 6 and figure 8, but formal
 43          * protocol description says NOTHING.
 44          * To be more exact, it says that we should send ACK,
 45          * because this segment (at least, if it has no data)
 46          * is out of window.
 47          *
 48          *  CONCLUSION: RFC793 (even with RFC1122) DOES NOT
 49          *  describe SYN-RECV state. All the description
 50          *  is wrong, we cannot believe to it and should
 51          *  rely only on common sense and implementation
 52          *  experience.
 53          *
 54          * Enforce "SYN-ACK" according to figure 8, figure 6
 55          * of RFC793, fixed by RFC1122.
 56          *
 57          * Note that even if there is new data in the SYN packet
 58          * they will be thrown away too.
 59          *
 60          * Reset timer after retransmitting SYNACK, similar to
 61          * the idea of fast retransmit in recovery.
 62          */
 63         /* Rate-limit check */
 64         if (!tcp_oow_rate_limited(sock_net(sk), skb,
 65                       LINUX_MIB_TCPACKSKIPPEDSYNRECV,
 66                       &tcp_rsk(req)->last_oow_ack_time) &&
 67             /* Retransmit the SYN+ACK */
 68             !inet_rtx_syn_ack(sk, req)) {
 69 
 70             /* Compute the new expiry time and adjust the timer */
 71             unsigned long expires = jiffies;
 72 
 73             expires += min(TCP_TIMEOUT_INIT << req->num_timeout,
 74                        TCP_RTO_MAX);
 75             if (!fastopen)
 76                 mod_timer_pending(&req->rsk_timer, expires);
 77             else
 78                 req->rsk_timer.expires = expires;
 79         }
 80 
 81         /* Done with this segment; NULL tells the caller to drop it */
 82         return NULL;
 83     }
 84 
 85     /* Further reproduces section "SEGMENT ARRIVES"
 86        for state SYN-RECEIVED of RFC793.
 87        It is broken, however, it does not work only
 88        when SYNs are crossed.
 89 
 90        You would think that SYN crossing is impossible here, since
 91        we should have a SYN_SENT socket (from connect()) on our end,
 92        but this is not true if the crossed SYNs were sent to both
 93        ends by a malicious third party.  We must defend against this,
 94        and to do that we first verify the ACK (as per RFC793, page
 95        36) and reset if it is invalid.  Is this a true full defense?
 96        To convince ourselves, let us consider a way in which the ACK
 97        test can still pass in this 'malicious crossed SYNs' case.
 98        Malicious sender sends identical SYNs (and thus identical sequence
 99        numbers) to both A and B:
100 
101         A: gets SYN, seq=7
102         B: gets SYN, seq=7
103 
104        By our good fortune, both A and B select the same initial
105        send sequence number of seven :-)
106 
107         A: sends SYN|ACK, seq=7, ack_seq=8
108         B: sends SYN|ACK, seq=7, ack_seq=8
109 
110        So we are now A eating this SYN|ACK, ACK test passes.  So
111        does sequence test, SYN is truncated, and thus we consider
112        it a bare ACK.
113 
114        If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this
115        bare ACK.  Otherwise, we create an established connection.  Both
116        ends (listening sockets) accept the new incoming connection and try
117        to talk to each other. 8-)
118 
119        Note: This case is both harmless, and rare.  Possibility is about the
120        same as us discovering intelligent life on another plant tomorrow.
121 
122        But generally, we should (RFC lies!) to accept ACK
123        from SYNACK both here and in tcp_rcv_state_process().
124        tcp_rcv_state_process() does not, hence, we do not too.
125 
126        Note that the case is absolutely generic:
127        we cannot optimize anything here without
128        violating protocol. All the checks must be made
129        before attempt to create socket.
130      */
131 
132     /* RFC793 page 36: "If the connection is in any non-synchronized state ...
133      *                  and the incoming segment acknowledges something not yet
134      *                  sent (the segment carries an unacceptable ACK) ...
135      *                  a reset is sent."
136      *
137      * Invalid ACK: reset will be sent by listening socket.
138      * Note that the ACK validity check for a Fast Open socket is done
139      * elsewhere and is checked directly against the child socket rather
140      * than req because user data may have been sent out.
141      */
142     /* ACK number is not snt_isn + 1: return the listener itself, no child is created */
143     if ((flg & TCP_FLAG_ACK) && !fastopen &&
144         (TCP_SKB_CB(skb)->ack_seq !=
145          tcp_rsk(req)->snt_isn + 1))
146         return sk;
147 
148     /* Also, it would be not so bad idea to check rcv_tsecr, which
149      * is essentially ACK extension and too early or too late values
150      * should cause reset in unsynchronized states.
151      */
152 
153     /* RFC793: "first check sequence number". */
154     /* PAWS rejected, or the segment lies outside the receive window */
155     if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
156                       tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rsk_rcv_wnd)) {
157         /* Out of window: send ACK and drop. */
158         /* Unless it is a RST (or we are rate-limited), send an ACK to the peer */
159         if (!(flg & TCP_FLAG_RST) &&
160             !tcp_oow_rate_limited(sock_net(sk), skb,
161                       LINUX_MIB_TCPACKSKIPPEDSYNRECV,
162                       &tcp_rsk(req)->last_oow_ack_time))
163             req->rsk_ops->send_ack(sk, skb, req);
164         if (paws_reject)
165             __NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
166         return NULL;
167     }
168 
169     /* In sequence, PAWS is OK. */
170 
171     /* Timestamp present and seq not after rcv_nxt: record the peer's timestamp value */
172     if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt))
173         req->ts_recent = tmp_opt.rcv_tsval;
174 
175     /* seq equals the original SYN's sequence number, which is now out of window: clear the SYN flag */
176     if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
177         /* Truncate SYN, it is out of window starting
178            at tcp_rsk(req)->rcv_isn + 1. */
179         flg &= ~TCP_FLAG_SYN;
180     }
181 
182     /* RFC793: "second check the RST bit" and
183      *       "fourth, check the SYN bit"
184      */
185     /* 
186         RST or SYN flag is set. Pure retransmitted SYNs were already
187         handled above, so a SYN that reaches this point is treated
188         as a bad segment and the embryonic connection is reset 
189     */
190     if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) {
191         __TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
192         goto embryonic_reset;
193     }
194 
195     /* ACK sequence verified above, just make sure ACK is
196      * set.  If ACK not set, just silently drop the packet.
197      *
198      * XXX (TFO) - if we ever allow "data after SYN", the
199      * following check needs to be removed.
200      */
201 
202     /* The ACK flag is required from here on; without it, drop silently */
203     if (!(flg & TCP_FLAG_ACK))
204         return NULL;
205 
206     /* For Fast Open no more processing is needed (sk is the
207      * child socket).
208      */
209     if (fastopen)
210         return sk;
211 
212     /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */
213     /* DEFER_ACCEPT still active and the ACK carries no data: mark req as acked and drop */
214     if (req->num_timeout < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
215         TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
216         inet_rsk(req)->acked = 1;
217         __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
218         return NULL;
219     }
220 
221     /* OK, ACK is valid, create big socket and
222      * feed this segment to it. It will repeat all
223      * the tests. THIS SEGMENT MUST MOVE SOCKET TO
224      * ESTABLISHED STATE. If it will be dropped after
225      * socket is created, wait for troubles.
226      */
227     /* ACK is valid: create the child socket; it normally starts out in TCP_SYN_RECV */
228     child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
229                              req, &own_req);
230     /* Child creation failed */
231     if (!child)
232         goto listen_overflow;
233 
234     sock_rps_save_rxhash(child, skb);
235     /* Measure the handshake RTT (SYN+ACK sent -> ACK received) */
236     tcp_synack_rtt_meas(child, req);
237     /* Remove req from the pending-request queue and add it to the completed (accept) queue */
238     return inet_csk_complete_hashdance(sk, child, req, own_req);
239 
240 listen_overflow:
241     /* Child could not be created locally: unless abort_on_overflow is set, just mark req as acked and drop */
242     if (!sysctl_tcp_abort_on_overflow) {
243         inet_rsk(req)->acked = 1;
244         return NULL;
245     }
246 
247 embryonic_reset:
248 
249     /* The segment did not carry RST itself: answer it with a reset */
250     if (!(flg & TCP_FLAG_RST)) {
251         /* Received a bad SYN pkt - for TFO We try not to reset
252          * the local connection unless it's really necessary to
253          * avoid becoming vulnerable to outside attack aiming at
254          * resetting legit local connections.
255          */
256         req->rsk_ops->send_reset(sk, skb);
257     } else if (fastopen) { /* received a valid RST pkt */
258         reqsk_fastopen_remove(sk, req, true);
259         tcp_reset(sk);
260     }
261 
262     /* Remove the request from the connection request queue */
263     if (!fastopen) {
264         inet_csk_reqsk_queue_drop(sk, req);
265         __NET_INC_STATS(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
266     }
267     return NULL;
268 }

tcp_child_process对新控制块进行进一步处理,在控制块未被用户进程锁定的情况下,调用tcp_rcv_state_process进行相关初始化,并将连接状态更新到TCP_ESTABLISHED已连接状态,之后通知等待进程;如果控制块被用户进程锁住,则将数据加入到控制块的后备队列中延后处理;

 1 /*
 2  * Queue segment on the new socket if the new socket is active,
 3  * otherwise we just shortcircuit this and continue with
 4  * the new socket.
 5  *
 6  * For the vast majority of cases child->sk_state will be TCP_SYN_RECV
 7  * when entering. But other states are possible due to a race condition
 8  * where after __inet_lookup_established() fails but before the listener
 9  * locked is obtained, other packets cause the same connection to
10  * be created.
11  */
12 
13 int tcp_child_process(struct sock *parent, struct sock *child,
14               struct sk_buff *skb)
15 {
16     int ret = 0;
17     int state = child->sk_state;
18 
19     /* record NAPI ID of child */
20     sk_mark_napi_id(child, skb);
21 
22     /* Account the incoming segment(s) on the child */
23     tcp_segs_in(tcp_sk(child), skb);
24 
25     /* Child is not currently owned (locked) by user context */
26     if (!sock_owned_by_user(child)) {
27 
28         /* Run the TCP state machine on the child for this segment */
29         ret = tcp_rcv_state_process(child, skb);
30         /* Wakeup parent, send SIGIO */
31         /* Child left TCP_SYN_RECV: signal data-ready on the parent (listener) */
32         if (state == TCP_SYN_RECV && child->sk_state != state)
33             parent->sk_data_ready(parent);
34     } 
35     /* Owned by user context: queue the segment on the child's backlog */
36     else {
37         /* Alas, it is possible again, because we do lookup
38          * in main socket hash table and lock on listening
39          * socket does not protect us more.
40          */
41         __sk_add_backlog(child, skb);
42     }
43 
44     bh_unlock_sock(child);
45     sock_put(child);
46     return ret;
47 }

tcp_rcv_state_process对于TCP_SYN_RECV的处理主要是完成连接建立之前的必要初始化,以及将连接状态更新为TCP_ESTABLISHED,通知进程可写入数据,判断并标记快慢路等;其中前后的公共流程,这里没有给出;

 1 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 2 {
 3     /* step 5: check the ACK field */
 4     acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
 5                       FLAG_UPDATE_TS_RECENT) > 0;
 6 
 7     switch (sk->sk_state) {
 8     case TCP_SYN_RECV:
 9 
10         /* The ACK was not acceptable */
11         if (!acceptable)
12             return 1;
13 
14         /* RTT */
15         if (!tp->srtt_us)
16             tcp_synack_rtt_meas(sk, req);
17 
18         /* Once we leave TCP_SYN_RECV, we no longer need req
19          * so release it.
20          */
21         if (req) {
22             inet_csk(sk)->icsk_retransmits = 0;
23             reqsk_fastopen_remove(sk, req, false);
24         } else {
25             /* Make sure socket is routed, for correct metrics. */
26             /* Check/rebuild the route via the address-family hook */
27             icsk->icsk_af_ops->rebuild_header(sk);
28             /* Initialize congestion control */
29             tcp_init_congestion_control(sk);
30             /* Initialize path-MTU probing */
31             tcp_mtup_init(sk);
32             /* Nothing has been copied to the user yet: start copied_seq at rcv_nxt */
33             tp->copied_seq = tp->rcv_nxt;
34             /* Set up send/receive buffer space and windows */
35             tcp_init_buffer_space(sk);
36         }
37         smp_mb();
38 
39         /* Move the connection to the ESTABLISHED state */
40         tcp_set_state(sk, TCP_ESTABLISHED);
41         sk->sk_state_change(sk);
42 
43         /* Note, that this wakeup is only for marginal crossed SYN case.
44          * Passively open sockets are not waked up, because
45          * sk->sk_sleep == NULL and sk->sk_socket == NULL.
46          */
47         /* Notify the process that it may send data */
48         if (sk->sk_socket)
49             sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
50 
51         /* Initialize window-related fields from this segment */
52         tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
53         tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
54         tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
55 
56         /* Timestamps negotiated: subtract the timestamp option length from advmss */
57         if (tp->rx_opt.tstamp_ok)
58             tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
59 
60         if (req) {
61             /* Re-arm the timer because data may have been sent out.
62              * This is similar to the regular data transmission case
63              * when new data has just been ack'ed.
64              *
65              * (TFO) - we could try to be more aggressive and
66              * retransmitting any data sooner based on when they
67              * are sent out.
68              */
69             tcp_rearm_rto(sk);
70         } 
71         /* Initialize the socket from cached route metrics */
72         else
73             tcp_init_metrics(sk);
74 
75         if (!inet_csk(sk)->icsk_ca_ops->cong_control)
76             tcp_update_pacing_rate(sk);
77 
78         /* Prevent spurious tcp_cwnd_restart() on first data packet */
79         tp->lsndtime = tcp_time_stamp;
80 
81         /* Initialize rcv_mss */
82         tcp_initialize_rcv_mss(sk);
83 
84         /* Fast-path check and enable */
85         tcp_fast_path_on(tp);
86         break;
87 }
原文地址:https://www.cnblogs.com/wanpengcoder/p/11750832.html