TCP主动打开 之 第一次握手-发送SYN

TCP客户端与服务器端建立连接需要经过三次握手过程。本文主要分析客户端主动打开中的第一次握手部分,即客户端向服务器端发送SYN段。

tcp_v4_connect为发起连接的主流程:首先对必要参数进行检查,查找路由信息,将连接状态改为SYN_SENT,再调用inet_hash_connect绑定源端口并将控制块加入ehash,最后调用tcp_connect发送SYN。

  1 /* This will initiate an outgoing connection.
     *
     * sk:       socket to connect
     * uaddr:    destination address, interpreted as struct sockaddr_in
     * addr_len: length of uaddr in bytes
     *
     * Validates the address, looks up a route, moves the socket to
     * TCP_SYN_SENT, binds/hashes it via inet_hash_connect(), refreshes the
     * route for the final ports, picks the initial sequence number and
     * finally calls tcp_connect() to send the SYN.
     *
     * Returns 0 on success or a negative errno: -EINVAL (short address, or
     * zero destination with source routing), -EAFNOSUPPORT (family is not
     * AF_INET), -ENETUNREACH (multicast/broadcast route), or whatever the
     * routing, hashing, Fast Open or tcp_connect() helpers return.  Errors
     * after the state change go through the failure label, which puts the
     * socket back into TCP_CLOSE and clears the destination port.
     */
  2 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
  3 {
  4     struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
  5     struct inet_sock *inet = inet_sk(sk);
  6     struct tcp_sock *tp = tcp_sk(sk);
  7     __be16 orig_sport, orig_dport;
  8     __be32 daddr, nexthop;
  9     struct flowi4 *fl4;
 10     struct rtable *rt;
 11     int err;
 12     struct ip_options_rcu *inet_opt;
 13 
 14     /* Per-netns TIME_WAIT death row, handed to inet_hash_connect() below */
 15     struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
 16 
 17     /* Address is too short to be a sockaddr_in */
 18     if (addr_len < sizeof(struct sockaddr_in))
 19         return -EINVAL;
 20 
 21     /* Only AF_INET addresses are accepted */
 22     if (usin->sin_family != AF_INET)
 23         return -EAFNOSUPPORT;
 24 
 25     /* Next hop and destination both start as the user-supplied address */
 26     nexthop = daddr = usin->sin_addr.s_addr;
 27 
 28     /* Fetch the socket's IP options (the socket lock is expected to be held) */
 29     inet_opt = rcu_dereference_protected(inet->inet_opt,
 30                          lockdep_sock_is_held(sk));
 31 
 32     /* Source-route option is in use */
 33     if (inet_opt && inet_opt->opt.srr) {
 34         if (!daddr)
 35             return -EINVAL;
 36         /* Route towards the address taken from the option instead */
 37         nexthop = inet_opt->opt.faddr;
 38     }
 39 
 40     /* Remember the original source and destination ports */
 41     orig_sport = inet->inet_sport;
 42     orig_dport = usin->sin_port;
 43 
 44     /* Route lookup */
 45     fl4 = &inet->cork.fl.u.ip4;
 46     rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
 47                   RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
 48                   IPPROTO_TCP,
 49                   orig_sport, orig_dport, sk);
 50     /* Lookup failed */
 51     if (IS_ERR(rt)) {
 52         err = PTR_ERR(rt);
 53         if (err == -ENETUNREACH)
 54             IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
 55         return err;
 56     }
 57 
 58 
 59     /* Lookup succeeded */
 60 
 61     /* Refuse routes that are multicast or broadcast */
 62     if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
 63         ip_rt_put(rt);
 64         return -ENETUNREACH;
 65     }
 66 
 67     /* No options, or source routing not in use: */
 68     /* take the destination address from the flow used for the lookup */
 69     if (!inet_opt || !inet_opt->opt.srr)
 70         daddr = fl4->daddr;
 71 
 72     /* No source address set on the socket yet: */
 73     /* use the source address from the flow */
 74     if (!inet->inet_saddr)
 75         inet->inet_saddr = fl4->saddr;
 76     /* Set the receive address to the source address */
 77     sk_rcv_saddr_set(sk, inet->inet_saddr);
 78 
 79     /* A recent-timestamp stamp exists and the destination differs from the stored one: */
 80     /* the socket has been used before, so reset the inherited state */
 81     if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
 82         /* Reset inherited state */
 83         tp->rx_opt.ts_recent       = 0;
 84         tp->rx_opt.ts_recent_stamp = 0;
 85         if (likely(!tp->repair))
 86             tp->write_seq       = 0;
 87     }
 88 
 89     /* Set the destination port */
 90     inet->inet_dport = usin->sin_port;
 91     /* Set the destination address */
 92     sk_daddr_set(sk, daddr);
 93 
 94     /* Record the IP options length as the extension header length */
 95     inet_csk(sk)->icsk_ext_hdr_len = 0;
 96     if (inet_opt)
 97         inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
 98 
 99     /* Initialise the MSS clamp to TCP_MSS_DEFAULT */
100     tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
101 
102     /* Socket identity is still unknown (sport may be zero).
103      * However we set state to SYN-SENT and not releasing socket
104      * lock select source port, enter ourselves into the hash tables and
105      * complete initialization after this.
106      */
107     /* Move the connection to TCP_SYN_SENT */
108     tcp_set_state(sk, TCP_SYN_SENT);
109 
110     /* Select/bind the source port and enter the socket into the hash tables */
111     err = inet_hash_connect(tcp_death_row, sk);
112     if (err)
113         goto failure;
114 
115     /* Set the socket's transmit hash */
116     sk_set_txhash(sk);
117 
118     /* 
119         If the source or destination port changed,
120         look the route up again with the new ports
121     */
122     rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
123                    inet->inet_sport, inet->inet_dport, sk);
124     if (IS_ERR(rt)) {
125         err = PTR_ERR(rt);
126         rt = NULL;
127         goto failure;
128     }
129     /* OK, now commit destination to socket.  */
130     sk->sk_gso_type = SKB_GSO_TCPV4;
131 
132     /* Store the route's dst entry and derived capabilities in the socket */
133     sk_setup_caps(sk, &rt->dst);
134     rt = NULL;
135 
136     if (likely(!tp->repair)) {
137         /* Pick an initial send sequence number if none is set yet */
138         if (!tp->write_seq)
139             tp->write_seq = secure_tcp_seq(inet->inet_saddr,
140                                inet->inet_daddr,
141                                inet->inet_sport,
142                                usin->sin_port);
143         /* Timestamp offset */
144         tp->tsoffset = secure_tcp_ts_off(inet->inet_saddr,
145                          inet->inet_daddr);
146     }
147 
148     /* Seed inet_id from the sequence number XOR jiffies */
149     inet->inet_id = tp->write_seq ^ jiffies;
150 
151     /* Fast Open: if the connect is deferred, return err as set by the helper */
152     if (tcp_fastopen_defer_connect(sk, &err))
153         return err;
154     if (err)
155         goto failure;
156 
157     /* Send the SYN */
158     err = tcp_connect(sk);
159 
160     if (err)
161         goto failure;
162 
163     return 0;
164 
165 failure:
166     /*
167      * This unhashes the socket and releases the local port,
168      * if necessary.
169      */
170     tcp_set_state(sk, TCP_CLOSE);
171     ip_rt_put(rt);
172     sk->sk_route_caps = 0;
173     inet->inet_dport = 0;
174     return err;
175 }

__inet_hash_connect(由inet_hash_connect调用)将端口检查通过的控制块加入到ehash。函数根据是否已设置端口分别处理:若未设置端口,则需要在本地端口范围内查找一个可用端口。函数还会调用check_established检查是否可以复用处于TIME_WAIT状态的控制块,并调用inet_ehash_nolisten将控制块加入到ehash。

  /* Bind sk to a local port for an outgoing connection and insert it into
   * the established hash.
   *
   * death_row:         supplies the hash tables (death_row->hashinfo) and is
   *                    passed through to check_established
   * sk:                socket being connected
   * port_offset:       added to the static hint to choose where the port
   *                    scan starts
   * check_established: callback that checks whether using a given local port
   *                    is acceptable; it may hand back a TIME_WAIT socket
   *                    through its last argument
   *
   * If sk already has a port (inet_num != 0) only that port is checked.
   * Otherwise the local port range is scanned, first over ports with the
   * parity of low and then over the other parity.
   *
   * Returns 0 on success, -EADDRNOTAVAIL when the scan finds no usable port,
   * -ENOMEM when a bind bucket cannot be allocated, or the result of
   * check_established for an already-bound port.
   */
  1 int __inet_hash_connect(struct inet_timewait_death_row *death_row,
  2         struct sock *sk, u32 port_offset,
  3         int (*check_established)(struct inet_timewait_death_row *,
  4             struct sock *, __u16, struct inet_timewait_sock **))
  5 {
  6     struct inet_hashinfo *hinfo = death_row->hashinfo;
  7     struct inet_timewait_sock *tw = NULL;
  8     struct inet_bind_hashbucket *head;
  9     int port = inet_sk(sk)->inet_num;
 10     struct net *net = sock_net(sk);
 11     struct inet_bind_bucket *tb;
 12     u32 remaining, offset;
 13     int ret, i, low, high;
 14     static u32 hint;
 15 
 16     /* The socket already has a local port */
 17     if (port) {
 18         head = &hinfo->bhash[inet_bhashfn(net, port,
 19                           hinfo->bhash_size)];
 20 
 21         /* Bind bucket the socket is attached to */
 22         tb = inet_csk(sk)->icsk_bind_hash;
 23         spin_lock_bh(&head->lock);
 24 
 25         /* The socket is the only owner of this bind bucket */
 26         if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
 27             /* Insert the socket into ehash */
 28             inet_ehash_nolisten(sk, NULL);
 29             spin_unlock_bh(&head->lock);
 30             return 0;
 31         }
 32         spin_unlock(&head->lock);
 33         /* No definite answer... Walk to established hash table */
 34         /* Only the spinlock was dropped above; bottom halves are re-enabled after the check */
 35         ret = check_established(death_row, sk, port, NULL);
 36         local_bh_enable();
 37         return ret;
 38     }
 39 
 40 
 41     /* No port bound yet: search the local port range for one */
 42 
 43     inet_get_local_port_range(net, &low, &high);
 44     high++; /* [32768, 60999] -> [32768, 61000[ */
 45     remaining = high - low;
 46     if (likely(remaining > 1))
 47         remaining &= ~1U;
 48 
 49     offset = (hint + port_offset) % remaining;
 50     /* In first pass we try ports of @low parity.
 51      * inet_csk_get_port() does the opposite choice.
 52      */
 53     offset &= ~1U;
 54 other_parity_scan:
 55     port = low + offset;
 56 
 57     /* Walk the candidate ports in steps of two, wrapping at high */
 58     for (i = 0; i < remaining; i += 2, port += 2) {
 59         if (unlikely(port >= high))
 60             port -= remaining;
 61         /* Skip locally reserved ports */
 62         if (inet_is_local_reserved_port(net, port))
 63             continue;
 64 
 65         /* Locate the bind hash bucket for this port */
 66         head = &hinfo->bhash[inet_bhashfn(net, port,
 67                           hinfo->bhash_size)];
 68         spin_lock_bh(&head->lock);
 69 
 70         /* Does not bother with rcv_saddr checks, because
 71          * the established check is already unique enough.
 72          */
 73         /* Walk the bind buckets chained on this hash bucket */
 74         inet_bind_bucket_for_each(tb, &head->chain) {
 75 
 76             /* Found a bucket for the same netns and port */
 77             if (net_eq(ib_net(tb), net) && tb->port == port) {
 78 
 79                 /* fastreuse/fastreuseport >= 0: not a bucket marked -1 as done below; skip this port */
 80                 if (tb->fastreuse >= 0 ||
 81                     tb->fastreuseport >= 0)
 82                     goto next_port;
 83                 WARN_ON(hlist_empty(&tb->owners));
 84 
 85                 /* Ask the callback; it may hand back a TIME_WAIT socket in tw */
 86                 if (!check_established(death_row, sk,
 87                                port, &tw))
 88                     goto ok;
 89                 goto next_port;
 90             }
 91         }
 92 
 93         /* No existing bind bucket for this port */
 94 
 95         /* Create a bind bucket for the port and add it to the bind hash */
 96         tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
 97                          net, head, port);
 98         if (!tb) {
 99             spin_unlock_bh(&head->lock);
100             return -ENOMEM;
101         }
102 
103         /* Mark the new bucket with -1 for both reuse flags */
104         tb->fastreuse = -1;
105         tb->fastreuseport = -1;
106         goto ok;
107 next_port:
108         spin_unlock_bh(&head->lock);
109         cond_resched();
110     }
111 
112     /* Nothing found: retry once with ports of the other parity */
113     offset++;
114     if ((offset & 1) && remaining > 1)
115         goto other_parity_scan;
116 
117     return -EADDRNOTAVAIL;
118 
119 ok:
120     hint += i + 2;
121 
122     /* Head lock still held and bh's disabled */
123 
124     /* Attach the socket to the bind bucket for this port */
125     inet_bind_hash(sk, tb, port);
126 
127     /* Not hashed yet: set the source port and insert into ehash */
128     if (sk_unhashed(sk)) {
129         inet_sk(sk)->inet_sport = htons(port);
130         inet_ehash_nolisten(sk, (struct sock *)tw);
131     }
132     /* A TIME_WAIT socket was handed back: remove it from the bind hash */
133     if (tw)
134         inet_twsk_bind_unhash(tw, hinfo);
135     spin_unlock(&head->lock);
136 
137     /* Deschedule and release the TIME_WAIT socket */
138     if (tw)
139         inet_twsk_deschedule_put(tw);
140     local_bh_enable();
141     return 0;
142 }

__inet_check_established用于检查ehash中是否已存在相同四元组的连接;若存在且处于TIME_WAIT状态,则进一步判断该控制块是否可以复用。

 1 /* Check whether sk can use local port lport without clashing with an
    * entry already in the established hash and, if so, insert sk there.
    *
    * death_row: supplies the hash tables (death_row->hashinfo)
    * sk:        socket being connected
    * lport:     candidate local port, host byte order (htons() is applied
    *            before it is stored in inet_sport)
    * twp:       optional out parameter; when non-NULL it receives the matching
    *            TIME_WAIT socket accepted by twsk_unique(), or NULL
    *
    * Note that the local daddr holds the socket's receive address and saddr
    * holds its destination address.
    *
    * Returns 0 after inserting sk, or -EADDRNOTAVAIL when a matching entry
    * exists that is not in TIME_WAIT or is rejected by twsk_unique().
    *
    * called with local bh disabled
    */
 2 static int __inet_check_established(struct inet_timewait_death_row *death_row,
 3                     struct sock *sk, __u16 lport,
 4                     struct inet_timewait_sock **twp)
 5 {
 6     struct inet_hashinfo *hinfo = death_row->hashinfo;
 7     struct inet_sock *inet = inet_sk(sk);
 8     __be32 daddr = inet->inet_rcv_saddr;
 9     __be32 saddr = inet->inet_daddr;
10     int dif = sk->sk_bound_dev_if;
11     INET_ADDR_COOKIE(acookie, saddr, daddr);
12     const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
13     struct net *net = sock_net(sk);
14     unsigned int hash = inet_ehashfn(net, daddr, lport,
15                      saddr, inet->inet_dport);
16     struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
17     spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
18     struct sock *sk2;
19     const struct hlist_nulls_node *node;
20     struct inet_timewait_sock *tw = NULL;
21 
22     spin_lock(lock);
23 
24     /* Walk the ehash chain */
25     sk_nulls_for_each(sk2, node, &head->chain) {
26 
27         /* Different hash: cannot be a match */
28         if (sk2->sk_hash != hash)
29             continue;
30 
31         /* Found an entry matching netns, addresses, ports and device */
32         if (likely(INET_MATCH(sk2, net, acookie,
33                      saddr, daddr, ports, dif))) {
34             /* The matching entry is in TIME_WAIT */
35             if (sk2->sk_state == TCP_TIME_WAIT) {
36                 tw = inet_twsk(sk2);
37 
38                 /* twsk_unique() accepts it: the entry can be replaced */
39                 if (twsk_unique(sk, sk2, twp))
40                     break;
41             }
42 
43             /* Not in TIME_WAIT, or not acceptable for reuse */
44             goto not_unique;
45         }
46     }
47 
48     /* Must record num and sport now. Otherwise we will see
49      * in hash table socket with a funny identity.
50      */
51     /* Record the port and the hash on the socket */
52     inet->inet_num = lport;
53     inet->inet_sport = htons(lport);
54     sk->sk_hash = hash;
55     WARN_ON(!sk_unhashed(sk));
56 
57     /* Insert the socket into the ehash chain */
58     __sk_nulls_add_node_rcu(sk, &head->chain);
59     if (tw) {
60         /* Remove the TIME_WAIT entry from the chain */
61         sk_nulls_del_node_init_rcu((struct sock *)tw);
62         __NET_INC_STATS(net, LINUX_MIB_TIMEWAITRECYCLED);
63     }
64     spin_unlock(lock);
65 
66     /* Bump the protocol's in-use counter */
67     sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
68 
69     /* Hand the TIME_WAIT socket to the caller, or dispose of it here */
70     if (twp) {
71         *twp = tw;
72     } else if (tw) {
73         /* Silly. Should hash-dance instead... */
74         inet_twsk_deschedule_put(tw);
75     }
76     return 0;
77 
78 not_unique:
79     spin_unlock(lock);
80     return -EADDRNOTAVAIL;
81 }

inet_ehash_nolisten用于将控制块加入ehash,并根据插入结果做不同处理:成功则增加协议使用计数,失败则将控制块置为关闭状态并销毁。

 1 /* Insert sk into the established hash via inet_ehash_insert().
    *
    * sk:  socket to insert
    * osk: passed straight through to inet_ehash_insert(); callers in this
    *      file pass NULL or a TIME_WAIT socket
    *
    * Returns the result of inet_ehash_insert().  On success the protocol's
    * in-use counter is incremented; on failure the socket is marked
    * TCP_CLOSE and SOCK_DEAD and destroyed.
    */
 2 bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
 3 {
 4     /* Try to add the socket to ehash */
 5     bool ok = inet_ehash_insert(sk, osk);
 6 
 7     if (ok) {
 8         /* Inserted: bump the protocol's in-use counter */
 9         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
10     } else {
11         /* Increment the orphan count */
12         percpu_counter_inc(sk->sk_prot->orphan_count);
13         /* Mark the connection as closed */
14         sk->sk_state = TCP_CLOSE;
15         /* Flag the socket as dead */
16         sock_set_flag(sk, SOCK_DEAD);
17         /* Destroy the socket */
18         inet_csk_destroy_sock(sk);
19     }
20     return ok;
21 }

tcp_connect用于构造SYN包并发送,发送之后启动SYN包的重传定时器。

 1 /* Build a SYN and send it off.
    *
    * sk: socket being connected
    *
    * Returns 0 on success (including repair mode, where no SYN is built),
    * -EHOSTUNREACH when rebuild_header() fails, -ENOBUFS when no skb can be
    * allocated, or -ECONNREFUSED from the transmit.  Any other transmit
    * error is not returned: the sequence state is updated and the
    * retransmit timer is armed regardless.
    */
 2 int tcp_connect(struct sock *sk)
 3 {
 4     struct tcp_sock *tp = tcp_sk(sk);
 5     struct sk_buff *buff;
 6     int err;
 7 
 8     /* Rebuild the header via the address-family ops; a non-zero result is a failure */
 9     if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
10         return -EHOSTUNREACH; /* Routing failure or similar. */
11 
12     /* Initialise the connection-related members of the socket */
13     tcp_connect_init(sk);
14 
15     if (unlikely(tp->repair)) {
16         tcp_finish_connect(sk, NULL);
17         return 0;
18     }
19 
20     /* Allocate the skb */
21     buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, true);
22     if (unlikely(!buff))
23         return -ENOBUFS;
24 
25     /* Initialise the data-less skb as a SYN; write_seq is consumed and advanced */
26     tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
27 
28     /* Record the time the SYN is sent */
29     tp->retrans_stamp = tcp_time_stamp;
30 
31     /* Queue the skb on the send queue */
32     tcp_connect_queue_skb(sk, buff);
33 
34     /* ECN (explicit congestion notification) setup for the SYN */
35     tcp_ecn_send_syn(sk, buff);
36 
37     /* Send off SYN; include data in Fast Open. */
38     /* Transmit: with a Fast Open request use tcp_send_syn_data(), otherwise tcp_transmit_skb() */
39     err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
40           tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
41     if (err == -ECONNREFUSED)
42         return err;
43 
44     /* We change tp->snd_nxt after the tcp_transmit_skb() call
45      * in order to make this packet get counted in tcpOutSegs.
46      */
47     /* Update the sequence-number state */
48     tp->snd_nxt = tp->write_seq;
49     tp->pushed_seq = tp->write_seq;
50     TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
51 
52     /* Timer for repeating the SYN until an answer. */
53     /* Arm the retransmit timer */
54     inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
55                   inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
56     return 0;
57 }
原文地址:https://www.cnblogs.com/wanpengcoder/p/11750580.html