套接字之socket系统调用

socket系统调用主要完成socket的创建、必要字段的初始化、关联传输控制块、绑定文件等任务,完成后返回与socket绑定的文件描述符;

1 /**
2  * socket函数调用关系
3  * sys_socket
4  *   |-->sock_create
5  *   |      |-->__sock_create
6  *   |            |-->inet_create           
7  *   |-->sock_map_fd
8  */
 1 SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) /* socket(2): create a socket and return a file descriptor bound to it, or a negative error code */
 2 {
 3     int retval;
 4     struct socket *sock;
 5     int flags;
 6 
 7     /* Check the SOCK_* constants for consistency.  */
 8     BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
 9     BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
10     BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
11     BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
12 
13     /* Extract the flag bits, i.e. everything in type outside SOCK_TYPE_MASK */
14     flags = type & ~SOCK_TYPE_MASK;
15 
16     /* Any flag other than SOCK_CLOEXEC / SOCK_NONBLOCK is an error */
17     if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
18         return -EINVAL;
19 
20     /* Keep only the socket type bits */
21     type &= SOCK_TYPE_MASK;
22 
23 
24     /* Where SOCK_NONBLOCK and O_NONBLOCK differ in value, carry the request as O_NONBLOCK */
25     if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
26         flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
27 
28     /* Create the socket */
29     retval = sock_create(family, type, protocol, &sock);
30     if (retval < 0)
31         goto out;
32 
33     /* Create the socket's file and bind it to a descriptor; on success retval is that descriptor */
34     retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
35     if (retval < 0)
36         goto out_release;
37 
38 out:
39     /* It may be already another descriptor 8) Not kernel problem. */
40     return retval;
41 
42 out_release:
43     sock_release(sock); /* no descriptor was obtained: release the socket created above */
44     return retval;
45 }

因为sock_create调用层次较多,放到下面来分析,此处先看下sock_map_fd函数,该函数负责分配文件,并与socket进行绑定;

 1 /* Bind a socket to a newly reserved file descriptor; returns the descriptor, or a negative error code on failure */
 2 static int sock_map_fd(struct socket *sock, int flags)
 3 {
 4     struct file *newfile;
 5     /* Reserve an unused file descriptor */
 6     int fd = get_unused_fd_flags(flags);
 7     if (unlikely(fd < 0))
 8         return fd;
 9 
10     /* Allocate the file for the socket */
11     newfile = sock_alloc_file(sock, flags, NULL);
12     if (likely(!IS_ERR(newfile))) {
13         /* Attach the file to the reserved descriptor */
14         fd_install(fd, newfile);
15         return fd;
16     }
17 
18     /* File allocation failed: give the reserved descriptor back */
19     put_unused_fd(fd);
20     return PTR_ERR(newfile);
21 }

下面来分析sock_create流程,其主要工作为创建socket,并进行必要的初始化;

1 int sock_create(int family, int type, int protocol, struct socket **res) /* wrapper around __sock_create() */
2 {
3     return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); /* uses current's network namespace and passes kern = 0 */
4 }

__sock_create函数负责必要的检查项,创建socket,必要的初始化,之后调用对应协议族的pf->create函数来创建传输控制块,并且与socket进行关联;

  1 /* 创建socket */
  2 int __sock_create(struct net *net, int family, int type, int protocol,
  3              struct socket **res, int kern)
  4 {
  5     int err;
  6     struct socket *sock;
  7     const struct net_proto_family *pf;
  8 
  9     /*
 10      *      Check protocol is in range
 11      */
 12     /* 检查协议族 */
 13     if (family < 0 || family >= NPROTO)
 14         return -EAFNOSUPPORT;
 15 
 16     /* 检查类型 */
 17     if (type < 0 || type >= SOCK_MAX)
 18         return -EINVAL;
 19 
 20     /* Compatibility.
 21 
 22        This uglymoron is moved from INET layer to here to avoid
 23        deadlock in module load.
 24      */
 25     /* ipv4协议族的packet已经废除,检测到,则替换成packet协议族 */
 26     if (family == PF_INET && type == SOCK_PACKET) {
 27         pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)
",
 28                  current->comm);
 29         family = PF_PACKET;
 30     }
 31 
 32     /* 安全模块检查套接口 */
 33     err = security_socket_create(family, type, protocol, kern);
 34     if (err)
 35         return err;
 36 
 37     /*
 38      *    Allocate the socket and allow the family to set things up. if
 39      *    the protocol is 0, the family is instructed to select an appropriate
 40      *    default.
 41      */
 42     /* 分配socket,内部和inode已经绑定 */
 43     sock = sock_alloc();
 44     if (!sock) {
 45         net_warn_ratelimited("socket: no more sockets
");
 46         return -ENFILE;    /* Not exactly a match, but its the
 47                    closest posix thing */
 48     }
 49 
 50     /* 设定类型 */
 51     sock->type = type;
 52 
 53 #ifdef CONFIG_MODULES
 54     /* Attempt to load a protocol module if the find failed.
 55      *
 56      * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
 57      * requested real, full-featured networking support upon configuration.
 58      * Otherwise module support will break!
 59      */
 60     if (rcu_access_pointer(net_families[family]) == NULL)
 61         request_module("net-pf-%d", family);
 62 #endif
 63 
 64     rcu_read_lock();
 65     /* 找到协议族 */
 66     pf = rcu_dereference(net_families[family]);
 67     err = -EAFNOSUPPORT;
 68     if (!pf)
 69         goto out_release;
 70 
 71     /*
 72      * We will call the ->create function, that possibly is in a loadable
 73      * module, so we have to bump that loadable module refcnt first.
 74      */
 75      /* 增加模块的引用计数 */
 76     if (!try_module_get(pf->owner))
 77         goto out_release;
 78 
 79     /* Now protected by module ref count */
 80     rcu_read_unlock();
 81 
 82     /* 调用协议族的创建函数 */
 83     err = pf->create(net, sock, protocol, kern);
 84     if (err < 0)
 85         goto out_module_put;
 86 
 87     /*
 88      * Now to bump the refcnt of the [loadable] module that owns this
 89      * socket at sock_release time we decrement its refcnt.
 90      */
 91     if (!try_module_get(sock->ops->owner))
 92         goto out_module_busy;
 93 
 94     /*
 95      * Now that we're done with the ->create function, the [loadable]
 96      * module can have its refcnt decremented
 97      */
 98     module_put(pf->owner);
 99     err = security_socket_post_create(sock, family, type, protocol, kern);
100     if (err)
101         goto out_sock_release;
102     *res = sock;
103 
104     return 0;
105 
106 out_module_busy:
107     err = -EAFNOSUPPORT;
108 out_module_put:
109     sock->ops = NULL;
110     module_put(pf->owner);
111 out_sock_release:
112     sock_release(sock);
113     return err;
114 
115 out_release:
116     rcu_read_unlock();
117     goto out_sock_release;
118 }
119 EXPORT_SYMBOL(__sock_create);

对于PF_INET协议族来讲,上述的pf->create函数将调用inet_create函数;

1 static const struct net_proto_family inet_family_ops = { /* PF_INET family entry; per the surrounding text, pf->create in __sock_create() resolves to inet_create for PF_INET */
2     .family = PF_INET,
3     .create = inet_create, /* creation hook for this family */
4     .owner    = THIS_MODULE,
5 };

inet_create函数负责创建传输控制块,并且将socket与传输控制块进行关联;

  1 /*
  2  *    Create an inet socket.
  3  */
  4 
  5 /* Create the transport control block (struct sock) for this socket and associate the two; returns 0 on success or a negative error code */
  6 static int inet_create(struct net *net, struct socket *sock, int protocol,
  7                int kern)
  8 {
  9     struct sock *sk;
 10     struct inet_protosw *answer;
 11     struct inet_sock *inet;
 12     struct proto *answer_prot;
 13     unsigned char answer_flags;
 14     int try_loading_module = 0;
 15     int err;
 16 
 17     /* Validate the protocol number */
 18     if (protocol < 0 || protocol >= IPPROTO_MAX)
 19         return -EINVAL;
 20 
 21     /* Mark the socket as unconnected */
 22     sock->state = SS_UNCONNECTED;
 23 
 24     /* Look for the requested type/protocol pair. */
 25 lookup_protocol:
 26     err = -ESOCKTNOSUPPORT;
 27     rcu_read_lock();
 28     list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {
 29 
 30         err = 0;
 31         /* Check the non-wild match. */
 32         /* The entry's protocol equals the requested one */
 33         if (protocol == answer->protocol) {
 34             /* A specific protocol was requested: found it */
 35             if (protocol != IPPROTO_IP)
 36                 break;
 37             
 38             /* No protocol was specified (both are IPPROTO_IP): keep searching */
 39             
 40         } 
 41         /* Branch for entries whose protocol differs from the requested one */
 42         else {
 43             /* Check for the two wild cases. */
 44             /* The caller did not specify a protocol */
 45             if (IPPROTO_IP == protocol) {
 46                 /* Adopt this entry's protocol: success */
 47                 protocol = answer->protocol;
 48                 break;
 49             }
 50 
 51             /* A protocol was specified but this entry does not match it */
 52 
 53             /* This entry is the wildcard protocol (IPPROTO_IP): use it */
 54             if (IPPROTO_IP == answer->protocol)
 55                 break;
 56         }
 57 
 58         /* This entry was not accepted; if the loop ends here the error below stands */
 59         /* i.e. a specific protocol was requested, nothing matched, and there is no wildcard entry */
 60         err = -EPROTONOSUPPORT;
 61     }
 62 
 63     /* 
 64         No matching inet_protosw entry was found:
 65         request the corresponding protocol module and look again
 66     */
 67     if (unlikely(err)) {
 68         /* Try loading a module at most twice */
 69         if (try_loading_module < 2) {
 70             rcu_read_unlock();
 71             /*
 72              * Be more specific, e.g. net-pf-2-proto-132-type-1
 73              * (net-pf-PF_INET-proto-IPPROTO_SCTP-type-SOCK_STREAM)
 74              */
 75             /* First attempt: request the module named by protocol and type */
 76             if (++try_loading_module == 1)
 77                 request_module("net-pf-%d-proto-%d-type-%d",
 78                            PF_INET, protocol, sock->type);
 79             /*
 80              * Fall back to generic, e.g. net-pf-2-proto-132
 81              * (net-pf-PF_INET-proto-IPPROTO_SCTP)
 82              */
 83             /* Second attempt: request the module named by protocol only */ 
 84             else
 85                 request_module("net-pf-%d-proto-%d",
 86                            PF_INET, protocol);
 87             goto lookup_protocol;
 88         }
 89         /* Both attempts are used up: the lookup fails */
 90         else
 91             goto out_rcu_unlock;
 92     }
 93 
 94     err = -EPERM;
 95 
 96     /* A SOCK_RAW socket from a non-kernel caller requires CAP_NET_RAW */
 97     if (sock->type == SOCK_RAW && !kern &&
 98         !ns_capable(net->user_ns, CAP_NET_RAW))
 99         goto out_rcu_unlock;
100 
101     /* Install the socket operations */
102     sock->ops = answer->ops;
103     /* Copy out the entry's prot and flags before leaving the RCU read section */
104     answer_prot = answer->prot;
105     answer_flags = answer->flags;
106     rcu_read_unlock();
107 
108     WARN_ON(!answer_prot->slab);
109 
110     err = -ENOBUFS;
111     /* Allocate the transport control block */
112     sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern);
113     if (!sk)
114         goto out;
115 
116     err = 0;
117     /* Mark the sock as reusable when the entry carries INET_PROTOSW_REUSE */
118     if (INET_PROTOSW_REUSE & answer_flags)
119         sk->sk_reuse = SK_CAN_REUSE;
120 
121     inet = inet_sk(sk);
122 
123     /* Record whether the entry carries INET_PROTOSW_ICSK (connection-oriented control block, per the original note) */
124     inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
125 
126     inet->nodefrag = 0;
127 
128     /* Raw socket */
129     if (SOCK_RAW == sock->type) {
130         /* Use the protocol number as the local port number */
131         inet->inet_num = protocol;
132 
133         /* Protocol is IPPROTO_RAW */
134         if (IPPROTO_RAW == protocol)
135             /* Flag that the caller builds the IP header itself */
136             inet->hdrincl = 1;
137     }
138 
139     /* Choose the path-MTU discovery setting from the per-namespace sysctl */
140     if (net->ipv4.sysctl_ip_no_pmtu_disc)
141         inet->pmtudisc = IP_PMTUDISC_DONT;
142     else
143         inet->pmtudisc = IP_PMTUDISC_WANT;
144 
145     /* Start inet_id at 0 */
146     inet->inet_id = 0;
147 
148     /* Initialize the control block; both the socket and the sock are passed in */
149     sock_init_data(sock, sk);
150 
151     sk->sk_destruct       = inet_sock_destruct;
152     sk->sk_protocol       = protocol;
153     sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
154 
155     inet->uc_ttl    = -1;
156     inet->mc_loop    = 1;
157     inet->mc_ttl    = 1;
158     inet->mc_all    = 1;
159     inet->mc_index    = 0;
160     inet->mc_list    = NULL;
161     inet->rcv_tos    = 0;
162 
163     sk_refcnt_debug_inc(sk);
164 
165     /* A local port number has been set */
166     if (inet->inet_num) {
167         /* It assumes that any protocol which allows
168          * the user to assign a number at socket
169          * creation time automatically
170          * shares.
171          */
172          
173         /* Store the source port in network byte order */ 
174         inet->inet_sport = htons(inet->inet_num);
175         /* Add to protocol hash chains. */
176         /* Insert into the hash */
177         err = sk->sk_prot->hash(sk);
178         if (err) {
179             sk_common_release(sk);
180             goto out;
181         }
182     }
183 
184     /* Run the protocol's init hook if it has one */
185     if (sk->sk_prot->init) {
186         err = sk->sk_prot->init(sk);
187         if (err) {
188             sk_common_release(sk);
189             goto out;
190         }
191     }
192 
193     if (!kern) {
194         err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
195         if (err) {
196             sk_common_release(sk);
197             goto out;
198         }
199     }
200 out:
201     return err;
202 out_rcu_unlock: /* failure paths that still hold the RCU read lock */
203     rcu_read_unlock();
204     goto out;
205 }
原文地址:https://www.cnblogs.com/wanpengcoder/p/7609260.html