【Linux】GNU C库 accept函数的逐层下调的研究

在我们编写的socket程序中，一般都会使用网络套接字API，比如send()、accept()、recv()等函数，那么这些函数在Linux操作系统中是如何实现的呢？我们将通过分析代码对其进行深入的研究。本文中所使用的Glibc库版本为2.0.111，Linux内核版本为Linux 1.2.13。

1.用户层代码

在程序中我们使用了accept()函数，这个函数的实现是在Glibc库，即GNU的C库中实现的。具体代码是：

1 //glibc-2.0.111\sysdeps\unix\sysv\linux\accept.S
2 #define    socket    accept
3 #define    __socket __libc_accept
4 #define    NARGS    3
5 #include <socket.S>

这段代码与socket.S一起，是accept()从用户态进入内核态的关键代码。accept.S中将socket定义为accept，__socket定义为__libc_accept，NARGS定义为3，表示调用参数有3个。接下来包含了socket.S文件。socket.S的作用与功能按照源文件中的注释所述：

/* The socket-oriented system calls are handled unusally in Linux.
   They are all gated through the single `socketcall' system call number.
   `socketcall' takes two arguments: the first is the subcode, specifying
   which socket function is being called; and the second is a pointer to
   the arguments to the specific function.

   The .S files for the other calls just #define socket and #include this.  */

翻译成中文的大概意思是：

    socket系列的系统调用在Linux中的处理方式比较特殊。
    它们都通过单一的一个socketcall系统调用号（进行调用）。
    socketcall有两个参数：第一个是子调用码，指定了哪一个socket函数被调用；第二个参数是一个指向被调用的socket函数所需参数的指针。

    其他的（socket系列的）函数的.S文件只需要#define socket 为某个值和#include 这个文件（指此socket.S）即可

在socket.S中进行了进一步的调用，socket从用户态进行相应参数的设置，然后使用int指令自陷，调用操作系统提供的中断服务程序，在内核态执行相应的系统服务，我们将整个函数的代码粘贴进来，在具体的语句上进行注释解释：

 1 // glibc-2.0.111\sysdeps\unix\sysv\linux\i386\socket.S
 2 #include <sysdep.h>
 3 #include <socketcall.h>
 4 // 定义了P(a,b)与P2(a,b)两个宏，它们的作用是将a与b连接到一起。P经由P2间接完成拼接，
 4 // 这样参数会先被宏展开再用##连接，例如socket被定义为accept时，P(SOCKOP_,socket)得到SOCKOP_accept。
 5 #define P(a, b) P2(a, b)
 6 #define P2(a, b) a##b
 7 
 8     .text
 9 
10 #ifndef __socket
11 #ifndef NO_WEAK_ALIAS
12 #define __socket P(__,socket)     
13 #else
14 #define __socket socket
15 #endif
16 #endif
17 
18 .globl __socket
19 ENTRY (__socket)   //这里开始进行函数的处理
20 
21 
22     /* 保存ebx的值  */
23     movl %ebx, %edx
24 
25     // SYS_ify宏在sysdep.h中定义。一会儿详细了解它的作用
26     // 下面一条语句的作用是将socketcall的调用号存入寄存器eax
27     movl $SYS_ify(socketcall), %eax    /* System call number in %eax.  */
28 
29     /* 子调用号放入ebx中，关于下面这条语句，后文有详细解释  */
30     movl $P(SOCKOP_,socket), %ebx    /* Subcode is first arg to syscall.  */
31     /* 指向调用参数的指针放入ecx中  */
32     lea 4(%esp), %ecx        /* Address of args is 2nd arg.  */
33 
34         /* 0x80中断，自陷进入内核态 */
35     int $0x80
36 
37     /* 恢复ebx寄存器的值 */
38     movl %edx, %ebx
39 
40     /* eax是返回值，若其值落在[-125, -1]范围内（即按无符号数比较时 >= -125）则表示调用出错，就跳到错误处理的代码中去  */
41     cmpl $-125, %eax
42     jae SYSCALL_ERROR_LABEL
43 
44     /* 成功的话就返回相应的返回值  */
45 L(pseudo_end):
46     ret
47 
48 PSEUDO_END (__socket)
49 
50 #ifndef NO_WEAK_ALIAS
51 weak_alias (__socket, socket)
52 #endif

我们首先看movl $SYS_ify(socketcall), %eax这一条语句。SYS_ify在sysdep.h中定义，但是有两个不同文件夹下的sysdep.h文件。

（1）

按照文件层次来讲，应该是按照如下的代码进行：

1 // glibc-2.0.111\sysdeps\unix\sysv\linux\i386\sysdep.h
2 .....
3 #undef SYS_ify
4 #define SYS_ify(syscall_name)    __NR_##syscall_name
5 .....

在这段代码之前有一段注释：

/* For Linux we can use the system call table in the header file
    /usr/include/asm/unistd.h
   of the kernel.  But these symbols do not follow the SYS_* syntax
   so we have to redefine the `SYS_ify' macro here.  */

中文的大概意思是：

   对于Linux系统，我们可以使用在/usr/include/asm/unistd.h头文件中的内核系统调用表。
   但是这些符号并不是以SYS_符号为前缀的，所以这里我们必须重定义SYS_ify宏。

可以看到，通过SYS_ify(socketcall)，我们得到了__NR_socketcall。

（2）

按照另外一本书上所讲的，在下列位置中存在另外一套代码：

1 // glibc-2.0.111\sysdeps\unix\sysdep.h
2 ……
3 #ifdef __STDC__
4 #define SYS_ify(syscall_name) SYS_##syscall_name
5 #else
6 #define SYS_ify(syscall_name) SYS_/**/syscall_name
7 #endif
8 ……

如果是经由这段代码的处理，那么我们将得到SYS_socketcall，那么这又是什么呢？在glibc的源代码中是找不到它的定义的。而在实际的操作系统（笔者所使用的是Fedora 14）中，/usr/include/bits/syscall.h中则有相应的答案，这个文件是libc在构建的时候根据具体的操作系统而生成的。在其中，会有：

 1 #ifndef _SYSCALL_H
 2 # error "Never use <bits/syscall.h> directly; include <sys/syscall.h> instead."
 3 #endif
 4 
 5 #define SYS__llseek __NR__llseek
 6 #define SYS__newselect __NR__newselect
 7 #define SYS__sysctl __NR__sysctl
 8 #define SYS_access __NR_access
 9 #define SYS_acct __NR_acct
10 ……
11 #define SYS_socketcall __NR_socketcall
12 ……

可以看到，通过这一部分的处理之后，最后依然会得到__NR_socketcall。

2.内核态代码

熟悉Linux系统的人都知道，在/linux/include/linux/unistd.h中，我们可以看到如下内容：

 1 // linux/include/linux/unistd.h
 2 ……
 3 #define __NR_setup          0    /* used only by init, to get system going */
 4 #define __NR_exit          1
 5 #define __NR_fork          2
 6 #define __NR_read          3
 7 #define __NR_write          4
 8 ……
 9 #define __NR_socketcall        102
10 ……

我们可以看到，__NR_socketcall被定义为102，前面socket.S中的movl $SYS_ify(socketcall), %eax这条语句即是将eax的值赋成102，即此系统调用的调用号。

下面我们看movl $P(SOCKOP_,socket), %ebx这一句。在socketcall.h中有相应的定义：

 1 // glibc-2.0.111\sysdeps\unix\sysv\linux\socketcall.h
 2 ……
 3 #define SOCKOP_socket        1
 4 #define SOCKOP_bind        2
 5 #define SOCKOP_connect        3
 6 #define SOCKOP_listen        4
 7 #define SOCKOP_accept        5
 8 #define SOCKOP_getsockname    6
 9 #define SOCKOP_getpeername    7
10 #define SOCKOP_socketpair    8
11 #define SOCKOP_send        9
12 #define SOCKOP_recv        10
13 #define SOCKOP_sendto        11
14 #define SOCKOP_recvfrom        12
15 #define SOCKOP_shutdown        13
16 #define SOCKOP_setsockopt    14
17 #define SOCKOP_getsockopt    15
18 #define SOCKOP_sendmsg        16
19 #define SOCKOP_recvmsg        17
20 ……

这一句的意思就是将相应的操作码赋予ebx，此例中是5。下面我们进入操作系统中的代码进行分析，在entry.S中，有一段中断处理函数：

 1 // linux/arch/i386/kernel/entry.S
 2 _system_call:
 3 // 保存eax的值
 4     pushl %eax            # save orig_eax
 5 // 保存所有寄存器的值
 6     SAVE_ALL
 7     movl $-ENOSYS,EAX(%esp)
 8 // 比较eax中的调用号是否超过了限定的数值，NR_syscalls，默认是256。
 9     cmpl $(NR_syscalls),%eax  # compare whether eax>NR_syscalls
10     jae ret_from_sys_call
11 //从系统调用表中找到对应的入口地址，放入eax中
12     movl _sys_call_table(,%eax,4),%eax
13     testl %eax,%eax
14     je ret_from_sys_call
15 // 将当前进程（_current）的任务结构指针放入ebx中，后面通过它访问errno、flags、dbgreg6等字段
16     movl _current,%ebx
17     andl $~CF_MASK,EFLAGS(%esp)    # clear carry - assume no errors
18     movl $0,errno(%ebx)
19     movl %db6,%edx
20     movl %edx,dbgreg6(%ebx)  # save current hardware debugging status
21     testb $0x20,flags(%ebx)        # PF_TRACESYS
22     jne 1f
23 // 进行系统调用
24     call *%eax
25     movl %eax,EAX(%esp)        # save the return value
26     movl errno(%ebx),%edx
27     negl %edx
28     je ret_from_sys_call
29     movl %edx,EAX(%esp)
30     orl $(CF_MASK),EFLAGS(%esp)    # set carry to indicate error
31     jmp ret_from_sys_call

具体语句的作用已经在代码中进行了标注。我们接下来可以查看socket.c中处理socket调用的系统函数sys_socketcall：

 1 // linux/net/socket.c
 2 ……
 3 asmlinkage int sys_socketcall(int call, unsigned long *args)
 4 {
 5     int er;
 6     switch(call) 
 7     {
 8         case SYS_SOCKET:
 9             er=verify_area(VERIFY_READ, args, 3 * sizeof(long));
10             if(er)
11                 return er;
12             return(sock_socket(get_fs_long(args+0),
13                 get_fs_long(args+1),
14                 get_fs_long(args+2)));
15 ……
16         case SYS_ACCEPT:
17             er=verify_area(VERIFY_READ, args, 3 * sizeof(long));
18             if(er)
19                 return er;
20             return(sock_accept(get_fs_long(args+0),
21                 (struct sockaddr *)get_fs_long(args+1),
22                 (int *)get_fs_long(args+2)));
23 ……

这个sys_socketcall函数是socket系列函数的分发函数，根据具体的子调用号，调用不同的处理函数进行处理。至此，我们看到了整个从应用层socket函数到BSD socket层的传递过程，加深了我们对于此过程的了解。

作者：Chenny Chen
出处：http://www.cnblogs.com/XjChenny/
本文版权归作者和博客园共有，欢迎转载，但未经作者同意必须保留此段声明，且在文章页面明显位置给出原文链接，否则保留追究法律责任的权利。