int 0x80 系统调用实现

int 0x80 系统调用

1.用户怎么调用?(建议是间接方式,方便移植)系统如何使用?(直接方式)

2.调用后处理过程?

3.系统怎么设置的?

===============================

1.在用户层面来讲,有2种方式:直接和间接

间接,我们使用c库函数,比如 int read(int fd,char *buf,int n); 

直接,我们直接使用所要使用功能函数对应的系统调用,//lin/include/unistd.h中,比如上边对应read

_syscall3(int, read, int, fd, char *, buf, int, n);//这个是宏

172 #define _syscall3(type,name,atype,a,btype,b,ctype,c) \
173 type name(atype a,btype b,ctype c) \
174 { \
175 long __res; \
176 __asm__ volatile ("int $0x80" \
177         : "=a" (__res) \
178         : "0" (__NR_##name),"b" ((long)(a)),"c" ((long)(b)),"d" ((long)(c))); \
179 if (__res>=0) \
180         return (type) __res; \
181 errno=-__res; \
182 return -1; \
183 }
184 

--------------------------------------------------

2调用后处理过程

上边的2个方式调用,最终以下边方式实现

int read(int fd,char *buf,int n)

{

  long _res;

  __asm__ volatile (

    "int $0x80"

    : "=a" (_res)

    : "0" (__NR_read), "b" ((long)(fd)),"c"((long)(buf)),"d"((long)(n)));

  if(_res>=0)

    return (int) _res;

  errno=-_res;

  return -1;

}

//最终调用形式在c中,内嵌汇编  参数值4个分别放在eax(对应的系统功能号),ebx,ecx,edx,调用int 0x80;

-----

int 0x80做了什么

++++++++++++++++++++++++

先看看怎么来的...

//在lin/kernel/sched.c中411行sched_init()中

从这句 set_system_gate(0x80,&system_call);

可以看出

1.system_call实现了系统调用处理过程 (处理过程system_call //lin/kernel/system_call.s)

2.int 0x80中断 的对应

+++++++++

system_call()做了什么  //lin/include/linux/sys.h

1.处理中断前设置环境的过程

2.找到实际处理的入口

3.返回包括最后其他的一些处理,(这里不是重点)

++++++++++++++++++++++++++++++++

处理的入口

call _sys_call_table(,%eax,4)//lin/include/linux/sys.h

----------------处理函数在哪里

--其中小部分函数在对应 //lin/kernel/sys.c中,大部分分布在相关的文件中

-----------------------------

比如read()会找到,这里的extern int sys_read(); 函数来处理

//sys_read()实现在lin/fs/read_write.c 55-81行

都会在这里处理,这里是具体的实现

----------------------------------------

-当然这里针对不同类型文件,还有对应的pipe,chr,blk,dir具体的处理方法

--------------------------------------------

 55 int sys_read(unsigned int fd,char * buf,int count)
 56 {
 57         struct file * file;
 58         struct m_inode * inode;
 59 
 60         if (fd>=NR_OPEN || count<0 || !(file=current->filp[fd]))
 61                 return -EINVAL;
 62         if (!count)
 63                 return 0;
 64         verify_area(buf,count);
 65         inode = file->f_inode;
 66         if (inode->i_pipe)
 67                 return (file->f_mode&1)?read_pipe(inode,buf,count):-EIO;
 68         if (S_ISCHR(inode->i_mode))
 69                 return rw_char(READ,inode->i_zone[0],buf,count,&file->f_pos);
 70         if (S_ISBLK(inode->i_mode))
 71                 return block_read(inode->i_zone[0],&file->f_pos,buf,count);
 72         if (S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode)) {
 73                 if (count+file->f_pos > inode->i_size)
 74                         count = inode->i_size - file->f_pos;
 75                 if (count<=0)
 76                         return 0;
 77                 return file_read(inode,file,buf,count);
 78         }
 79         printk("(Read)inode->i_mode=%06o\n\r",inode->i_mode);
 80         return -EINVAL;
 81 }

//这里假设是对blk相关

则系统会调用block_read()在 lin/fs/block_dev.c中47-73行,代码如下

 47 int block_read(int dev, unsigned long * pos, char * buf, int count)
 48 {
 49         int block = *pos >> BLOCK_SIZE_BITS;
 50         int offset = *pos & (BLOCK_SIZE-1);
 51         int chars;
 52         int read = 0;
 53         struct buffer_head * bh;
 54         register char * p;
 55 
 56         while (count>0) {
 57                 chars = BLOCK_SIZE-offset;
 58                 if (chars > count)
 59                         chars = count;
 60                 if (!(bh = breada(dev,block,block+1,block+2,-1)))
 61                         return read?read:-EIO;
 62                 block++;
 63                 p = offset + bh->b_data;
 64                 offset = 0;
 65                 *pos += chars;
 66                 read += chars;
 67                 count -= chars;
 68                 while (chars-->0)
 69                         put_fs_byte(*(p++),buf++);
 70                 brelse(bh);
 71         }
 72         return read;
 73 }
 74 

在这个中间还有少量调用,会有具体代码实现,比如put_fs_byte();是靠汇编实现的

完整的调用过程示意....

--------------------------------------------------------

3.系统的设置(实现int 0x80)

---------------------------接上边2

---set_system_gate(0x80,&system_call);

-------------------------------

这个如何做到的

实现了系统调用 (处理过程system_call //lin/linux/kernel/system_call.s)和 int 0x80中断 的对应

+++++++++++++++++++++++++

在linux 0.11中有3种:中断、异常、系统调用,它们的描述符都在idt表中

系统调用只用了0x80 一个中断号 用eax实现不同调用子过程

++++++++++++++++++++++++++

============================================  

====以下是更具体的

================================================

//lin/include/asm/system.h

//这段代码是用来设置 中断 异常 系统调用 ,可以看出它们都在idt表中

 22 #define _set_gate(gate_addr,type,dpl,addr) \
 23 __asm__ ("movw %%dx,%%ax\n\t" \
 24         "movw %0,%%dx\n\t" \
 25         "movl %%eax,%1\n\t" \
 26         "movl %%edx,%2" \
 27         : \
 28         : "i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
 29         "o" (*((char *) (gate_addr))), \
 30         "o" (*(4+(char *) (gate_addr))), \
 31         "d" ((char *) (addr)),"a" (0x00080000))
//以上段代码把 中断 异常 系统调用,都汇集到这里处理,而且都在idt表中
 32
 33 #define set_intr_gate(n,addr) \
 34         _set_gate(&idt[n],14,0,addr)
//4个参数分别是 idt表中的偏移地址,14是type指示了系统类型(描述符系统类型),0是dpl,addr是处理程序地址偏移
 35
 36 #define set_trap_gate(n,addr) \
 37         _set_gate(&idt[n],15,0,addr)
//可以看到异常和系统调用除了编号不一样 int n;就是用户级别不一样了
 38
 39 #define set_system_gate(n,addr) \
 40         _set_gate(&idt[n],15,3,addr)
//n对应int n,系统调用是用户级,用户级别的程序都可以调用
41

//lin/kernel/sched.c

//411 int 0x80 放在idt表中

385 void sched_init(void)
386 {
387         int i;
388         struct desc_struct * p;
389 
390         if (sizeof(struct sigaction) != 16)
391                 panic("Struct sigaction MUST be 16 bytes");
392         set_tss_desc(gdt+FIRST_TSS_ENTRY,&(init_task.task.tss));
393         set_ldt_desc(gdt+FIRST_LDT_ENTRY,&(init_task.task.ldt));
394         p = gdt+2+FIRST_TSS_ENTRY;
395         for(i=1;i<NR_TASKS;i++) {
396                 task[i] = NULL;
397                 p->a=p->b=0;
398                 p++;
399                 p->a=p->b=0;
400                 p++;
401         }
402 /* Clear NT, so that we won't have troubles with that later on */
403         __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl");
404         ltr(0);
405         lldt(0);
406         outb_p(0x36,0x43);              /* binary, mode 3, LSB/MSB, ch 0 */
407         outb_p(LATCH & 0xff , 0x40);    /* LSB */
408         outb(LATCH >> 8 , 0x40);        /* MSB */
409         set_intr_gate(0x20,&timer_interrupt);
410         outb(inb_p(0x21)&~0x01,0x21);
411         set_system_gate(0x80,&system_call);
//411行实现了系统调用 和 int 0x80 的对应,0x80放在idt表中的位置,指向system_call处理
412 }
413

//lin/kernel/system_call.s

//这里是汇编所以_system_call ,和c中的system_call对应

//上边的代码指向了_system_call,处理系统调用的代码在80-128行;它用到的堆栈在下面顺便详细说明,其他部分暂时不看

/*

------

|     ss|

|   esp|

|eflags|

|     cs|

|    eip|以上5个由int 0x80自动压入;一定有ss、esp,是因为系统调用是从用户态发起的,而中断服务程序都运行在内核态

|     ds|

|     es|

|     fs|

|   edx|

|   ecx|

|   ebx|以上6个sys_call_table前压入

*/

  1 /*
  2  *  linux/kernel/system_call.s
  3  *
  4  *  (C) 1991  Linus Torvalds
  5  */
  6 
  7 /*
  8  *  system_call.s  contains the system-call low-level handling routines.
  9  * This also contains the timer-interrupt handler, as some of the code is
 10  * the same. The hd- and flopppy-interrupts are also here.
 11  *
 12  * NOTE: This code handles signal-recognition, which happens every time
 13  * after a timer-interrupt and after each system call. Ordinary interrupts
 14  * don't handle signal-recognition, as that would clutter them up totally
 15  * unnecessarily.
 16  *
 17  * Stack layout in 'ret_from_system_call':
 18  *
 19  *       0(%esp) - %eax
 20  *       4(%esp) - %ebx
 21  *       8(%esp) - %ecx
 22  *       C(%esp) - %edx
 23  *      10(%esp) - %fs
 24  *      14(%esp) - %es
 25  *      18(%esp) - %ds
 26  *      1C(%esp) - %eip
 27  *      20(%esp) - %cs
 28  *      24(%esp) - %eflags
 29  *      28(%esp) - %oldesp
 30  *      2C(%esp) - %oldss
 31  */
 32 
 33 SIG_CHLD        = 17
 34 
 35 EAX             = 0x00
 36 EBX             = 0x04
 37 ECX             = 0x08
 38 EDX             = 0x0C
 39 FS              = 0x10
 40 ES              = 0x14
 41 DS              = 0x18
 42 EIP             = 0x1C
 43 CS              = 0x20
 44 EFLAGS          = 0x24
 45 OLDESP          = 0x28
 46 OLDSS           = 0x2C
 47 
 48 state   = 0             # these are offsets into the task-struct.
 49 counter = 4
 50 priority = 8
 51 signal  = 12
 52 sigaction = 16          # MUST be 16 (=len of sigaction)
 53 blocked = (33*16)
 54 
 55 # offsets within sigaction
 56 sa_handler = 0
 57 sa_mask = 4
 58 sa_flags = 8
 59 sa_restorer = 12
 60 
 61 nr_system_calls = 72
 62 
 63 /*
 64  * Ok, I get parallel printer interrupts while using the floppy for some
 65  * strange reason. Urgel. Now I just ignore them.
 66  */
 67 .globl _system_call,_sys_fork,_timer_interrupt,_sys_execve
 68 .globl _hd_interrupt,_floppy_interrupt,_parallel_interrupt
 69 .globl _device_not_available, _coprocessor_error
 70 
 71 .align 2
 72 bad_sys_call:
 73         movl $-1,%eax
 74         iret
 75 .align 2
 76 reschedule:
 77         pushl $ret_from_sys_call
 78         jmp _schedule
 79 .align 2
 80 _system_call:
//因为int 0x80 到这里,所以用内核态堆栈要压入 ss esp eflags cs eip 5个
81 cmpl $nr_system_calls-1,%eax
//nr_system_calls减1是sys_call_table数组的最大下标;eax大于它说明系统调用号越界,跳到bad_sys_call
//eax中是对应的系统号(子程序编号)在lin/include/linux/sys.h中有对应编号,
//http://www.cnblogs.com/caesarxu/p/3261218.html
 82         ja bad_sys_call
 83         push %ds
 84         push %es
 85         push %fs
 86         pushl %edx
 87         pushl %ecx              # push %ebx,%ecx,%edx as parameters
 88         pushl %ebx              # to the system call
//内核态堆栈要压入 ds es fs edx ecx ebx 6个,这里是参数
 89         movl $0x10,%edx         # set up ds,es to kernel space
 90         mov %dx,%ds
 91         mov %dx,%es
 92         movl $0x17,%edx         # fs points to local data space
 93         mov %dx,%fs
 94         call _sys_call_table(,%eax,4)
//暂时多压入一个 地址95行,调用对应编号中的函数sys_xxxxx,在相应的函数中返回,函数处理中
对应的函数会认为是none
95          pushl %eax
//压入eax 是保护它在121弹出
 96         movl _current,%eax
 97         cmpl $0,state(%eax)             # state
 98         jne reschedule
 99         cmpl $0,counter(%eax)           # counter
100         je reschedule
101 ret_from_sys_call:
102         movl _current,%eax              # task[0] cannot have signals
103         cmpl _task,%eax
104         je 3f
105         cmpw $0x0f,CS(%esp)             # was old code segment supervisor ?
106         jne 3f
107         cmpw $0x17,OLDSS(%esp)          # was stack segment = 0x17 ?
108         jne 3f
109         movl signal(%eax),%ebx
110         movl blocked(%eax),%ecx
111         notl %ecx
112         andl %ebx,%ecx
113         bsfl %ecx,%ecx
114         je 3f
115         btrl %ecx,%ebx
116         movl %ebx,signal(%eax)
117         incl %ecx
118         pushl %ecx
119         call _do_signal
120         popl %eax
121 3:      popl %eax
122         popl %ebx
123         popl %ecx
124         popl %edx
125         pop %fs
126         pop %es
127         pop %ds
128         iret
129
130 .align 2
131 _coprocessor_error:
//
132         push %ds
133         push %es
134         push %fs
135         pushl %edx
136         pushl %ecx
137         pushl %ebx
138         pushl %eax
139         movl $0x10,%eax
140         mov %ax,%ds
141         mov %ax,%es
142         movl $0x17,%eax
143         mov %ax,%fs
144         pushl $ret_from_sys_call
145         jmp _math_error
146
147 .align 2
148 _device_not_available:
149         push %ds
150         push %es
151         push %fs
152         pushl %edx
153         pushl %ecx
154         pushl %ebx
155         pushl %eax
156         movl $0x10,%eax
157         mov %ax,%ds
158         mov %ax,%es
159         movl $0x17,%eax
160         mov %ax,%fs
161         pushl $ret_from_sys_call
162         clts                            # clear TS so that we can use math
163         movl %cr0,%eax
164         testl $0x4,%eax                 # EM (math emulation bit)
165         je _math_state_restore
166         pushl %ebp
167         pushl %esi
168         pushl %edi
169         call _math_emulate
170         popl %edi
171         popl %esi
172         popl %ebp
173         ret
174
175 .align 2
176 _timer_interrupt:
177         push %ds                # save ds,es and put kernel data space
178         push %es                # into them. %fs is used by _system_call
179         push %fs
180         pushl %edx              # we save %eax,%ecx,%edx as gcc doesn't
181         pushl %ecx              # save those across function calls. %ebx
182         pushl %ebx              # is saved as we use that in ret_sys_call
183         pushl %eax
184         movl $0x10,%eax
185         mov %ax,%ds
186         mov %ax,%es
187         movl $0x17,%eax
188         mov %ax,%fs
189         incl _jiffies
190         movb $0x20,%al          # EOI to interrupt controller #1
191         outb %al,$0x20
192         movl CS(%esp),%eax
193         andl $3,%eax            # %eax is CPL (0 or 3, 0=supervisor)
194         pushl %eax
195         call _do_timer          # 'do_timer(long CPL)' does everything from
196         addl $4,%esp            # task switching to accounting ...
197         jmp ret_from_sys_call
198
199 .align 2
200 _sys_execve:
//可能会call这里来
201         lea EIP(%esp),%eax
202         pushl %eax
203         call _do_execve
204         addl $4,%esp
205         ret
206
207 .align 2
208 _sys_fork:
//这里也有可能的
209         call _find_empty_process
210         testl %eax,%eax
211         js 1f
212         push %gs
213         pushl %esi
214         pushl %edi
215         pushl %ebp
216         pushl %eax
217         call _copy_process
218         addl $20,%esp
219 1:      ret
220
221 _hd_interrupt:
222         pushl %eax
223         pushl %ecx
224         pushl %edx
225         push %ds
226         push %es
227         push %fs
228         movl $0x10,%eax
229         mov %ax,%ds
230         mov %ax,%es
231         movl $0x17,%eax
232         mov %ax,%fs
233         movb $0x20,%al
234         outb %al,$0xA0          # EOI to interrupt controller #1
235         jmp 1f                  # give port chance to breathe
236 1:      jmp 1f
237 1:      xorl %edx,%edx
238         xchgl _do_hd,%edx
239         testl %edx,%edx
240         jne 1f
241         movl $_unexpected_hd_interrupt,%edx
242 1:      outb %al,$0x20
243         call *%edx              # "interesting" way of handling intr.
244         pop %fs
245         pop %es
246         pop %ds
247         popl %edx
248         popl %ecx
249         popl %eax
250         iret
251
252 _floppy_interrupt:
253         pushl %eax
254         pushl %ecx
255         pushl %edx
256         push %ds
257         push %es
258         push %fs
259         movl $0x10,%eax
260         mov %ax,%ds
261         mov %ax,%es
262         movl $0x17,%eax
263         mov %ax,%fs
264         movb $0x20,%al
265         outb %al,$0x20          # EOI to interrupt controller #1
266         xorl %eax,%eax
267         xchgl _do_floppy,%eax
268         testl %eax,%eax
269         jne 1f
270         movl $_unexpected_floppy_interrupt,%eax
271 1:      call *%eax              # "interesting" way of handling intr.
272         pop %fs
273         pop %es
274         pop %ds
275         popl %edx
276         popl %ecx
277         popl %eax
278         iret
279
280 _parallel_interrupt:
281         pushl %eax
282         movb $0x20,%al
283         outb %al,$0x20
284         popl %eax
285         iret

//对应子过程地址   头文件_ sys_call_table实现

  1 extern int sys_setup();
  2 extern int sys_exit();
  3 extern int sys_fork();
  4 extern int sys_read();
  5 extern int sys_write();
  6 extern int sys_open();
  7 extern int sys_close();
  8 extern int sys_waitpid();
  9 extern int sys_creat();
 10 extern int sys_link();
 11 extern int sys_unlink();
 12 extern int sys_execve();
 13 extern int sys_chdir();
 14 extern int sys_time();
 15 extern int sys_mknod();
 16 extern int sys_chmod();
 17 extern int sys_chown();
 18 extern int sys_break();
 19 extern int sys_stat();
 20 extern int sys_lseek();
 21 extern int sys_getpid();
 22 extern int sys_mount();
 23 extern int sys_umount();
 24 extern int sys_setuid();
 25 extern int sys_getuid();
 26 extern int sys_stime();
 27 extern int sys_ptrace();
 28 extern int sys_alarm();
 29 extern int sys_fstat();
 30 extern int sys_pause();
 31 extern int sys_utime();
 32 extern int sys_stty();
 33 extern int sys_gtty();
 34 extern int sys_access();
 35 extern int sys_nice();
 36 extern int sys_ftime();
 37 extern int sys_sync();
 38 extern int sys_kill();
 39 extern int sys_rename();
 40 extern int sys_mkdir();
 41 extern int sys_rmdir();
 42 extern int sys_dup();
 43 extern int sys_pipe();
 44 extern int sys_times();
 45 extern int sys_prof();
 46 extern int sys_brk();
 47 extern int sys_setgid();
 48 extern int sys_getgid();
 49 extern int sys_signal();
 50 extern int sys_geteuid();
 51 extern int sys_getegid();
 52 extern int sys_acct();
 53 extern int sys_phys();
 54 extern int sys_lock();
 55 extern int sys_ioctl();
 56 extern int sys_fcntl();
 57 extern int sys_mpx();
 58 extern int sys_setpgid();
 59 extern int sys_ulimit();
 60 extern int sys_uname();
 61 extern int sys_umask();
 62 extern int sys_chroot();
 63 extern int sys_ustat();
 64 extern int sys_dup2();
 65 extern int sys_getppid();
 66 extern int sys_getpgrp();
 67 extern int sys_setsid();
 68 extern int sys_sigaction();
 69 extern int sys_sgetmask();
 70 extern int sys_ssetmask();
 71 extern int sys_setreuid();
 72 extern int sys_setregid();
 73 
 74 fn_ptr sys_call_table[] = { sys_setup, sys_exit, sys_fork, sys_read,
 75 sys_write, sys_open, sys_close, sys_waitpid, sys_creat, sys_link,
 76 sys_unlink, sys_execve, sys_chdir, sys_time, sys_mknod, sys_chmod,
 77 sys_chown, sys_break, sys_stat, sys_lseek, sys_getpid, sys_mount,
 78 sys_umount, sys_setuid, sys_getuid, sys_stime, sys_ptrace, sys_alarm,
 79 sys_fstat, sys_pause, sys_utime, sys_stty, sys_gtty, sys_access,
 80 sys_nice, sys_ftime, sys_sync, sys_kill, sys_rename, sys_mkdir,
 81 sys_rmdir, sys_dup, sys_pipe, sys_times, sys_prof, sys_brk, sys_setgid,
 82 sys_getgid, sys_signal, sys_geteuid, sys_getegid, sys_acct, sys_phys,
 83 sys_lock, sys_ioctl, sys_fcntl, sys_mpx, sys_setpgid, sys_ulimit,
 84 sys_uname, sys_umask, sys_chroot, sys_ustat, sys_dup2, sys_getppid,
 85 sys_getpgrp, sys_setsid, sys_sigaction, sys_sgetmask, sys_ssetmask,
 86 sys_setreuid,sys_setregid };
 87 

//对应的处理函数 lin/kernel/sys.c 少部分在这里处理

//大部分分散在相应的文件中,如copy_process(...)

//

原文地址:https://www.cnblogs.com/caesarxu/p/3261232.html