glibc源码逆向——fopen

最近学习FSOP,但一直没学得很明白,所以自己逆一下几个常用的函数

测试代码

#include<stdio.h>
#include<stdlib.h>

/*
 * Test program for the fopen() walkthrough: opens "test" for binary
 * writing, then performs one heap allocation after the stream exists.
 *
 * Returns 0 on success, 1 if fopen(), malloc() or fclose() fails.
 */
int main(void)
{
    FILE *fp = fopen("test", "wb");
    if (fp == NULL) {
        perror("fopen");
        return 1;
    }

    /* Allocation made after fopen() has returned. */
    char *ptr = malloc(0x20);
    if (ptr == NULL) {
        perror("malloc");
        fclose(fp);
        return 1;
    }

    free(ptr);

    /* fclose() flushes the stream, so its result is checked as well. */
    if (fclose(fp) != 0) {
        perror("fclose");
        return 1;
    }
    return 0;
}

通过分析进入fopen函数查看,首先函数会调用__fopen_internal函数

    94 _IO_FILE *
    95 _IO_new_fopen (const char *filename, const char *mode)
 ►  96 {
    97   return __fopen_internal (filename, mode, 1);
    98 }

进入__fopen_internal函数看看,发现其调用的是这一部分

59    _IO_FILE *
60    __fopen_internal (const char *filename, const char *mode, int is32)
61    {
62      struct locked_FILE
63      {
64        struct _IO_FILE_plus fp;
65    #ifdef _IO_MTSAFE_IO
66        _IO_lock_t lock;
67    #endif
68        struct _IO_wide_data wd;
69      } *new_f = (struct locked_FILE *) malloc (sizeof (struct locked_FILE));
70    
71      if (new_f == NULL)
72        return NULL;
73    #ifdef _IO_MTSAFE_IO
74      new_f->fp.file._lock = &new_f->lock;
75    #endif
76    #if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
77      _IO_no_init (&new_f->fp.file, 0, 0, &new_f->wd, &_IO_wfile_jumps);
78    #else
79      _IO_no_init (&new_f->fp.file, 1, 0, NULL, NULL);
80    #endif
81      _IO_JUMPS (&new_f->fp) = &_IO_file_jumps;
82      _IO_file_init (&new_f->fp);
83    #if  !_IO_UNIFIED_JUMPTABLES
84      new_f->fp.vtable = NULL;
85    #endif
86      if (_IO_file_fopen ((_IO_FILE *) new_f, filename, mode, is32) != NULL)
87        return __fopen_maybe_mmap (&new_f->fp.file);
88    
89      _IO_un_link (&new_f->fp);
90      free (new_f);
91      return NULL;
92    }

这个函数首先为结构体locked_FILE分配了一段内存空间,其结构体成员有

64        struct _IO_FILE_plus fp;
65    #ifdef _IO_MTSAFE_IO
66        _IO_lock_t lock;
67    #endif
68        struct _IO_wide_data wd;

而通过上一篇讲的vtable劫持,我们可以知道_IO_FILE_plus结构体是非常重要的,它包含了vtable的地址

当分配完空间后,就开始执行

_IO_no_init

跟进去简单的看了看,发现是一个初始化函数:它先调用_IO_old_init,再设置_mode,然后把_wide_data里的各个缓冲区指针全部置为NULL,并把宽字符虚表_wide_vtable设置为传入的jmp

596    void
597    _IO_no_init (_IO_FILE *fp, int flags, int orientation,
598             struct _IO_wide_data *wd, const struct _IO_jump_t *jmp)
599    {
600      _IO_old_init (fp, flags);
601      fp->_mode = orientation;
602    #if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
603      if (orientation >= 0)
604        {
605          fp->_wide_data = wd;
606          fp->_wide_data->_IO_buf_base = NULL;
607          fp->_wide_data->_IO_buf_end = NULL;
608          fp->_wide_data->_IO_read_base = NULL;
609          fp->_wide_data->_IO_read_ptr = NULL;
610          fp->_wide_data->_IO_read_end = NULL;
611          fp->_wide_data->_IO_write_base = NULL;
612          fp->_wide_data->_IO_write_ptr = NULL;
613          fp->_wide_data->_IO_write_end = NULL;
614          fp->_wide_data->_IO_save_base = NULL;
615          fp->_wide_data->_IO_backup_base = NULL;
616          fp->_wide_data->_IO_save_end = NULL;
617    
618          fp->_wide_data->_wide_vtable = jmp;
619        }
620      else
621        /* Cause predictable crash when a wide function is called on a byte
622           stream.  */
623        fp->_wide_data = (struct _IO_wide_data *) -1L;
624    #endif
625      fp->_freeres_list = NULL;
626    }

接着往下看,发现有一个虚表的赋值操作,这里也是我们利用FSOP的原理

_IO_JUMPS (&new_f->fp) = &_IO_file_jumps;

随后,会调用一个_IO_file_init函数,而这里面又会调用一个_IO_link_in,我没有继续逆进去。我们知道每次新创建的_IO_FILE_plus结构体,都会被链入libc里的一个全局变量_IO_list_all(我们可以通过它遍历程序中所有的IO结构体)。这里_IO_link_in函数的功能是检查FILE结构体是否包含_IO_LINKED标志:如果不包含,则表示这个结构体还没有链接进_IO_list_all,随后会把它链接进_IO_list_all链表,同时设置FILE结构体的_chain字段为之前的链表头;否则直接返回。

143    _IO_new_file_init (struct _IO_FILE_plus *fp)
144    {
145      /* POSIX.1 allows another file handle to be used to change the position
146         of our file descriptor.  Hence we actually don't know the actual
147         position before we do the first fseek (and until a following fflush). */
148      fp->file._offset = _IO_pos_BAD;
149      fp->file._IO_file_flags |= CLOSED_FILEBUF_FLAGS;
150    
151      _IO_link_in (fp);
152      fp->file._fileno = -1;
153    }

所以_IO_file_init主要功能是将FILE结构体链接进入_IO_list_all链表,在没执行_IO_file_init函数前_IO_list_all指向的是stderr结构体

 在执行后,指向的就是我们新申请的堆空间了

 此时,我们查看下new_f->fp指针,发现其_chain已经指向了stderr

接着就是调用_IO_file_fopen来打开文件了

_IO_file_fopen ((_IO_FILE *) new_f, filename, mode, is32)

进去查看下这个函数

255    #ifdef _LIBC
256      const char *cs;
257      const char *last_recognized;
258    #endif
259    
260      if (_IO_file_is_open (fp))
261        return 0;
262      switch (*mode)
263        {
264        case 'r':
265          omode = O_RDONLY;
266          read_write = _IO_NO_WRITES;
267          break;
268        case 'w':
269          omode = O_WRONLY;
270          oflags = O_CREAT|O_TRUNC;
271          read_write = _IO_NO_READS;
272          break;
273        case 'a':
274          omode = O_WRONLY;
275          oflags = O_CREAT|O_APPEND;
276          read_write = _IO_NO_READS|_IO_IS_APPENDING;
277          break;
278        default:
279          __set_errno (EINVAL);
280          return NULL;
281        }
282    #ifdef _LIBC
283      last_recognized = mode;
284    #endif
285      for (i = 1; i < 7; ++i)
286        {
287          switch (*++mode)
288        {
289        case '\0':
290          break;
291        case '+':
292          omode = O_RDWR;
293          read_write &= _IO_IS_APPENDING;
294    #ifdef _LIBC
295          last_recognized = mode;
296    #endif
297          continue;
298        case 'x':
299          oflags |= O_EXCL;
300    #ifdef _LIBC
301          last_recognized = mode;
302    #endif
303          continue;
304        case 'b':
305    #ifdef _LIBC
306          last_recognized = mode;
307    #endif
308          continue;
309        case 'm':
310          fp->_flags2 |= _IO_FLAGS2_MMAP;
311          continue;
312        case 'c':
313          fp->_flags2 |= _IO_FLAGS2_NOTCANCEL;
314          continue;
315        case 'e':
316    #ifdef O_CLOEXEC
317          oflags |= O_CLOEXEC;
318    #endif
319          fp->_flags2 |= _IO_FLAGS2_CLOEXEC;
320          continue;
321        default:
322          /* Ignore.  */
323          continue;
324        }
325          break;
326        }
327    
328      result = _IO_file_open (fp, filename, omode|oflags, oprot, read_write,
329                  is32not64);
330    
331      if (result != NULL)
332        {
333    #ifndef __ASSUME_O_CLOEXEC
334          if ((fp->_flags2 & _IO_FLAGS2_CLOEXEC) != 0 && __have_o_cloexec <= 0)
335        {
336          int fd = _IO_fileno (fp);
337          if (__have_o_cloexec == 0)
338            {
339              int flags = __fcntl (fd, F_GETFD);
340              __have_o_cloexec = (flags & FD_CLOEXEC) == 0 ? -1 : 1;
341            }
342          if (__have_o_cloexec < 0)
343            __fcntl (fd, F_SETFD, FD_CLOEXEC);
344        }
345    #endif
346    
347          /* Test whether the mode string specifies the conversion.  */
348          cs = strstr (last_recognized + 1, ",ccs=");
349          if (cs != NULL)
350        {
351          /* Yep.  Load the appropriate conversions and set the orientation
352             to wide.  */
353          struct gconv_fcts fcts;
354          struct _IO_codecvt *cc;
355          char *endp = __strchrnul (cs + 5, ',');
356          char *ccs = malloc (endp - (cs + 5) + 3);
357    
358          if (ccs == NULL)
359            {
360              int malloc_err = errno;  /* Whatever malloc failed with.  */
361              (void) _IO_file_close_it (fp);
362              __set_errno (malloc_err);
363              return NULL;
364            }
365    
366          *((char *) __mempcpy (ccs, cs + 5, endp - (cs + 5))) = '\0';
367          strip (ccs, ccs);
368    
369          if (__wcsmbs_named_conv (&fcts, ccs[2] == '\0'
370                       ? upstr (ccs, cs + 5) : ccs) != 0)
371            {
372              /* Something went wrong, we cannot load the conversion modules.
373             This means we cannot proceed since the user explicitly asked
374             for these.  */
375              (void) _IO_file_close_it (fp);
376              free (ccs);
377              __set_errno (EINVAL);
378              return NULL;
379            }
380    
381          free (ccs);
382    
383          assert (fcts.towc_nsteps == 1);
384          assert (fcts.tomb_nsteps == 1);
385    
386          fp->_wide_data->_IO_read_ptr = fp->_wide_data->_IO_read_end;
387          fp->_wide_data->_IO_write_ptr = fp->_wide_data->_IO_write_base;
388    
389          /* Clear the state.  We start all over again.  */
390          memset (&fp->_wide_data->_IO_state, '\0', sizeof (__mbstate_t));
391          memset (&fp->_wide_data->_IO_last_state, '\0', sizeof (__mbstate_t));
392    
393          cc = fp->_codecvt = &fp->_wide_data->_codecvt;
394    
395          /* The functions are always the same.  */
396          *cc = __libio_codecvt;
397    
398          cc->__cd_in.__cd.__nsteps = fcts.towc_nsteps;
399          cc->__cd_in.__cd.__steps = fcts.towc;
400    
401          cc->__cd_in.__cd.__data[0].__invocation_counter = 0;
402          cc->__cd_in.__cd.__data[0].__internal_use = 1;
403          cc->__cd_in.__cd.__data[0].__flags = __GCONV_IS_LAST;
404          cc->__cd_in.__cd.__data[0].__statep = &result->_wide_data->_IO_state;
405    
406          cc->__cd_out.__cd.__nsteps = fcts.tomb_nsteps;
407          cc->__cd_out.__cd.__steps = fcts.tomb;
408    
409          cc->__cd_out.__cd.__data[0].__invocation_counter = 0;
410          cc->__cd_out.__cd.__data[0].__internal_use = 1;
411          cc->__cd_out.__cd.__data[0].__flags
412            = __GCONV_IS_LAST | __GCONV_TRANSLIT;
413          cc->__cd_out.__cd.__data[0].__statep =
414            &result->_wide_data->_IO_state;
415    
416          /* From now on use the wide character callback functions.  */
417          _IO_JUMPS_FILE_plus (fp) = fp->_wide_data->_wide_vtable;
418    
419          /* Set the mode now.  */
420          result->_mode = 1;
421        }
422        }
423    
424      return result;
425    }

 看到这里,我们很快就可以分析出,函数在分析完参数后,就又调用了_IO_file_open函数

(这里我也不能分析的很清楚)

不过我可以猜测,调用了open系统调用函数,并将文件描述符赋给了fp->_fileno成员,最后又再次调用了_IO_link_in确保该结构体被链接进入_IO_list_all链表。

211    _IO_FILE *
212    _IO_file_open (_IO_FILE *fp, const char *filename, int posix_mode, int prot,
213               int read_write, int is32not64)
214    {
215      int fdesc;
216    #ifdef _LIBC
217      if (__glibc_unlikely (fp->_flags2 & _IO_FLAGS2_NOTCANCEL))
218        fdesc = open_not_cancel (filename,
219                     posix_mode | (is32not64 ? 0 : O_LARGEFILE), prot);
220      else
221        fdesc = open (filename, posix_mode | (is32not64 ? 0 : O_LARGEFILE), prot);
222    #else
223      fdesc = open (filename, posix_mode, prot);
224    #endif
225      if (fdesc < 0)
226        return NULL;
227      fp->_fileno = fdesc;
228      _IO_mask_flags (fp, read_write,_IO_NO_READS+_IO_NO_WRITES+_IO_IS_APPENDING);
229      /* For append mode, send the file offset to the end of the file.  Don't
230         update the offset cache though, since the file handle is not active.  */
231      if ((read_write & (_IO_IS_APPENDING | _IO_NO_READS))
232          == (_IO_IS_APPENDING | _IO_NO_READS))
233        {
234          _IO_off64_t new_pos = _IO_SYSSEEK (fp, 0, _IO_seek_end);
235          if (new_pos == _IO_pos_BAD && errno != ESPIPE)
236        {
237          close_not_cancel (fdesc);
238          return NULL;
239        }
240        }
241      _IO_link_in ((struct _IO_FILE_plus *) fp);
242      return fp;
243    }
244    libc_hidden_def (_IO_file_open)

 调用完_IO_file_fopen函数后,new_f->fp的_IO_FILE_plus结构体为

总结

这里我感觉raycp师傅比我总结的好,所以我直接cp了过来

看完代码后,可以将fopen整体的流程归纳为:

  1. malloc分配内存空间。
  2. _IO_no_init 对file结构体进行null初始化。
  3. _IO_file_init将结构体链接进_IO_list_all链表。
  4. _IO_file_fopen执行系统调用打开文件。

整个流程还是比较简单的,fopen返回之后_IO_list_all链表指向返回的FILE结构体,且FILE结构体的_chain字段指向之前的结构体(没有其他额外打开文件的话,将是指向stderr),同时其他的字段大多都是默认的null值,vtable存储的是__GI__IO_file_jumps函数表

 感悟

这次逆向大部分都是自己完成的,不过到后面有许多细节没有注意,比如查看结构体的变化之类的,所以还是有许多的不足

参考:https://ray-cp.github.io/archivers/IO_FILE_fopen_analysis

文章的部分转载:https://ray-cp.github.io/archivers/IO_FILE_fopen_analysis

原文地址:https://www.cnblogs.com/pppyyyzzz/p/14280065.html