glibc源码逆向——fread函数

源码样例

#include<stdio.h>

/* Minimal driver used to step into glibc's fread under a debugger.
   Opens the file "test" in binary mode and reads up to 20 bytes from it.
   Returns 0 on success, 1 if the file cannot be opened or a read error
   occurs. */
int main(){
    char data[20];
    FILE*fp=fopen("test","rb");
    /* fopen returns NULL on failure; passing NULL to fread is undefined. */
    if(fp==NULL){
        perror("fopen");
        return 1;
    }
    size_t got=fread(data,1,sizeof data,fp);
    /* A short count is normal at end-of-file; only ferror marks a failure. */
    if(got<sizeof data&&ferror(fp)){
        perror("fread");
        fclose(fp);
        return 1;
    }
    fclose(fp);
    return 0;
}

源码分析

直接进入fread函数

29    _IO_size_t
30    _IO_fread (void *buf, _IO_size_t size, _IO_size_t count, _IO_FILE *fp)
31    {
32      _IO_size_t bytes_requested = size * count;
33      _IO_size_t bytes_read;
34      CHECK_FILE (fp, 0);
35      if (bytes_requested == 0)
36        return 0;
37      _IO_acquire_lock (fp);
38      bytes_read = _IO_sgetn (fp, (char *) buf, bytes_requested);
39      _IO_release_lock (fp);
40      return bytes_requested == bytes_read ? count : bytes_read / size;
41    }

可以知道其主要是调用了_IO_sgetn函数

463    _IO_size_t
464    _IO_sgetn (_IO_FILE *fp, void *data, _IO_size_t n)
465    {
466      /* FIXME handle putback buffer here! */
467      return _IO_XSGETN (fp, data, n);
468    }

而_IO_sgetn函数又调用了_IO_XSGETN函数,发现这是个宏

#define _IO_XSGETN(FP, DATA, N) JUMP2 (__xsgetn, FP, DATA, N)

跟进去后,发现其调用的是_IO_file_xsgetn函数

  1. 函数一开始会检查fp->_IO_buf_base是否为NULL,如果为NULL(缓冲区尚未分配)就执行_IO_doallocbuf函数来分配缓冲区
  2. 接着就是一个while大循环,如果需要读取的字节数(want)小于等于缓冲区中剩下的字节数(have),就会将want个字节从缓冲区全部拷贝到用户的目标缓冲区中,并将want置0。
  3. 如果want大于缓冲区中剩下的字节数,就会判断剩下的字节数是否大于0,如果大于0就会先将剩下的字节全部拷贝到目标缓冲区中,接着通过_IO_in_backup检查是否处于备份区,如果是就切换回主缓冲区并重新循环;然后判断want是否小于缓冲区大小,如果小于就调用__underflow填充缓冲区后重新循环
  4. 否则先通过_IO_setg和_IO_setp重置读写指针,然后通过_IO_SYSREAD进行系统调用,把数据直接读入目标缓冲区
1358    _IO_size_t
1359    _IO_file_xsgetn (_IO_FILE *fp, void *data, _IO_size_t n)
1360    {
1361      _IO_size_t want, have;
1362      _IO_ssize_t count;
1363      char *s = data;
1364    
1365      want = n;
1366    
1367      if (fp->_IO_buf_base == NULL)
1368        {
1369          /* Maybe we already have a push back pointer.  */
1370          if (fp->_IO_save_base != NULL)
1371        {
1372          free (fp->_IO_save_base);
1373          fp->_flags &= ~_IO_IN_BACKUP;
1374        }
1375          _IO_doallocbuf (fp);
1376        }
1377    
1378      while (want > 0)
1379        {
1380          have = fp->_IO_read_end - fp->_IO_read_ptr;
1381          if (want <= have)
1382        {
1383          memcpy (s, fp->_IO_read_ptr, want);
1384          fp->_IO_read_ptr += want;
1385          want = 0;
1386        }
1387          else
1388        {
1389          if (have > 0)
1390            {
1391    #ifdef _LIBC
1392              s = __mempcpy (s, fp->_IO_read_ptr, have);
1393    #else
1394              memcpy (s, fp->_IO_read_ptr, have);
1395              s += have;
1396    #endif
1397              want -= have;
1398              fp->_IO_read_ptr += have;
1399            }
1400    
1401          /* Check for backup and repeat */
1402          if (_IO_in_backup (fp))
1403            {
1404              _IO_switch_to_main_get_area (fp);
1405              continue;
1406            }
1407    
1408          /* If we now want less than a buffer, underflow and repeat
1409             the copy.  Otherwise, _IO_SYSREAD directly to
1410             the user buffer. */
1411          if (fp->_IO_buf_base
1412              && want < (size_t) (fp->_IO_buf_end - fp->_IO_buf_base))
1413            {
1414              if (__underflow (fp) == EOF)
1415            break;
1416    
1417              continue;
1418            }
1419    
1420          /* These must be set before the sysread as we might longjmp out
1421             waiting for input. */
1422          _IO_setg (fp, fp->_IO_buf_base, fp->_IO_buf_base, fp->_IO_buf_base);
1423          _IO_setp (fp, fp->_IO_buf_base, fp->_IO_buf_base);
1424    
1425          /* Try to maintain alignment: read a whole number of blocks.  */
1426          count = want;
1427          if (fp->_IO_buf_base)
1428            {
1429              _IO_size_t block_size = fp->_IO_buf_end - fp->_IO_buf_base;
1430              if (block_size >= 128)
1431            count -= want % block_size;
1432            }
1433    
1434          count = _IO_SYSREAD (fp, s, count);
1435          if (count <= 0)
1436            {
1437              if (count == 0)
1438            fp->_flags |= _IO_EOF_SEEN;
1439              else
1440            fp->_flags |= _IO_ERR_SEEN;
1441    
1442              break;
1443            }
1444    
1445          s += count;
1446          want -= count;
1447          if (fp->_offset != _IO_pos_BAD)
1448            _IO_pos_adjust (fp->_offset, count);
1449        }
1450        }
1451    
1452      return n - want;
1453    }
1454    libc_hidden_def (_IO_file_xsgetn)

_IO_doallocbuf函数

我们接着进入这个函数看一看

392    void
393    _IO_doallocbuf (_IO_FILE *fp)
394    {
395      if (fp->_IO_buf_base)
396        return;
397      if (!(fp->_flags & _IO_UNBUFFERED) || fp->_mode > 0)
398        if (_IO_DOALLOCATE (fp) != EOF)
399          return;
400      _IO_setb (fp, fp->_shortbuf, fp->_shortbuf+1, 0);
401    }
402    libc_hidden_def (_IO_doallocbuf)

首先检查了fp->_IO_buf_base是否为空,如果不为空,直接返回,否则检查fp->_flags是否没有设置_IO_UNBUFFERED标志,或者fp->_mode是否大于0,只要满足其一就会调用_IO_DOALLOCATE函数(不满足条件或分配失败时,则通过_IO_setb使用fp->_shortbuf这个1字节的缓冲区),接着跟进去

93    int
94    _IO_file_doallocate (_IO_FILE *fp)
95    {
96      _IO_size_t size;
97      char *p;
98      struct stat64 st;
99    
100    #ifndef _LIBC
101      /* If _IO_cleanup_registration_needed is non-zero, we should call the
102         function it points to.  This is to make sure _IO_cleanup gets called
103         on exit.  We call it from _IO_file_doallocate, since that is likely
104         to get called by any program that does buffered I/O. */
105      if (__glibc_unlikely (_IO_cleanup_registration_needed != NULL))
106        (*_IO_cleanup_registration_needed) ();
107    #endif
108    
109      size = _IO_BUFSIZ;
110      if (fp->_fileno >= 0 && __builtin_expect (_IO_SYSSTAT (fp, &st), 0) >= 0)
111        {
112          if (S_ISCHR (st.st_mode))
113        {
114          /* Possibly a tty.  */
115          if (
116    #ifdef DEV_TTY_P
117              DEV_TTY_P (&st) ||
118    #endif
119              local_isatty (fp->_fileno))
120            fp->_flags |= _IO_LINE_BUF;
121        }
122    #if _IO_HAVE_ST_BLKSIZE
123          if (st.st_blksize > 0)
124        size = st.st_blksize;
125    #endif
126        }
127      p = malloc (size);
128      if (__glibc_unlikely (p == NULL))
129        return EOF;
130      _IO_setb (fp, p, p + size, 1);
131      return 1;
132    }
133    libc_hidden_def (_IO_file_doallocate)

略一看,可以知道该函数先进行了一些简单的检查,并且通过_IO_SYSSTAT函数也就是vtable里的__stat函数,获取文件信息,并且st结构体的st_blksize也被赋值为4096,然后申请一个缓冲区,并且调用了_IO_setb函数

378    void
379    _IO_setb (_IO_FILE *f, char *b, char *eb, int a)
380    {
381      if (f->_IO_buf_base && !(f->_flags & _IO_USER_BUF))
382        free (f->_IO_buf_base);
383      f->_IO_buf_base = b;
384      f->_IO_buf_end = eb;
385      if (a)
386        f->_flags &= ~_IO_USER_BUF;
387      else
388        f->_flags |= _IO_USER_BUF;
389    }

该函数主要是对fp的_IO_buf_base和_IO_buf_end赋值(并根据参数a设置或清除_IO_USER_BUF标志),执行完后,此时查看*fp的值

 然后又返回到_IO_file_xsgetn函数中

缓冲区读入

现在已经开始进入while循环中了,此时want为我们还要读取的字节数,而have为缓冲区中还未被读取的字节数(fp->_IO_read_end - fp->_IO_read_ptr),通过第一个if,我们可以很清楚地知道它是在判断缓冲区中剩余的数据是否足够一次读取完毕

1381       if (want <= have)
   1382     {
   1383       memcpy (s, fp->_IO_read_ptr, want);
   1384       fp->_IO_read_ptr += want;
   1385       want = 0;
   1386     }

接着判断have是否大于0

1389          if (have > 0)
1390            {
1391    #ifdef _LIBC
1392              s = __mempcpy (s, fp->_IO_read_ptr, have);
1393    #else
1394              memcpy (s, fp->_IO_read_ptr, have);
1395              s += have;
1396    #endif
1397              want -= have;
1398              fp->_IO_read_ptr += have;
1399            }

如果大于0,就先把缓冲区中剩下的have个字节拷贝到目标缓冲区,并让want减去have,其实这一部分主要就是把缓冲区里已有的数据取走

 

接着就是判断fp->_IO_buf_base是否不为空(缓冲区已分配),并且想要的字节数是否小于缓冲区的大小,如果满足就调用__underflow函数

__underflow函数

接着直接进入__underflow函数,emm这里不太会分析了,只能看raycp师傅写的了

314    int
315    __underflow (_IO_FILE *fp)
316    {
317    #if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
318      if (_IO_vtable_offset (fp) == 0 && _IO_fwide (fp, -1) != -1)
319        return EOF;
320    #endif
321    
322      if (fp->_mode == 0)
323        _IO_fwide (fp, -1);
324      if (_IO_in_put_mode (fp))
325        if (_IO_switch_to_get_mode (fp) == EOF)
326          return EOF;
327      if (fp->_IO_read_ptr < fp->_IO_read_end)
328        return *(unsigned char *) fp->_IO_read_ptr;
329      if (_IO_in_backup (fp))
330        {
331          _IO_switch_to_main_get_area (fp);
332          if (fp->_IO_read_ptr < fp->_IO_read_end)
333        return *(unsigned char *) fp->_IO_read_ptr;
334        }
335      if (_IO_have_markers (fp))
336        {
337          if (save_for_backup (fp, fp->_IO_read_end))
338        return EOF;
339        }
340      else if (_IO_have_backup (fp))
341        _IO_free_backup_area (fp);
342      return _IO_UNDERFLOW (fp);
343    }

 函数稍微做一些检查就会调用_IO_UNDERFLOW函数,其中一个检查是如果fp->_IO_read_ptr小于fp->_IO_read_end则表明输入缓冲区里存在数据,可直接返回,否则则表示需要继续读入数据。

接着会进入_IO_UNDERFLOW函数,看了raycp师傅的分析,了解到该函数是FILE结构体vtable里的_IO_new_file_underflow函数,跟进去看

529    int
530    _IO_new_file_underflow (_IO_FILE *fp)
531    {
532      _IO_ssize_t count;
533    #if 0
534      /* SysV does not make this test; take it out for compatibility */
535      if (fp->_flags & _IO_EOF_SEEN)
536        return (EOF);
537    #endif
538      /*判断是否有可读标志*/
539      if (fp->_flags & _IO_NO_READS)
540        {
541          fp->_flags |= _IO_ERR_SEEN;
542          __set_errno (EBADF);
543          return EOF;
544        }
      /*如果还有读的数据,直接返回*/
545      if (fp->_IO_read_ptr < fp->_IO_read_end)
546        return *(unsigned char *) fp->_IO_read_ptr;
547      /*如果分配的空间失败,就调用_IO_doallocbuf来分配缓冲区*/
548      if (fp->_IO_buf_base == NULL)
549        {
550          /* Maybe we already have a push back pointer. */
551          if (fp->_IO_save_base != NULL)
552            {
553              free (fp->_IO_save_base);
554              fp->_flags &= ~_IO_IN_BACKUP;
555            }
556          _IO_doallocbuf (fp);
557        }
558    
559      /* Flush all line buffered files before reading. */
560      /* FIXME This can/should be moved to genops ?? */
561      if (fp->_flags & (_IO_LINE_BUF|_IO_UNBUFFERED))
562        {
563    #if 0
564          _IO_flush_all_linebuffered ();
565    #else
566          /* We used to flush all line-buffered stream. This really isn't
567             required by any standard. My recollection is that
568             traditional Unix systems did this for stdout. stderr better
569             not be line buffered. So we do just that here
570             explicitly. --drepper */
571          _IO_acquire_lock (_IO_stdout);
572    
573          if ((_IO_stdout->_flags & (_IO_LINKED | _IO_NO_WRITES | _IO_LINE_BUF))
574              == (_IO_LINKED | _IO_LINE_BUF))
575            _IO_OVERFLOW (_IO_stdout, EOF);
576    
577          _IO_release_lock (_IO_stdout);
578    #endif
579        }
580    
581      _IO_switch_to_get_mode (fp);
582    
583      /* This is very tricky. We have to adjust those
584         pointers before we call _IO_SYSREAD () since
585         we may longjump () out while waiting for
586         input. Those pointers may be screwed up. H.J. */
587      fp->_IO_read_base = fp->_IO_read_ptr = fp->_IO_buf_base;
588      fp->_IO_read_end = fp->_IO_buf_base;
589      fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end
590        = fp->_IO_buf_base;
591      /*调用_IO_SYSREAD函数最终指向系统调用读取数据*/
592      count = _IO_SYSREAD (fp, fp->_IO_buf_base,
593                   fp->_IO_buf_end - fp->_IO_buf_base);
594      if (count <= 0)
595        {
596          if (count == 0)
597            fp->_flags |= _IO_EOF_SEEN;
598          else
599            fp->_flags |= _IO_ERR_SEEN, count = 0;
600        }
      /*设置结构体指针*/
601      fp->_IO_read_end += count;
602      if (count == 0)
603        {
604          /* If a stream is read to EOF, the calling application may switch active
605             handles. As a result, our offset cache would no longer be valid, so
606             unset it. */
607          fp->_offset = _IO_pos_BAD;
608          return EOF;
609        }
610      if (fp->_offset != _IO_pos_BAD)
611        _IO_pos_adjust (fp->_offset, count);
612      return *(unsigned char *) fp->_IO_read_ptr;
613    }
614    libc_hidden_ver (_IO_new_file_underflow, _IO_file_underflow)

 随后返回

小总结

所以fread函数调用流程如下

  • _IO_sgetn——>_IO_XSGETN发现就是_IO_file_xsgetn
  • 接着进行了初始化的判断,即fp->_IO_buf_base是否为空(缓冲区是否尚未分配),随后进入了_IO_doallocbuf进行了初始化,并分配了0x1000的缓冲区
  • 然后进行读取字节,如果是第一次读取,就会进入__underflow函数,并判断缓冲区有没有分配成功,没成功就再分配一次,然后对_IO_FILE进行一些赋值操作,接着进行系统调用,将文件里的信息给读入到缓冲区中

所以我们可以搞清楚一些结构体成员的作用了

struct _IO_FILE {
  int _flags;       /* High-order word is _IO_MAGIC; rest is flags. */
#define _IO_file_flags _flags

  /* The following pointers correspond to the C++ streambuf protocol. */
  /* Note:  Tk uses the _IO_read_ptr and _IO_read_end fields directly. */
    /*当前读取指针读取的位置*/
  char* _IO_read_ptr;   /* Current read pointer */
    /*当前缓冲区的末端*/
  char* _IO_read_end;   /* End of get area. */
    /*读取缓冲区的基址*/
  char* _IO_read_base;  /* Start of putback+get area. */
  char* _IO_write_base; /* Start of put area. */
  char* _IO_write_ptr;  /* Current put pointer. */
  char* _IO_write_end;  /* End of put area. */
    /*缓冲区基址*/
  char* _IO_buf_base;   /* Start of reserve area. */
    /*缓冲区末端*/
  char* _IO_buf_end;    /* End of reserve area. */
  /* The following fields are used to support backing up and undo. */
  char *_IO_save_base; /* Pointer to start of non-current get area. */
  char *_IO_backup_base;  /* Pointer to first valid character of backup area */
  char *_IO_save_end; /* Pointer to end of non-current get area. */

  struct _IO_marker *_markers;

  struct _IO_FILE *_chain;

  int _fileno;
#if 0
  int _blksize;
#else
  int _flags2;
#endif
  _IO_off_t _old_offset; /* This used to be _offset but it's too small.  */

#define __HAVE_COLUMN /* temporary */
  /* 1+column number of pbase(); 0 is unknown. */
  unsigned short _cur_column;
  signed char _vtable_offset;
  char _shortbuf[1];

  /*  char* _save_gptr;  char* _save_egptr; */

  _IO_lock_t *_lock;
#ifdef _IO_USE_OLD_IO_FILE
};

不过在进行最后的系统调用,结构体的成员赋值还是很重要的

587      fp->_IO_read_base = fp->_IO_read_ptr = fp->_IO_buf_base;
588      fp->_IO_read_end = fp->_IO_buf_base;
589      fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end
590        = fp->_IO_buf_base;
591      /*调用_IO_SYSREAD函数最终指向系统调用读取数据*/
592      count = _IO_SYSREAD (fp, fp->_IO_buf_base,
593                   fp->_IO_buf_end - fp->_IO_buf_base);

 尾声

非常感谢raycp师傅的分析,我大部分也只是照着他的逆了一遍,不过受益匪浅,懂了很多,过一阵子我还打算自己逆一遍malloc calloc free、realloc等函数

参考资料:

IO FILE之fopen详解

原文地址:https://www.cnblogs.com/pppyyyzzz/p/14280126.html