有关binlog的那点事(mysql5.7.13)

binlog作为mysql中最重要的日志之一,能实现异常恢复以及主从复制。

我们主要讨论的是主从复制中的binlog,这里将以mysql5.7.13的源码为主要依据来分析binlog。

在主从复制中,binlog一般使用row模式,在主服务器上是binlog,在从服务器上是relaylog,在sql/binlog.h中用is_relay_log这个变量来区分这两种情况。

在整个mysql中只有一个MYSQL_BIN_LOG实例,那就是在mysqld.h中定义的mysql_bin_log

1.binlog写定位的机制

以下是所有binlog的列表,通过show binary logs可以看到。

这个列表事实上是从名为mysqlbin-log.index的文件中读取出来的,我们称这个文件为binlog index文件。

 

这些binlog相关文件一般在数据库路径下面。

这里我们要讲到binlog写的定位机制,其实就是通过binlog文件名和binlog文件内的偏移量来定位binlog的写位置。

可以查看代码

sql/binlog.h:

 1 class MYSQL_BIN_LOG: public TC_LOG
 2 {
 3   enum enum_log_state { LOG_OPENED, LOG_CLOSED, LOG_TO_BE_OPENED };
 4 
 5   /* LOCK_log is inited by init_pthread_objects() */
 6   mysql_mutex_t LOCK_log;
 7   char *name;
 8   char log_file_name[FN_REFLEN];
 9   char db[NAME_LEN + 1];
10   bool write_error, inited;
11   IO_CACHE log_file;  
12   IO_CACHE index_file;
13   char index_file_name[FN_REFLEN];
14     
View Code

index_file,log_file分别是binlog index文件以及binlog文件的读写io

log_file_name,index_file_name分别是binlog文件以及binlog index文件的文件名

include/my_sys.h:

  1 typedef struct st_io_cache        /* Used when cacheing files */
  2 {
  3   /* Offset in file corresponding to the first byte of uchar* buffer. */
  4   my_off_t pos_in_file;
  5   /*
  6     The offset of end of file for READ_CACHE and WRITE_CACHE.
  7     For SEQ_READ_APPEND it the maximum of the actual end of file and
  8     the position represented by read_end.
  9   */
 10   my_off_t end_of_file;
 11   /* Points to current read position in the buffer */
 12   uchar    *read_pos;
 13   /* the non-inclusive boundary in the buffer for the currently valid read */
 14   uchar  *read_end;
 15   uchar  *buffer;                /* The read buffer */
 16   /* Used in ASYNC_IO */
 17   uchar  *request_pos;
 18 
 19   /* Only used in WRITE caches and in SEQ_READ_APPEND to buffer writes */
 20   uchar  *write_buffer;
 21   /*
 22     Only used in SEQ_READ_APPEND, and points to the current read position
 23     in the write buffer. Note that reads in SEQ_READ_APPEND caches can
 24     happen from both read buffer (uchar* buffer) and write buffer
 25     (uchar* write_buffer).
 26   */
 27   uchar *append_read_pos;
 28   /* Points to current write position in the write buffer */
 29   uchar *write_pos;
 30   /* The non-inclusive boundary of the valid write area */
 31   uchar *write_end;
 32 
 33   /*
 34     Current_pos and current_end are convenience variables used by
 35     my_b_tell() and other routines that need to know the current offset
 36     current_pos points to &write_pos, and current_end to &write_end in a
 37     WRITE_CACHE, and &read_pos and &read_end respectively otherwise
 38   */
 39   uchar  **current_pos, **current_end;
 40 
 41   /*
 42     The lock is for append buffer used in SEQ_READ_APPEND cache
 43     need mutex copying from append buffer to read buffer.
 44   */
 45   mysql_mutex_t append_buffer_lock;
 46   /*
 47     The following is used when several threads are reading the
 48     same file in parallel. They are synchronized on disk
 49     accesses reading the cached part of the file asynchronously.
 50     It should be set to NULL to disable the feature.  Only
 51     READ_CACHE mode is supported.
 52   */
 53   IO_CACHE_SHARE *share;
 54 
 55   /*
 56     A caller will use my_b_read() macro to read from the cache
 57     if the data is already in cache, it will be simply copied with
 58     memcpy() and internal variables will be accordinging updated with
 59     no functions invoked. However, if the data is not fully in the cache,
 60     my_b_read() will call read_function to fetch the data. read_function
 61     must never be invoked directly.
 62   */
 63   int (*read_function)(struct st_io_cache *,uchar *,size_t);
 64   /*
 65     Same idea as in the case of read_function, except my_b_write() needs to
 66     be replaced with my_b_append() for a SEQ_READ_APPEND cache
 67   */
 68   int (*write_function)(struct st_io_cache *,const uchar *,size_t);
 69   /*
 70     Specifies the type of the cache. Depending on the type of the cache
 71     certain operations might not be available and yield unpredicatable
 72     results. Details to be documented later
 73   */
 74   enum cache_type type;
 75   /*
 76     Callbacks when the actual read I/O happens. These were added and
 77     are currently used for binary logging of LOAD DATA INFILE - when a
 78     block is read from the file, we create a block create/append event, and
 79     when IO_CACHE is closed, we create an end event. These functions could,
 80     of course be used for other things
 81   */
 82   IO_CACHE_CALLBACK pre_read;
 83   IO_CACHE_CALLBACK post_read;
 84   IO_CACHE_CALLBACK pre_close;
 85   /*
 86     Counts the number of times, when we were forced to use disk. We use it to
 87     increase the binlog_cache_disk_use and binlog_stmt_cache_disk_use status
 88     variables.
 89   */
 90   ulong disk_writes;
 91   void* arg;                /* for use by pre/post_read */
 92   char *file_name;            /* if used with 'open_cached_file' */
 93   char *dir,*prefix;
 94   File file; /* file descriptor */
 95   PSI_file_key file_key; /* instrumented file key */
 96 
 97   /*
 98     seek_not_done is set by my_b_seek() to inform the upcoming read/write
 99     operation that a seek needs to be preformed prior to the actual I/O
100     error is 0 if the cache operation was successful, -1 if there was a
101     "hard" error, and the actual number of I/O-ed bytes if the read/write was
102     partial.
103   */
104   int    seek_not_done,error;
105   /* buffer_length is memory size allocated for buffer or write_buffer */
106   size_t    buffer_length;
107   /* read_length is the same as buffer_length except when we use async io */
108   size_t  read_length;
109   myf    myflags;            /* Flags used to my_read/my_write */
110   /*
111     alloced_buffer is 1 if the buffer was allocated by init_io_cache() and
112     0 if it was supplied by the user.
113     Currently READ_NET is the only one that will use a buffer allocated
114     somewhere else
115   */
116   my_bool alloced_buffer;
117 } IO_CACHE;
View Code

看英文注释很容易明白,pos_in_file就是缓冲区(buffer)首字节在文件中对应的偏移量

再查看sql/binlog.h,这是定位binlog写位置的结构体

 1 typedef struct st_log_info
 2 {
 3   char log_file_name[FN_REFLEN];
 4   my_off_t index_file_offset, index_file_start_offset;
 5   my_off_t pos;
 6   bool fatal; // if the purge happens to give us a negative offset
 7   int entry_index; //used in purge_logs(), calculatd in find_log_pos().
 8   st_log_info()
 9     : index_file_offset(0), index_file_start_offset(0),
10       pos(0), fatal(0), entry_index(0)
11     {
12       memset(log_file_name, 0, FN_REFLEN);
13     }
14 } LOG_INFO;
View Code

log_file_name,pos是binlog文件的文件名以及binlog文件的偏移量,一般pos不使用。

index_file_start_offset是该binlog的文件名这一行在index文件中的起始偏移量,index_file_offset是这一行之后的偏移量,也就是下一次查找(find_next_log)开始读取的位置。

这样我们来看sql/binlog.cc中的find_log_pos以及find_next_log的代码

  1 /**
  2   Find the position in the log-index-file for the given log name.
  3 
  4   @param[out] linfo The found log file name will be stored here, along
  5   with the byte offset of the next log file name in the index file.
  6   @param log_name Filename to find in the index file, or NULL if we
  7   want to read the first entry.
  8   @param need_lock_index If false, this function acquires LOCK_index;
  9   otherwise the lock should already be held by the caller.
 10 
 11   @note
 12     On systems without the truncate function the file will end with one or
 13     more empty lines.  These will be ignored when reading the file.
 14 
 15   @retval
 16     0            ok
 17   @retval
 18     LOG_INFO_EOF            End of log-index-file found
 19   @retval
 20     LOG_INFO_IO        Got IO error while reading file
 21 */
 22 
 23 int MYSQL_BIN_LOG::find_log_pos(LOG_INFO *linfo, const char *log_name,
 24                                 bool need_lock_index)
 25 {
 26   int error= 0;
 27   char *full_fname= linfo->log_file_name;
 28   char full_log_name[FN_REFLEN], fname[FN_REFLEN];
 29   size_t log_name_len= 0, fname_len= 0;
 30   DBUG_ENTER("find_log_pos");
 31   full_log_name[0]= full_fname[0]= 0;
 32 
 33   /*
 34     Mutex needed because we need to make sure the file pointer does not
 35     move from under our feet
 36   */
 37   if (need_lock_index)
 38     mysql_mutex_lock(&LOCK_index);
 39   else
 40     mysql_mutex_assert_owner(&LOCK_index);
 41 
 42   // extend relative paths for log_name to be searched
 43   if (log_name)
 44   {
 45     if(normalize_binlog_name(full_log_name, log_name, is_relay_log))
 46     {
 47       error= LOG_INFO_EOF;
 48       goto end;
 49     }
 50   }
 51 
 52   log_name_len= log_name ? strlen(full_log_name) : 0;
 53   DBUG_PRINT("enter", ("log_name: %s, full_log_name: %s", 
 54                        log_name ? log_name : "NULL", full_log_name));
 55 
 56   /* As the file is flushed, we can't get an error here */
 57   my_b_seek(&index_file, (my_off_t) 0);
 58 
 59   for (;;)
 60   {
 61     size_t length;
 62     my_off_t offset= my_b_tell(&index_file);
 63 
 64     DBUG_EXECUTE_IF("simulate_find_log_pos_error",
 65                     error=  LOG_INFO_EOF; break;);
 66     /* If we get 0 or 1 characters, this is the end of the file */
 67     if ((length= my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
 68     {
 69       /* Did not find the given entry; Return not found or error */
 70       error= !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
 71       break;
 72     }
 73 
 74     // extend relative paths and match against full path
 75     if (normalize_binlog_name(full_fname, fname, is_relay_log))
 76     {
 77       error= LOG_INFO_EOF;
 78       break;
 79     }
 80     fname_len= strlen(full_fname);
 81 
 82     // if the log entry matches, null string matching anything
 83     if (!log_name ||
 84        (log_name_len == fname_len &&
 85        !memcmp(full_fname, full_log_name, log_name_len)))
 86     {
 87       DBUG_PRINT("info", ("Found log file entry"));
 88       linfo->index_file_start_offset= offset;
 89       linfo->index_file_offset = my_b_tell(&index_file);
 90       break;
 91     }
 92     linfo->entry_index++;
 93   }
 94 
 95 end:  
 96   if (need_lock_index)
 97     mysql_mutex_unlock(&LOCK_index);
 98   DBUG_RETURN(error);
 99 }
100 
101 
102 /**
103   Find the position in the log-index-file for the given log name.
104 
105   @param[out] linfo The filename will be stored here, along with the
106   byte offset of the next filename in the index file.
107 
108   @param need_lock_index If true, LOCK_index will be acquired;
109   otherwise it should already be held by the caller.
110 
111   @note
112     - Before calling this function, one has to call find_log_pos()
113     to set up 'linfo'
114     - Mutex needed because we need to make sure the file pointer does not move
115     from under our feet
116 
117   @retval 0 ok
118   @retval LOG_INFO_EOF End of log-index-file found
119   @retval LOG_INFO_IO Got IO error while reading file
120 */
121 int MYSQL_BIN_LOG::find_next_log(LOG_INFO* linfo, bool need_lock_index)
122 {
123   int error= 0;
124   size_t length;
125   char fname[FN_REFLEN];
126   char *full_fname= linfo->log_file_name;
127 
128   if (need_lock_index)
129     mysql_mutex_lock(&LOCK_index);
130   else
131     mysql_mutex_assert_owner(&LOCK_index);
132 
133   /* As the file is flushed, we can't get an error here */
134   my_b_seek(&index_file, linfo->index_file_offset);
135 
136   linfo->index_file_start_offset= linfo->index_file_offset;
137   if ((length=my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
138   {
139     error = !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
140     goto err;
141   }
142 
143   if (fname[0] != 0)
144   {
145     if(normalize_binlog_name(full_fname, fname, is_relay_log))
146     {
147       error= LOG_INFO_EOF;
148       goto err;
149     }
150     length= strlen(full_fname);
151   }
152 
153   linfo->index_file_offset= my_b_tell(&index_file);
154 
155 err:
156   if (need_lock_index)
157     mysql_mutex_unlock(&LOCK_index);
158   return error;
159 }
View Code

可以看到find_log_pos是根据binlog的文件名在index文件中逐行查找,从而得到该文件名在index文件中的偏移量,具体是将两个文件名都转化为绝对路径之后再进行比较的。

而find_next_log通过读取index文件中下一行的文件名来定位下一个binlog文件。这两个函数被用于重启时恢复数据库等。

最后通过raw_get_current_log可以找到当前binlog的定位,这个函数被用于show master status

int MYSQL_BIN_LOG::raw_get_current_log(LOG_INFO* linfo)
{
  strmake(linfo->log_file_name, log_file_name, sizeof(linfo->log_file_name)-1);
  linfo->pos = my_b_safe_tell(&log_file);
  return 0;
}
View Code

2.binlog的文件格式以及更新时机

binlog文件是由一个个binlog event按顺序写入组成的,可以用show binlog events in 'log file name'或者show binlog events等SQL命令查看。

写binlog文件时,首先写入的是格式信息,也就是format_desc和previous_gtids这两个binlog event。

写binlog的时机(常见的)

(1)非事务性的语句,如一些ddl,仅仅记录sql语句。

1)关于数据库的 

在sql/sql_db.cc中的

删除数据库、创建数据库、修改数据库名、升级数据库

2)视图

创建视图、修改视图、删除视图

3)表

创建表,丢弃或导入表,简单地重命名或开关索引,修改表。

4)其他

在sql_base.cc中close_temporary_tables 关闭临时表==>MYSQL_BIN_LOG::write_event

在sp_head.cc中==>MYSQL_BIN_LOG::write_event

5)灾难错误

MYSQL_BIN_LOG::write_incident

(2)事务性的语句,如一些dml,先让事务开始,有一个开始标识,再记录被修改的表的信息,然后记录修改前后的内容,最后提交事务。

即以这样的形式记录

 1)insert, update, delete

 2)删除表 回滚

还可以使用程序mysqlbinlog查看:

 1 ../bin/mysqlbinlog --base64-output=DECODE-ROWS -v mysqlbin-log.000001 

3)在rpl_injector.h中 use_table(tablemap) write_row(insert) delete_row update_row commit rollback

4)在transaction.h中

 1 bool trans_check_state(THD *thd);
 2 void trans_reset_one_shot_chistics(THD *thd);
 3 void trans_track_end_trx(THD *thd);
 4 
 5 bool trans_begin(THD *thd, uint flags= 0);
 6 bool trans_commit(THD *thd);
 7 bool trans_commit_implicit(THD *thd);
 8 bool trans_rollback(THD *thd);
 9 bool trans_rollback_implicit(THD *thd);
10 
11 bool trans_commit_stmt(THD *thd);
12 bool trans_rollback_stmt(THD *thd);
13 bool trans_commit_attachable(THD *thd);
14 
15 bool trans_savepoint(THD *thd, LEX_STRING name);
16 bool trans_rollback_to_savepoint(THD *thd, LEX_STRING name);
17 bool trans_release_savepoint(THD *thd, LEX_STRING name);
View Code

(3)binlog异常恢复

这个是指mysql在重启时的异常恢复,代码入口在mysqld.cc的4137行对open函数的调用,具体实现在binlog.h的683行

1   int open(const char *opt_name) { return open_binlog(opt_name); }
View Code

open_binlog首先通过循环调用find_next_log遍历index文件,找到最后一个binlog文件,具体请看代码

1     do
2     {
3       strmake(log_name, log_info.log_file_name, sizeof(log_name)-1);
4     } while (!(error= find_next_log(&log_info, true/*need_lock_index=true*/)));
View Code

接着打开相关文件,开始读取。

 if ((file= open_binlog_file(&log, log_name, &errmsg)) < 0)
    {
      sql_print_error("%s", errmsg);
      goto err;
    }

    my_stat(log_name, &s, MYF(0));
    binlog_size= s.st_size;

    /*
      If the binary log was not properly closed it means that the server
      may have crashed. In that case, we need to call MYSQL_BIN_LOG::recover
      to:

        a) collect logged XIDs;
        b) complete the 2PC of the pending XIDs;
        c) collect the last valid position.

      Therefore, we do need to iterate over the binary log, even if
      total_ha_2pc == 1, to find the last valid group of events written.
      Later we will take this value and truncate the log if need be.
    */
    if ((ev= Log_event::read_log_event(&log, 0, &fdle,
                                       opt_master_verify_checksum)) &&
        ev->get_type_code() == binary_log::FORMAT_DESCRIPTION_EVENT &&
        (ev->common_header->flags & LOG_EVENT_BINLOG_IN_USE_F ||
         DBUG_EVALUATE_IF("eval_force_bin_log_recovery", true, false)))
    {
      sql_print_information("Recovering after a crash using %s", opt_name);
      valid_pos= my_b_tell(&log);
      error= recover(&log, (Format_description_log_event *)ev, &valid_pos);
    }
    else
      error=0;
View Code

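恢复是在recover中实现的:它逐个读取binlog中的event,把已经完整写入binlog的事务的XID收集到哈希表中,同时记录最后一个有效位置valid_pos,最后把这个XID集合交给ha_recover,由存储引擎据此对处于prepared状态的事务进行提交或回滚。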

  1 /**
  2   MYSQLD server recovers from last crashed binlog.
  3 
  4   @param log           IO_CACHE of the crashed binlog.
  5   @param fdle          Format_description_log_event of the crashed binlog.
  6   @param valid_pos     The position of the last valid transaction or
  7                        event(non-transaction) of the crashed binlog.
  8 
  9   @retval
 10     0                  ok
 11   @retval
 12     1                  error
 13 */
 14 int MYSQL_BIN_LOG::recover(IO_CACHE *log, Format_description_log_event *fdle,
 15                             my_off_t *valid_pos)
 16 {
 17   Log_event  *ev;
 18   HASH xids;
 19   MEM_ROOT mem_root;
 20   /*
 21     The flag is used for handling the case that a transaction
 22     is partially written to the binlog.
 23   */
 24   bool in_transaction= FALSE;
 25 
 26   if (! fdle->is_valid() ||
 27       my_hash_init(&xids, &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0,
 28                    sizeof(my_xid), 0, 0, MYF(0),
 29                    key_memory_binlog_recover_exec))
 30     goto err1;
 31 
 32   init_alloc_root(key_memory_binlog_recover_exec,
 33                   &mem_root, TC_LOG_PAGE_SIZE, TC_LOG_PAGE_SIZE);
 34 
 35   while ((ev= Log_event::read_log_event(log, 0, fdle, TRUE))
 36          && ev->is_valid())
 37   {
 38     if (ev->get_type_code() == binary_log::QUERY_EVENT &&
 39         !strcmp(((Query_log_event*)ev)->query, "BEGIN"))
 40       in_transaction= TRUE;
 41 
 42     if (ev->get_type_code() == binary_log::QUERY_EVENT &&
 43         !strcmp(((Query_log_event*)ev)->query, "COMMIT"))
 44     {
 45       DBUG_ASSERT(in_transaction == TRUE);
 46       in_transaction= FALSE;
 47     }
 48     else if (ev->get_type_code() == binary_log::XID_EVENT)
 49     {
 50       DBUG_ASSERT(in_transaction == TRUE);
 51       in_transaction= FALSE;
 52       Xid_log_event *xev=(Xid_log_event *)ev;
 53       uchar *x= (uchar *) memdup_root(&mem_root, (uchar*) &xev->xid,
 54                                       sizeof(xev->xid));
 55       if (!x || my_hash_insert(&xids, x))
 56         goto err2;
 57     }
 58 
 59     /*
 60       Recorded valid position for the crashed binlog file
 61       which did not contain incorrect events. The following
 62       positions increase the variable valid_pos:
 63 
 64       1 -
 65         ...
 66         <---> HERE IS VALID <--->
 67         GTID 
 68         BEGIN
 69         ...
 70         COMMIT
 71         ...
 72          
 73       2 -
 74         ...
 75         <---> HERE IS VALID <--->
 76         GTID 
 77         DDL/UTILITY
 78         ...
 79 
 80       In other words, the following positions do not increase
 81       the variable valid_pos:
 82 
 83       1 -
 84         GTID 
 85         <---> HERE IS VALID <--->
 86         ...
 87 
 88       2 -
 89         GTID 
 90         BEGIN
 91         <---> HERE IS VALID <--->
 92         ...
 93     */
 94     if (!log->error && !in_transaction &&
 95         !is_gtid_event(ev))
 96       *valid_pos= my_b_tell(log);
 97 
 98     delete ev;
 99   }
100 
101   /*
102     Call ha_recover if and only if there is a registered engine that
103     does 2PC, otherwise in DBUG builds calling ha_recover directly
104     will result in an assert. (Production builds would be safe since
105     ha_recover returns right away if total_ha_2pc <= opt_log_bin.)
106    */
107   if (total_ha_2pc > 1 && ha_recover(&xids))
108     goto err2;
109 
110   free_root(&mem_root, MYF(0));
111   my_hash_free(&xids);
112   return 0;
113 
114 err2:
115   free_root(&mem_root, MYF(0));
116   my_hash_free(&xids);
117 err1:
118   sql_print_error("Crash recovery failed. Either correct the problem "
119                   "(if it's, for example, out of memory error) and restart, "
120                   "or delete (or rename) binary log and start mysqld with "
121                   "--tc-heuristic-recover={commit|rollback}");
122   return 1;
123 }
View Code

再通过调用ha_recover对这些事务进行处理,ha_recover是以存放XID的哈希表(commit_list)为依据来进行恢复操作的

 1 int ha_recover(HASH *commit_list)
 2 {
 3   struct xarecover_st info;
 4   DBUG_ENTER("ha_recover");
 5   info.found_foreign_xids= info.found_my_xids= 0;
 6   info.commit_list= commit_list;
 7   info.dry_run= (info.commit_list == 0 &&
 8                  tc_heuristic_recover == TC_HEURISTIC_NOT_USED);
 9   info.list= NULL;
10 
11   /* commit_list and tc_heuristic_recover cannot be set both */
12   DBUG_ASSERT(info.commit_list == 0 ||
13               tc_heuristic_recover == TC_HEURISTIC_NOT_USED);
14   /* if either is set, total_ha_2pc must be set too */
15   DBUG_ASSERT(info.dry_run || total_ha_2pc>(ulong)opt_bin_log);
16 
17   if (total_ha_2pc <= (ulong)opt_bin_log)
18     DBUG_RETURN(0);
19 
20   if (info.commit_list)
21     sql_print_information("Starting crash recovery...");
22 
23   if (total_ha_2pc > (ulong)opt_bin_log + 1)
24   {
25     if (tc_heuristic_recover == TC_HEURISTIC_RECOVER_ROLLBACK)
26     {
27       sql_print_error("--tc-heuristic-recover rollback strategy is not safe "
28                       "on systems with more than one 2-phase-commit-capable "
29                       "storage engine. Aborting crash recovery.");
30       DBUG_RETURN(1);
31     }
32   }
33   else
34   {
35     /*
36       If there is only one 2pc capable storage engine it is always safe
37       to rollback. This setting will be ignored if we are in automatic
38       recovery mode.
39     */
40     tc_heuristic_recover= TC_HEURISTIC_RECOVER_ROLLBACK; // forcing ROLLBACK
41     info.dry_run= false;
42   }
43 
44   for (info.len= MAX_XID_LIST_SIZE ;
45        info.list == 0 && info.len > MIN_XID_LIST_SIZE; info.len/= 2)
46   {
47     info.list= (XID *)my_malloc(key_memory_XID,
48                                 info.len * sizeof(XID), MYF(0));
49   }
50   if (!info.list)
51   {
52     sql_print_error(ER(ER_OUTOFMEMORY),
53                     static_cast<int>(info.len * sizeof(XID)));
54     DBUG_RETURN(1);
55   }
56 
57   plugin_foreach(NULL, xarecover_handlerton,
58                  MYSQL_STORAGE_ENGINE_PLUGIN, &info);
59 
60   my_free(info.list);
61   if (info.found_foreign_xids)
62     sql_print_warning("Found %d prepared XA transactions",
63                       info.found_foreign_xids);
64   if (info.dry_run && info.found_my_xids)
65   {
66     sql_print_error("Found %d prepared transactions! It means that mysqld was "
67                     "not shut down properly last time and critical recovery "
68                     "information (last binlog or %s file) was manually deleted"
69                     " after a crash. You have to start mysqld with "
70                     "--tc-heuristic-recover switch to commit or rollback "
71                     "pending transactions.",
72                     info.found_my_xids, opt_tc_log_file);
73     DBUG_RETURN(1);
74   }
75   if (info.commit_list)
76     sql_print_information("Crash recovery finished.");
77   DBUG_RETURN(0);
78 }
View Code

其中plugin_foreach(NULL, xarecover_handlerton,MYSQL_STORAGE_ENGINE_PLUGIN, &info);会对每一个存储引擎插件执行如下xarecover_handlerton操作

 1 static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin,
 2                                     void *arg)
 3 {
 4   handlerton *hton= plugin_data<handlerton*>(plugin);
 5   struct xarecover_st *info= (struct xarecover_st *) arg;
 6   int got;
 7 
 8   if (hton->state == SHOW_OPTION_YES && hton->recover)
 9   {
10     while ((got= hton->recover(hton, info->list, info->len)) > 0)
11     {
12       sql_print_information("Found %d prepared transaction(s) in %s",
13                             got, ha_resolve_storage_engine_name(hton));
14       for (int i= 0; i < got; i++)
15       {
16         my_xid x= info->list[i].get_my_xid();
17         if (!x) // not "mine" - that is generated by external TM
18         {
19 #ifndef DBUG_OFF
20           char buf[XIDDATASIZE * 4 + 6]; // see xid_to_str
21           XID *xid= info->list + i;
22           sql_print_information("ignore xid %s", xid->xid_to_str(buf));
23 #endif
24           transaction_cache_insert_recovery(info->list + i);
25           info->found_foreign_xids++;
26           continue;
27         }
28         if (info->dry_run)
29         {
30           info->found_my_xids++;
31           continue;
32         }
33         // recovery mode
34         if (info->commit_list ?
35             my_hash_search(info->commit_list, (uchar *)&x, sizeof(x)) != 0 :
36             tc_heuristic_recover == TC_HEURISTIC_RECOVER_COMMIT)
37         {
38 #ifndef DBUG_OFF
39           char buf[XIDDATASIZE * 4 + 6]; // see xid_to_str
40           XID *xid= info->list + i;
41           sql_print_information("commit xid %s", xid->xid_to_str(buf));
42 #endif
43           hton->commit_by_xid(hton, info->list + i);
44         }
45         else
46         {
47 #ifndef DBUG_OFF
48           char buf[XIDDATASIZE * 4 + 6]; // see xid_to_str
49           XID *xid= info->list + i;
50           sql_print_information("rollback xid %s", xid->xid_to_str(buf));
51 #endif
52           hton->rollback_by_xid(hton, info->list + i);
53         }
54       }
55       if (got < info->len)
56         break;
57     }
58   }
59   return false;
60 }
View Code

hton->recover(hton, info->list, info->len)中的hton->recover是个函数指针

赋值如下:

1 innobase_hton->recover = innobase_xa_recover
View Code

具体实现如下:

 1 static
 2 int
 3 innobase_xa_recover(
 4 /*================*/
 5     handlerton*    hton,    /*!< in: InnoDB handlerton */
 6     XID*        xid_list,/*!< in/out: prepared transactions */
 7     uint        len)    /*!< in: number of slots in xid_list */
 8 {
 9     return(trx_recover_for_mysql(xid_list, len));
10 }
11 
12 int
13 trx_recover_for_mysql(
14 /*==================*/
15     XID*    xid_list,    /*!< in/out: prepared transactions */
16     ulint    len)        /*!< in: number of slots in xid_list */
17 {
18     const trx_t*    trx;
19     ulint        count = 0;
20 
21     ut_ad(xid_list);
22     ut_ad(len);
23 
24     /* We should set those transactions which are in the prepared state
25     to the xid_list */
26 
27     trx_sys_mutex_enter();
28 
29     for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
30          trx != NULL;
31          trx = UT_LIST_GET_NEXT(trx_list, trx)) {
32 
33         assert_trx_in_rw_list(trx);
34 
35         /* The state of a read-write transaction cannot change
36         from or to NOT_STARTED while we are holding the
37         trx_sys->mutex. It may change to PREPARED, but not if
38         trx->is_recovered. It may also change to COMMITTED. */
39         if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
40             xid_list[count] = *trx->xid;
41 
42             if (count == 0) {
43                 ib::info() << "Starting recovery for"
44                     " XA transactions...";
45             }
46 
47             ib::info() << "Transaction "
48                 << trx_get_id_for_print(trx)
49                 << " in prepared state after recovery";
50 
51             ib::info() << "Transaction contains changes to "
52                 << trx->undo_no << " rows";
53 
54             count++;
55 
56             if (count == len) {
57                 break;
58             }
59         }
60     }
61 
62     trx_sys_mutex_exit();
63 
64     if (count > 0){
65         ib::info() << count << " transactions in prepared state"
66             " after recovery";
67     }
68 
69     return(int (count));
70 }
View Code
原文地址:https://www.cnblogs.com/onlyac/p/5777316.html