innodb事务锁的一些常见数据结构

lock_sys_t

整个innodb的锁系统管理结构体，定义在lock0lock.h中。在lock0lock.cc中有一个lock_sys_t的全局指针lock_sys, 由lock_sys_create分配。

/** The lock system struct: top-level bookkeeping for the InnoDB lock
subsystem. It groups the lock hash tables with the state used to track
user threads that are suspended while waiting for a lock. */
struct lock_sys_t{
	char		pad1[CACHE_LINE_SIZE];	/*!< padding to prevent other
						memory update hotspots from
						residing on the same memory
						cache line */
	LockMutex	mutex;			/*!< Mutex protecting the
						locks */
	/* Hash table holding the record locks. */
	hash_table_t*	rec_hash;		/*!< hash table of the record
						locks */
	/* Hash table holding the predicate locks. */
	hash_table_t*	prdt_hash;		/*!< hash table of the predicate
						lock */
	/* Hash table holding the page locks (judging by the prdt_ prefix these
	are presumably page-level predicate locks -- TODO confirm). */
	hash_table_t*	prdt_page_hash;		/*!< hash table of the page
						lock */

	char		pad2[CACHE_LINE_SIZE];	/*!< Padding */
	LockMutex	wait_mutex;		/*!< Mutex protecting the
						next two fields */
	/* Array of slots for the threads that are blocked while trying to
	acquire a lock. */
	srv_slot_t*	waiting_threads;	/*!< Array  of user threads
						suspended while waiting for
						locks within InnoDB, protected
						by the lock_sys->wait_mutex */
	/* Upper boundary used when iterating over waiting_threads. */
	srv_slot_t*	last_slot;		/*!< highest slot ever used
						in the waiting_threads array,
						protected by
						lock_sys->wait_mutex */
	ibool		rollback_complete;
						/*!< TRUE if rollback of all
						recovered transactions is
						complete. Protected by
						lock_sys->mutex */

	ulint		n_lock_max_wait_time;	/*!< Max wait time */

	/* Event used by the thread that checks for lock wait timeouts. */
	os_event_t	timeout_event;		/*!< Set to the event that is
						created in the lock wait monitor
						thread. A value of 0 means the
						thread is not active */

	bool		timeout_thread_active;	/*!< True if the timeout thread
						is running */
};

其中的rec_hash、prdt_hash、prdt_page_hash分别为记录锁（行锁）、谓词锁、页锁的哈希表。

lock_rec_t

描述记录锁的结构体，定义在lock0priv.h

/** Record lock details for one page. The page is identified by
(space, page_no). The lock bitmap of n_bits bits is not a member: it is
placed in memory immediately after the lock struct. According to the
accompanying notes, the bit at offset i stands for the record whose
heap_no is i. */
struct lock_rec_t {
	ib_uint32_t	space;		/*!< space id */
	ib_uint32_t	page_no;	/*!< page number */
	ib_uint32_t	n_bits;		/*!< number of bits in the lock
					bitmap; NOTE: the lock bitmap is
					placed immediately after the
					lock struct */

	/** Print the record lock into the given output stream
	@param[in,out]	out	the output stream
	@return the given output stream. */
	std::ostream& print(std::ostream& out) const;
};

记录锁通过space:page_no:heap_no来唯一确定，其中space和page_no用来计算哈希表使用的key。这个结构体实际分配的内存比结构体本身大，后面紧跟着一个位图：每一位表示这个页上对应的记录是否被该锁对象加锁，位的偏移量即该记录的heap_no。

lock_table_t

表锁结构体, 定义在lock0priv.h中

/** A table lock */
struct lock_table_t {
	/* The table this lock refers to. */
	dict_table_t*	table;		/*!< database table in dictionary
					cache */
	/* List node that chains together the locks taken on the same table. */
	UT_LIST_NODE_T(lock_t)
			locks;		/*!< list of locks on the same
					table */
	/** Print the table lock into the given output stream
	@param[in,out]	out	the output stream
	@return the given output stream. */
	std::ostream& print(std::ostream& out) const;
};

lock_t

定义于lock0priv.h中的通用锁结构体类型，各种类型的锁都可以用这种结构体表达，lock_sys_t中的哈希表存放的也是这种类型的锁结构体。

/** Lock struct; protected by lock_sys->mutex */
struct lock_t {
    /*这个锁属于哪个事务*/
	trx_t*		trx;		/*!< transaction owning the
					lock */
	/*事务中拥有的锁通过一个链表连接起来*/
	UT_LIST_NODE_T(lock_t)
			trx_locks;	/*!< list of the locks of the
					transaction */

	dict_index_t*	index;		/*!< index for a record lock */
    
    /*哈希表中同一个key值的节点使用链表连接起来*/
	lock_t*		hash;		/*!< hash chain node for a record
					lock. The link node in a singly linked
					list, used during hashing. */

    /*如果是表锁则为lock_table_t，如果是记录锁则为lock_rec_t，通过type_mode来判断类型*/
	union {
		lock_table_t	tab_lock;/*!< table lock */
		lock_rec_t	rec_lock;/*!< record lock */
	} un_member;			/*!< lock details */
    
    /*这个整数的各个位用于表达锁的类型,type_mode的各个位的定义在lock0lock.h中(LOCK_GAP等宏)*/
	ib_uint32_t	type_mode;	/*!< lock type, mode, LOCK_GAP or
					LOCK_REC_NOT_GAP,
					LOCK_INSERT_INTENTION,
					wait flag, ORed */

	/** Determine if the lock object is a record lock.
	@return true if record lock, false otherwise. */
	bool is_record_lock() const
	{
		return(type() == LOCK_REC);
	}

	bool is_waiting() const
	{
		return(type_mode & LOCK_WAIT);
	}

	bool is_gap() const
	{
		return(type_mode & LOCK_GAP);
	}

	bool is_record_not_gap() const
	{
		return(type_mode & LOCK_REC_NOT_GAP);
	}

	bool is_insert_intention() const
	{
		return(type_mode & LOCK_INSERT_INTENTION);
	}

	ulint type() const {
		return(type_mode & LOCK_TYPE_MASK);
	}

	enum lock_mode mode() const
	{
		return(static_cast<enum lock_mode>(type_mode & LOCK_MODE_MASK));
	}

	/** Print the lock object into the given output stream.
	@param[in,out]	out	the output stream
	@return the given output stream. */
	std::ostream& print(std::ostream& out) const;

	/** Convert the member 'type_mode' into a human readable string.
	@return human readable string */
	std::string type_mode_string() const;

	const char* type_string() const
	{
		switch (type_mode & LOCK_TYPE_MASK) {
		case LOCK_REC:
			return("LOCK_REC");
		case LOCK_TABLE:
			return("LOCK_TABLE");
		default:
			ut_error;
		}
	}
};

trx_t

这个结构体代表一个事务，其中与锁系统相关的主要是类型为trx_lock_t的成员lock。

struct trx_t {
    ···
	trx_lock_t	lock;		/*!< Information about the transaction
					locks and state. Protected by
					trx->mutex or lock_sys->mutex
					or both */
	···
}

trx_lock_t

事务处理中与锁系统相关的部分，是trx_t的成员

/** The lock-related part of a transaction. It is embedded in trx_t as
the member named lock. */
struct trx_lock_t {
	ulint		n_active_thrs;	/*!< number of active query threads */

	/* Execution state of the transaction. */
	trx_que_t	que_state;	/*!< valid when trx->state
					== TRX_STATE_ACTIVE: TRX_QUE_RUNNING,
					TRX_QUE_LOCK_WAIT, ... */
	/* The lock request this transaction is currently waiting for. */
	lock_t*		wait_lock;	/*!< if trx execution state is
					TRX_QUE_LOCK_WAIT, this points to
					the lock request, otherwise this is
					NULL; set to non-NULL when holding
					both trx->mutex and lock_sys->mutex;
					set to NULL when holding
					lock_sys->mutex; readers should
					hold lock_sys->mutex, except when
					they are holding trx->mutex and
					wait_lock==NULL */
	/* Used during deadlock detection to mark whether this transaction has
	already been visited. */
	ib_uint64_t	deadlock_mark;	/*!< A mark field that is initialized
					to and checked against lock_mark_counter
					by lock_deadlock_recursive(). */
	/* Set when a deadlock involved this transaction and deadlock
	resolution picked it as the one to roll back. */
	bool		was_chosen_as_deadlock_victim;
					/*!< when the transaction decides to
					wait for a lock, it sets this to false;
					if another transaction chooses this
					transaction as a victim in deadlock
					resolution, it sets this to true.
					Protected by trx->mutex. */
	time_t		wait_started;	/*!< lock wait started at this time,
					protected only by lock_sys->mutex */

	que_thr_t*	wait_thr;	/*!< query thread belonging to this
					trx that is in QUE_THR_LOCK_WAIT
					state. For threads suspended in a
					lock wait, this is protected by
					lock_sys->mutex. Otherwise, this may
					only be modified by the thread that is
					serving the running transaction. */

	lock_pool_t	rec_pool;	/*!< Pre-allocated record locks */

	lock_pool_t	table_pool;	/*!< Pre-allocated table locks */

	ulint		rec_cached;	/*!< Next free rec lock in pool */

	ulint		table_cached;	/*!< Next free table lock in pool */

	mem_heap_t*	lock_heap;	/*!< memory heap for trx_locks;
					protected by lock_sys->mutex */
	/* List of the lock objects that belong to this transaction. */
	trx_lock_list_t trx_locks;	/*!< locks requested by the transaction;
					insertions are protected by trx->mutex
					and lock_sys->mutex; removals are
					protected by lock_sys->mutex */

	lock_pool_t	table_locks;	/*!< All table locks requested by this
					transaction, including AUTOINC locks */

	bool		cancel;		/*!< true if the transaction is being
					rolled back either via deadlock
					detection or due to lock timeout. The
					caller has to acquire the trx_t::mutex
					in order to cancel the locks. In
					lock_trx_table_locks_remove() we
					check for this cancel of a transaction's
					locks and avoid reacquiring the trx
					mutex to prevent recursive deadlocks.
					Protected by both the lock sys mutex
					and the trx_t::mutex. */
	ulint		n_rec_locks;	/*!< number of rec locks in this trx */

	/** The transaction called ha_innobase::start_stmt() to
	lock a table. Most likely a temporary table. */
	bool		start_stmt;
};

DeadlockChecker

死锁检测类。当一个加锁请求无法立即获得、需要进入等待时，通过这个类的静态接口check_and_resolve进行死锁检测。

/** Deadlock checker. */
class DeadlockChecker {
public:
	/** Checks if a joining lock request results in a deadlock. If
	a deadlock is found this function will resolve the deadlock
	by choosing a victim transaction and rolling it back. It
	will attempt to resolve all deadlocks. The returned transaction
	id will be the joining transaction id or 0 if some other
	transaction was chosen as a victim and rolled back or no
	deadlock found.

	@param lock lock the transaction is requesting
	@param trx transaction requesting the lock

	@return id of transaction chosen as victim or 0 */
	/*外部调用接口*/
	static const trx_t* check_and_resolve(
		const lock_t*	lock,
		trx_t*		trx);

private:
    /*构造函数为私有函数，由静态成员函数check_and_resolve创建每次进行死锁检测的时候的DeadlockChecker类实例*/
	/** Do a shallow copy. Default destructor OK.
	@param trx the start transaction (start node)
	@param wait_lock lock that a transaction wants
	@param mark_start visited node counter */
	DeadlockChecker(
		const trx_t*	trx,
		const lock_t*	wait_lock,
		ib_uint64_t	mark_start)
		:
		m_cost(),
		m_start(trx),
		m_too_deep(),
		m_wait_lock(wait_lock),
		m_mark_start(mark_start),
		m_n_elems()
	{
	}

	/** Check if the search is too deep. */
	bool is_too_deep() const
	{
		return(m_n_elems > LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK
		       || m_cost > LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK);
	}

	/** Save current state.
	@param lock lock to push on the stack.
	@param heap_no the heap number to push on the stack.
	@return false if stack is full. */
	bool push(const lock_t*	lock, ulint heap_no)
	{
		ut_ad((lock_get_type_low(lock) & LOCK_REC)
		      || (lock_get_type_low(lock) & LOCK_TABLE));

		ut_ad(((lock_get_type_low(lock) & LOCK_TABLE) != 0)
		      == (heap_no == ULINT_UNDEFINED));

		/* Ensure that the stack is bounded. */
		if (m_n_elems >= UT_ARR_SIZE(s_states)) {
			return(false);
		}

		state_t&	state = s_states[m_n_elems++];

		state.m_lock = lock;
		state.m_wait_lock = m_wait_lock;
		state.m_heap_no =heap_no;

		return(true);
	}

	/** Restore state.
	@param[out] lock current lock
	@param[out] heap_no current heap_no */
	void pop(const lock_t*& lock, ulint& heap_no)
	{
		ut_a(m_n_elems > 0);

		const state_t&	state = s_states[--m_n_elems];

		lock = state.m_lock;
		heap_no = state.m_heap_no;
		m_wait_lock = state.m_wait_lock;
	}

	/** Check whether the node has been visited.
	@param lock lock to check
	@return true if the node has been visited */
	bool is_visited(const lock_t* lock) const
	{
		return(lock->trx->lock.deadlock_mark > m_mark_start);
	}

	/** Get the next lock in the queue that is owned by a transaction
	whose sub-tree has not already been searched.
	Note: "next" here means PREV for table locks.
	@param lock Lock in queue
	@param heap_no heap_no if lock is a record lock else ULINT_UNDEFINED
	@return next lock or NULL if at end of queue */
	const lock_t* get_next_lock(const lock_t* lock, ulint heap_no) const;

	/** Get the first lock to search. The search starts from the current
	wait_lock. What we are really interested in is an edge from the
	current wait_lock's owning transaction to another transaction that has
	a lock ahead in the queue. We skip locks where the owning transaction's
	sub-tree has already been searched.

	Note: The record locks are traversed from the oldest lock to the
	latest. For table locks we go from latest to oldest.

	For record locks, we first position the iterator on first lock on
	the page and then reposition on the actual heap_no. This is required
	due to the way the record lock has is implemented.

	@param[out] heap_no if rec lock, else ULINT_UNDEFINED.

	@return first lock or NULL */
	const lock_t* get_first_lock(ulint* heap_no) const;

	/** Notify that a deadlock has been detected and print the conflicting
	transaction info.
	@param lock lock causing deadlock */
	void notify(const lock_t* lock) const;

	/** Select the victim transaction that should be rolledback.
	@return victim transaction */
	const trx_t* select_victim() const;

	/** Rollback transaction selected as the victim. */
	void trx_rollback();

	/** Looks iteratively for a deadlock. Note: the joining transaction
	may have been granted its lock by the deadlock checks.

	@return 0 if no deadlock else the victim transaction.*/
	const trx_t* search();

	/** Print transaction data to the deadlock file and possibly to stderr.
	@param trx transaction
	@param max_query_len max query length to print */
	static void print(const trx_t* trx, ulint max_query_len);

	/** rewind(3) the file used for storing the latest detected deadlock
	and print a heading message to stderr if printing of all deadlocks to
	stderr is enabled. */
	static void start_print();

	/** Print lock data to the deadlock file and possibly to stderr.
	@param lock record or table type lock */
	static void print(const lock_t* lock);

	/** Print a message to the deadlock file and possibly to stderr.
	@param msg message to print */
	static void print(const char* msg);

	/** Print info about transaction that was rolled back.
	@param trx transaction rolled back
	@param lock lock trx wants */
	static void rollback_print(const trx_t* trx, const lock_t* lock);

private:
	/** DFS state information, used during deadlock checking. */
	struct state_t {
		const lock_t*	m_lock;		/*!< Current lock */
		const lock_t*	m_wait_lock;	/*!< Waiting for lock */
		ulint		m_heap_no;	/*!< heap number if rec lock */
	};

	/** Used in deadlock tracking. Protected by lock_sys->mutex. */
	static ib_uint64_t	s_lock_mark_counter;

	/** Calculation steps thus far. It is the count of the nodes visited. */
	ulint			m_cost;

	/** Joining transaction that is requesting a lock in an
	incompatible mode */
	const trx_t*		m_start;

	/** TRUE if search was too deep and was aborted */
	bool			m_too_deep;

	/** Lock that trx wants */
	const lock_t*		m_wait_lock;

	/**  Value of lock_mark_count at the start of the deadlock check. */
	ib_uint64_t		m_mark_start;

	/** Number of states pushed onto the stack */
	size_t			m_n_elems;

	/** This is to avoid malloc/free calls. */
	static state_t		s_states[MAX_STACK_SIZE];
};