gdb调试分析多线程死锁

转载:

http://blog.chinaunix.net/uid-30343738-id-5757210.html

#include <stdio.h>
#include <pthread.h>
#include <stdlib.h>
#include <unistd.h>

static int sequence1 = 0; /* incremented by func1() while it holds lock1 */
static int sequence2 = 0; /* incremented by func1() and func2() while lock2 is held */

pthread_mutex_t lock1; /* initialised in main(); func1() takes it first, func2() takes it second */
pthread_mutex_t lock2; /* initialised in main(); func2() takes it first, func1() takes it second */

int func1() /* Deadlock demo, first half: takes lock1, then lock2 (func2 uses the reverse order); returns sequence1. */
{
    pthread_mutex_lock(&lock1); /* acquire lock1 first */
    ++sequence1; 
    sleep(1); /* keep holding lock1 for one second before requesting lock2 */
    pthread_mutex_lock(&lock2); /* blocks while another thread holds lock2 */
    ++sequence2; 
    pthread_mutex_unlock(&lock2); 
    pthread_mutex_unlock(&lock1); 
    /* NOTE(review): sequence1 is read below after lock1 has been released - confirm this is acceptable. */
    return sequence1; 
}

int func2() /* Deadlock demo, second half: takes lock2, then lock1 (the reverse of func1); returns sequence1. */
{
    pthread_mutex_lock(&lock2); /* acquire lock2 first */
    ++sequence2; 
    sleep(1); /* keep holding lock2 for one second before requesting lock1 */
    pthread_mutex_lock(&lock1); /* blocks while another thread holds lock1 */
    ++sequence2; /* NOTE(review): second increment of sequence2; func1's mirror image suggests sequence1 may have been intended - confirm */
    pthread_mutex_unlock(&lock1); 
    pthread_mutex_unlock(&lock2); 
    /* NOTE(review): sequence1 is read below after lock1 has been released - confirm this is acceptable. */
    return sequence1; 
}


void* thread1(void *arg) /* Worker: calls func1() repeatedly and exits the thread once it returns 100000. */
{
    (void)arg; /* the start argument is not used */
    for (;;)
    {
        const int seq = func1();
        if (seq != 100000)
        {
            continue; /* target not reached yet: run another round */
        }
        pthread_exit(NULL);
    }
}

void* thread2(void *arg) /* Worker: calls func2() repeatedly and exits the thread once it returns 100000. */
{
    (void)arg; /* the start argument is not used */
    for (;;)
    {
        const int seq = func2();
        if (seq != 100000)
        {
            continue; /* target not reached yet: run another round */
        }
        pthread_exit(NULL);
    }
}

/*
 * Idle worker that takes no locks: sleeps one second per round and exits
 * the thread on the first round whose counter value is above 10000.
 */
void* thread3(void *arg)
{
    int ticks;

    (void)arg; /* the start argument is not used */
    for (ticks = 0; ; ++ticks)
    {
        sleep(1);
        if (ticks > 10000)
        {
            pthread_exit(NULL);
        }
    }
}

/*
 * Idle worker that takes no locks: sleeps one second per round and exits
 * the thread on the first round whose counter value is above 10000.
 */
void* thread4(void *arg)
{
    int ticks;

    (void)arg; /* the start argument is not used */
    for (ticks = 0; ; ++ticks)
    {
        sleep(1);
        if (ticks > 10000)
        {
            pthread_exit(NULL);
        }
    }
}



/*
 * Entry point of the demo: initialises lock1 and lock2, starts thread1..thread4
 * in that order, pauses five seconds, then joins the four threads in the same
 * order before destroying both mutexes.
 *
 * Terminates at once via _exit(1) if any pthread_create() call fails;
 * otherwise returns 0 after all joins have completed.
 */
int main()
{
    void *(*const entry[4])(void *) = { &thread1, &thread2, &thread3, &thread4 };
    pthread_t tid[4];
    int i;

    pthread_mutex_init(&lock1, NULL);
    pthread_mutex_init(&lock2, NULL);

    /* Start the workers in table order; give up on the first failure. */
    for (i = 0; i < 4; ++i)
    {
        if (pthread_create(&tid[i], NULL, entry[i], NULL) != 0)
        {
            _exit(1);
        }
    }

    sleep(5);

    /* Each join blocks until the corresponding worker has terminated. */
    for (i = 0; i < 4; ++i)
    {
        pthread_join(tid[i], NULL);
    }

    pthread_mutex_destroy(&lock1);
    pthread_mutex_destroy(&lock2);

    return 0;
}

编译执行程序。

gcc -o main main17.c -lpthread -g

 

使用 pstack 和 gdb 工具对死锁程序进行分析

1、使用pstack 

查找测试程序的进程号(ps -ef | grep main)

root 5383 1 0 06:31 ? 00:00:43 gedit /root/Project/xa/main17.c
root 7197 7179 0 10:04 pts/1 00:00:00 ./main
root 7218 7206 0 10:04 pts/2 00:00:00 grep --color=auto main

对死锁进程第一次执行 pstack(用法:pstack <进程号>)的输出结果

 Thread 5 (Thread 0x41e37940 (LWP 6722)): 
 #0  0x0000003d1a80d4c4 in __lll_lock_wait () from /lib64/libpthread.so.0 
 #1  0x0000003d1a808e1a in _L_lock_1034 () from /lib64/libpthread.so.0 
 #2  0x0000003d1a808cdc in pthread_mutex_lock () from /lib64/libpthread.so.0 
 #3  0x0000000000400a9b in func1() () 
 #4  0x0000000000400ad7 in thread1(void*) () 
 #5  0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0 
 #6  0x0000003d19cd40cd in clone () from /lib64/libc.so.6 
 Thread 4 (Thread 0x42838940 (LWP 6723)): 
 #0  0x0000003d1a80d4c4 in __lll_lock_wait () from /lib64/libpthread.so.0 
 #1  0x0000003d1a808e1a in _L_lock_1034 () from /lib64/libpthread.so.0 
 #2  0x0000003d1a808cdc in pthread_mutex_lock () from /lib64/libpthread.so.0 
 #3  0x0000000000400a17 in func2() () 
 #4  0x0000000000400a53 in thread2(void*) () 
 #5  0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0 
 #6  0x0000003d19cd40cd in clone () from /lib64/libc.so.6 
 Thread 3 (Thread 0x43239940 (LWP 6724)): 
 #0  0x0000003d19c9a541 in nanosleep () from /lib64/libc.so.6 
 #1  0x0000003d19c9a364 in sleep () from /lib64/libc.so.6 
 #2  0x00000000004009bc in thread3(void*) () 
 #3  0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0 
 #4  0x0000003d19cd40cd in clone () from /lib64/libc.so.6 
 Thread 2 (Thread 0x43c3a940 (LWP 6725)): 
 #0  0x0000003d19c9a541 in nanosleep () from /lib64/libc.so.6 
 #1  0x0000003d19c9a364 in sleep () from /lib64/libc.so.6 
 #2  0x0000000000400976 in thread4(void*) () 
 #3  0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0 
 #4  0x0000003d19cd40cd in clone () from /lib64/libc.so.6 
 Thread 1 (Thread 0x2b984ecabd90 (LWP 6721)): 
 #0  0x0000003d1a807b35 in pthread_join () from /lib64/libpthread.so.0 
 #1  0x0000000000400900 in main ()  

 对死锁进程第二次执行 pstack(用法:pstack <进程号>)的输出结果

 Thread 5 (Thread 0x40bd6940 (LWP 6722)): 
 #0  0x0000003d1a80d4c4 in __lll_lock_wait () from /lib64/libpthread.so.0 
 #1  0x0000003d1a808e1a in _L_lock_1034 () from /lib64/libpthread.so.0 
 #2  0x0000003d1a808cdc in pthread_mutex_lock () from /lib64/libpthread.so.0 
 #3  0x0000000000400a87 in func1() () 
 #4  0x0000000000400ac3 in thread1(void*) () 
 #5  0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0 
 #6  0x0000003d19cd40cd in clone () from /lib64/libc.so.6 
 Thread 4 (Thread 0x415d7940 (LWP 6723)): 
 #0  0x0000003d1a80d4c4 in __lll_lock_wait () from /lib64/libpthread.so.0 
 #1  0x0000003d1a808e1a in _L_lock_1034 () from /lib64/libpthread.so.0 
 #2  0x0000003d1a808cdc in pthread_mutex_lock () from /lib64/libpthread.so.0 
 #3  0x0000000000400a03 in func2() () 
 #4  0x0000000000400a3f in thread2(void*) () 
 #5  0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0 
 #6  0x0000003d19cd40cd in clone () from /lib64/libc.so.6 
 Thread 3 (Thread 0x41fd8940 (LWP 6724)): 
 #0  0x0000003d19c7aec2 in memset () from /lib64/libc.so.6 
 #1  0x00000000004009be in thread3(void*) () 
 #2  0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0 
 #3  0x0000003d19cd40cd in clone () from /lib64/libc.so.6 
 Thread 2 (Thread 0x429d9940 (LWP 6725)): 
 #0  0x0000003d19c7ae0d in memset () from /lib64/libc.so.6 
 #1  0x0000000000400982 in thread4(void*) () 
 #2  0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0 
 #3  0x0000003d19cd40cd in clone () from /lib64/libc.so.6 
 Thread 1 (Thread 0x2af906fd9d90 (LWP 6721)): 
 #0  0x0000003d1a807b35 in pthread_join () from /lib64/libpthread.so.0 
 #1  0x0000000000400900 in main () 

 

连续多次查看这个进程的函数调用关系堆栈进行分析:当进程吊死时,多次使用 pstack 查看进程的函数调用堆栈,死锁线程将一直处于等锁的状态,对比多次的函数调用堆栈输出结果,

确定哪两个线程(或者几个线程)一直没有变化且一直处于等锁的状态(可能存在两个线程 一直没有变化)。

输出分析:

根据上面的输出对比可以发现,线程 2 和线程 3 由第一次 pstack 输出的处在 sleep 函数变化为第二次 pstack 输出的处在 memset 函数(线程 1 是主线程,一直阻塞在 pthread_join 中)。但是线程 4 和线程 5 一直处在等锁状态(pthread_mutex_lock),

在连续两次的 pstack 信息输出中没有变化,所以我们可以推测线程 4 和线程 5 发生了死锁

 

2、使用gdb进行进一步的分析

查找测试程序的进程号(ps -ef | grep main)

root 5383 1 0 06:31 ? 00:00:43 gedit /root/Project/xa/main17.c
root 7197 7179 0 10:04 pts/1 00:00:00 ./main
root 7218 7206 0 10:04 pts/2 00:00:00 grep --color=auto main

使用gdb 的attach功能

gdb attach 7197

查看当前进程的线程信息

(gdb) info thread
Id Target Id Frame
5 Thread 0xb7539b40 (LWP 7198) "main" 0xb7717424 in __kernel_vsyscall ()
4 Thread 0xb6d38b40 (LWP 7199) "main" 0xb7717424 in __kernel_vsyscall ()
3 Thread 0xb6537b40 (LWP 7200) "main" 0xb7717424 in __kernel_vsyscall ()
2 Thread 0xb5d36b40 (LWP 7201) "main" 0xb7717424 in __kernel_vsyscall ()
* 1 Thread 0xb753a6c0 (LWP 7197) "main" 0xb7717424 in __kernel_vsyscall ()

 

 切换到线程 5 的输出

(gdb) thread  5
[Switching to thread 5 (Thread 0xb7539b40 (LWP 7198))]
#0 0xb7717424 in __kernel_vsyscall ()
(gdb) where
#0 0xb7717424 in __kernel_vsyscall ()
#1 0xb76f25a2 in __lll_lock_wait () from /lib/i386-linux-gnu/libpthread.so.0
#2 0xb76edead in _L_lock_686 () from /lib/i386-linux-gnu/libpthread.so.0
#3 0xb76edcf3 in pthread_mutex_lock ()
from /lib/i386-linux-gnu/libpthread.so.0
#4 0x0804864b in func1 () at main17.c:17
#5 0x080486ef in thread1 (arg=0x0) at main17.c:44
#6 0xb76ebd4c in start_thread () from /lib/i386-linux-gnu/libpthread.so.0
#7 0xb762adde in clone () from /lib/i386-linux-gnu/libc.so.6
(gdb) f  4
#4 0x0804864b in func1 () at main17.c:17
warning: Source file is more recent than executable.
17 pthread_mutex_lock(&lock2);     ////线程 5 正试图获得锁 lock2

 

切换到线程4的输出

(gdb) thread 4
[Switching to thread 4 (Thread 0xb6d38b40 (LWP 7199))]
#0 0xb7717424 in __kernel_vsyscall ()
(gdb) where
#0 0xb7717424 in __kernel_vsyscall ()
#1 0xb76f25a2 in __lll_lock_wait () from /lib/i386-linux-gnu/libpthread.so.0
#2 0xb76edead in _L_lock_686 () from /lib/i386-linux-gnu/libpthread.so.0
#3 0xb76edcf3 in pthread_mutex_lock ()
from /lib/i386-linux-gnu/libpthread.so.0
#4 0x080486ae in func2 () at main17.c:30
#5 0x0804871c in thread2 (arg=0x0) at main17.c:58
#6 0xb76ebd4c in start_thread () from /lib/i386-linux-gnu/libpthread.so.0
#7 0xb762adde in clone () from /lib/i386-linux-gnu/libc.so.6
(gdb) f 4
#4 0x080486ae in func2 () at main17.c:30
30 pthread_mutex_lock(&lock1);      //线程 4 正试图获得锁 lock1

打印锁的信息

(gdb) p lock1
$1 = {__data = {__lock = 2, __count = 0, __owner = 7198, __kind = 0,
__nusers = 1, {__spins = 0, __list = {__next = 0x0}}},
__size = "\002\000\000\000\000\000\000\000\036\034\000\000\000\000\000\000\001\000\000\000\000\000\000", __align = 2}
(gdb) p lock2
$2 = {__data = {__lock = 2, __count = 0, __owner = 7199, __kind = 0,
__nusers = 1, {__spins = 0, __list = {__next = 0x0}}},
__size = "\002\000\000\000\000\000\000\000\037\034\000\000\000\000\000\000\001\000\000\000\000\000\000", __align = 2}

 

从上面可以发现,线程 4 正试图获得锁 lock1,但是锁 lock1已经被 LWP 为 7198的线程得到(__owner = 7198),

线程 5 正试图获得锁 lock2,但是锁 lock2 已经被 LWP 为 7199 的线程得到(__owner = 7199)。从 gdb 的 info thread 输出可以发现,LWP 7198 与线程 5 是对应的,LWP 7199 与线程 4 是对应的。

所以我们可以得出, 线程 4 和线程 5 发生了交叉持锁的死锁现象。查看线程的源代码发现,线程 4 和线程 5 同时使用 lock1 和 lock2,且申请顺序相反(func1 先申请 lock1 再申请 lock2,func2 先申请 lock2 再申请 lock1)

 

原文地址:https://www.cnblogs.com/zhangxuan/p/6385329.html