nginx在收到stop信号后的处理

1 启动时指定信号处理函数

在nginx启动的时候就会指定信号的处理函数:

/*
 * Install a signal disposition for every entry of the signals[] table,
 * stopping at the terminating entry whose signo is 0.
 *
 * Entries that carry a handler get it registered through sa_sigaction with
 * SA_SIGINFO; entries without a handler are set to SIG_IGN.  The signal
 * mask used while a handler runs is left empty.
 *
 * Returns NGX_OK once the whole table has been processed.  When sigaction()
 * fails the error is logged and NGX_ERROR is returned immediately, except
 * in NGX_VALGRIND builds where the failure is only logged and the loop
 * moves on to the next entry.
 */
ngx_int_t
ngx_init_signals(ngx_log_t *log)
{
    ngx_signal_t      *entry;
    struct sigaction   act;

    for (entry = signals; entry->signo != 0; entry++) {
        ngx_memzero(&act, sizeof(struct sigaction));

        if (!entry->handler) {
            /* no handler in the table: the signal is ignored */
            act.sa_handler = SIG_IGN;

        } else {
            act.sa_sigaction = entry->handler;
            act.sa_flags = SA_SIGINFO;
        }

        sigemptyset(&act.sa_mask);

        if (sigaction(entry->signo, &act, NULL) != -1) {
            continue;
        }

#if (NGX_VALGRIND)
        ngx_log_error(NGX_LOG_ALERT, log, ngx_errno,
                      "sigaction(%s) failed, ignored", entry->signame);
#else
        ngx_log_error(NGX_LOG_EMERG, log, ngx_errno,
                      "sigaction(%s) failed", entry->signame);
        return NGX_ERROR;
#endif
    }

    return NGX_OK;
}

其中,signals数组的定义如下:

 /*
  * Table of the signals nginx installs dispositions for.  Each entry is
  * { signal number, signal name used in log messages, the "-s" command
  * name that selects it ("" when there is none), handler }.
  *
  * ngx_init_signals() walks the table: entries with a handler get it
  * installed, entries whose handler is NULL (SIGSYS, SIGPIPE) are set to
  * SIG_IGN.  The entry with signo 0 terminates the table.
  *
  * The listing numbers at the start of each line belong to the article,
  * not to the C source.  The closing "};" restores the semicolon that the
  * initializer needs to be a valid declaration.
  */
 1 ngx_signal_t  signals[] = {
 2     { ngx_signal_value(NGX_RECONFIGURE_SIGNAL),
 3       "SIG" ngx_value(NGX_RECONFIGURE_SIGNAL),
 4       "reload",
 5       ngx_signal_handler },
 6 
 7     { ngx_signal_value(NGX_REOPEN_SIGNAL),
 8       "SIG" ngx_value(NGX_REOPEN_SIGNAL),
 9       "reopen",
10       ngx_signal_handler },
11 
12     { ngx_signal_value(NGX_NOACCEPT_SIGNAL),
13       "SIG" ngx_value(NGX_NOACCEPT_SIGNAL),
14       "",
15       ngx_signal_handler },
16 
17     { ngx_signal_value(NGX_TERMINATE_SIGNAL),
18       "SIG" ngx_value(NGX_TERMINATE_SIGNAL),
19       "stop",
20       ngx_signal_handler },
21 
22     { ngx_signal_value(NGX_SHUTDOWN_SIGNAL),
23       "SIG" ngx_value(NGX_SHUTDOWN_SIGNAL),
24       "quit",
25       ngx_signal_handler },
26 
27     { ngx_signal_value(NGX_CHANGEBIN_SIGNAL),
28       "SIG" ngx_value(NGX_CHANGEBIN_SIGNAL),
29       "",
30       ngx_signal_handler },
31 
32     { SIGALRM, "SIGALRM", "", ngx_signal_handler },
33 
34     { SIGINT, "SIGINT", "", ngx_signal_handler },
35 
36     { SIGIO, "SIGIO", "", ngx_signal_handler },
37 
38     { SIGCHLD, "SIGCHLD", "", ngx_signal_handler },
39 
40     { SIGSYS, "SIGSYS, SIG_IGN", "", NULL },
41 
42     { SIGPIPE, "SIGPIPE, SIG_IGN", "", NULL },
43 
44     { 0, NULL, "", NULL }
45 };



2 执行nginx -s stop后的处理过程

执行nginx -s stop会启动一个新的进程,这个进程的任务主要是读取之前的pid文件得到master进程号,然后向该master进程发送NGX_TERMINATE_SIGNAL对应的信号(NGX_TERMINATE_SIGNAL只是一个宏,真正的信号名是通过宏做字符串拼接得到的,在nginx源码中它定义为TERM,即SIGTERM,跟一下代码就能找到),发送完成后该进程退出。

在ngx_get_options中判断参数类型,把静态char指针ngx_signal赋值stop

在ngx_signal_process函数中获取master进程号,并调用ngx_os_signal_process

在ngx_os_signal_process函数中根据信号类型向master发送信号:

 1 for (sig = signals; sig->signo != 0; sig++) {
 2         if (ngx_strcmp(name, sig->name) == 0) {
 3             if (kill(pid, sig->signo) != -1) {
 4                 return 0;
 5             }
 6 
 7             ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
 8                           "kill(%P, %d) failed", pid, sig->signo);
 9         }
10     }

3 master收到信号后的处理

不难看出在nginx启动时,就指定了信号的处理函数,而收到stop信号的处理函数为ngx_signal_handler,在这个函数中设置了全局变量ngx_terminate = 1;

  /*
   * Handler registered in signals[] for every signal that is not ignored.
   *
   * The handler only records what happened: depending on ngx_process
   * (master/single vs. worker/helper) and on signo it sets one of the
   * global flags (ngx_quit, ngx_terminate, ngx_noaccept, ngx_reconfigure,
   * ngx_reopen, ngx_change_binary, ngx_sigalrm, ngx_sigio, ngx_reap,
   * ngx_debug_quit) and picks a suffix for the log line.  It then logs the
   * signal, including the sender pid when siginfo provides one, and for
   * SIGCHLD calls ngx_process_get_status().
   *
   * errno is saved on entry and restored with ngx_set_errno() before
   * returning.
   *
   * NOTE(review): if signo matches no signals[] entry, the lookup loop
   * leaves sig on the terminating entry, whose signame is NULL, and that
   * pointer is passed to the "%s" in the log call - confirm this cannot
   * happen in practice.
   */
  1 static void
  2 ngx_signal_handler(int signo, siginfo_t *siginfo, void *ucontext)
  3 {
  4     char            *action;
  5     ngx_int_t        ignore;
  6     ngx_err_t        err;
  7     ngx_signal_t    *sig;
  8 
  9     ignore = 0;
 10 
 11     err = ngx_errno;   /* restored by ngx_set_errno(err) at the end */
 12 
 13     for (sig = signals; sig->signo != 0; sig++) {   /* entry used for sig->signame in the log */
 14         if (sig->signo == signo) {
 15             break;
 16         }
 17     }
 18 
 19     ngx_time_sigsafe_update();
 20 
 21     action = "";
 22 
 23     switch (ngx_process) {
 24 
 25     case NGX_PROCESS_MASTER:
 26     case NGX_PROCESS_SINGLE:
 27         switch (signo) {
 28 
 29         case ngx_signal_value(NGX_SHUTDOWN_SIGNAL):
 30             ngx_quit = 1;
 31             action = ", shutting down";
 32             break;
 33 
 34         case ngx_signal_value(NGX_TERMINATE_SIGNAL):
 35         case SIGINT:
 36             ngx_terminate = 1;   /* the flag the "stop" path of the master loop checks */
 37             action = ", exiting";
 38             break;
 39 
 40         case ngx_signal_value(NGX_NOACCEPT_SIGNAL):
 41             if (ngx_daemonized) {
 42                 ngx_noaccept = 1;
 43                 action = ", stop accepting connections";
 44             }
 45             break;
 46 
 47         case ngx_signal_value(NGX_RECONFIGURE_SIGNAL):
 48             ngx_reconfigure = 1;
 49             action = ", reconfiguring";
 50             break;
 51 
 52         case ngx_signal_value(NGX_REOPEN_SIGNAL):
 53             ngx_reopen = 1;
 54             action = ", reopening logs";
 55             break;
 56 
 57         case ngx_signal_value(NGX_CHANGEBIN_SIGNAL):
 58             if (ngx_getppid() == ngx_parent || ngx_new_binary > 0) {
 59 
 60                 /*
 61                  * Ignore the signal in the new binary if its parent is
 62                  * not changed, i.e. the old binary's process is still
 63                  * running.  Or ignore the signal in the old binary's
 64                  * process if the new binary's process is already running.
 65                  */
 66 
 67                 action = ", ignoring";
 68                 ignore = 1;
 69                 break;
 70             }
 71 
 72             ngx_change_binary = 1;
 73             action = ", changing binary";
 74             break;
 75 
 76         case SIGALRM:
 77             ngx_sigalrm = 1;
 78             break;
 79 
 80         case SIGIO:
 81             ngx_sigio = 1;
 82             break;
 83 
 84         case SIGCHLD:
 85             ngx_reap = 1;
 86             break;
 87         }
 88 
 89         break;
 90 
 91     case NGX_PROCESS_WORKER:
 92     case NGX_PROCESS_HELPER:
 93         switch (signo) {
 94 
 95         case ngx_signal_value(NGX_NOACCEPT_SIGNAL):
 96             if (!ngx_daemonized) {
 97                 break;
 98             }
 99             ngx_debug_quit = 1;
100             /* fall through */
101         case ngx_signal_value(NGX_SHUTDOWN_SIGNAL):
102             ngx_quit = 1;
103             action = ", shutting down";
104             break;
105 
106         case ngx_signal_value(NGX_TERMINATE_SIGNAL):
107         case SIGINT:
108             ngx_terminate = 1;
109             action = ", exiting";
110             break;
111 
112         case ngx_signal_value(NGX_REOPEN_SIGNAL):
113             ngx_reopen = 1;
114             action = ", reopening logs";
115             break;
116 
117         case ngx_signal_value(NGX_RECONFIGURE_SIGNAL):
118         case ngx_signal_value(NGX_CHANGEBIN_SIGNAL):
119         case SIGIO:
120             action = ", ignoring";
121             break;
122         }
123 
124         break;
125     }
126 
127     if (siginfo && siginfo->si_pid) {   /* sender pid known: include it in the log line */
128         ngx_log_error(NGX_LOG_NOTICE, ngx_cycle->log, 0,
129                       "signal %d (%s) received from %P%s",
130                       signo, sig->signame, siginfo->si_pid, action);
131 
132     } else {
133         ngx_log_error(NGX_LOG_NOTICE, ngx_cycle->log, 0,
134                       "signal %d (%s) received%s",
135                       signo, sig->signame, action);
136     }
137 
138     if (ignore) {   /* set only by the ignored change-binary case above */
139         ngx_log_error(NGX_LOG_CRIT, ngx_cycle->log, 0,
140                       "the changing binary signal is ignored: "
141                       "you should shutdown or terminate "
142                       "before either old or new binary's process");
143     }
144 
145     if (signo == SIGCHLD) {
146         ngx_process_get_status();
147     }
148 
149     ngx_set_errno(err);
150 }

设置了ngx_terminate之后直接影响master主循环体中这一段:

 1 void
 2 ngx_master_process_cycle(ngx_cycle_t *cycle)
 3 {
 4     ...
 5     for ( ;; ) {
 6         if (ngx_reap) {
 7             ngx_reap = 0;
 8             ngx_log_debug0(NGX_LOG_DEBUG_EVENT, cycle->log, 0, "reap children");
 9 
10             live = ngx_reap_children(cycle);
11         }
12 
13         if (!live && (ngx_terminate || ngx_quit)) {
14             ngx_master_process_exit(cycle);
15         }
16 
17         if (ngx_terminate) {
18             if (delay == 0) {
19                 delay = 50;
20             }
21 
22             if (sigio) {
23                 sigio--;
24                 continue;
25             }
26 
27             sigio = ccf->worker_processes + 2 /* cache processes */;
28 
29             if (delay > 1000) {
30                 ngx_signal_worker_processes(cycle, SIGKILL);
31             } else {
32                 ngx_signal_worker_processes(cycle,
33                                        ngx_signal_value(NGX_TERMINATE_SIGNAL));
34             }
35 
36             continue;
37         }
38     }

ngx_signal_worker_processes函数中,首先通过ipc通信(write channel)向所有worker进程发送terminate命令;发送失败的情况下,再直接调用kill()向该worker进程发送信号。

master进程首先设置了delay=50,并在下一次循环中设置时长为delay(单位为毫秒)的定时器:

 1 if (delay) {
 2             if (ngx_sigalrm) {
 3                 sigio = 0;
 4                 delay *= 2;
 5                 ngx_sigalrm = 0;
 6             }
 7 
 8             ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
 9                            "termination cycle: %M", delay);
10 
11             itv.it_interval.tv_sec = 0;
12             itv.it_interval.tv_usec = 0;
13             itv.it_value.tv_sec = delay / 1000;
14             itv.it_value.tv_usec = (delay % 1000 ) * 1000;
15 
16             if (setitimer(ITIMER_REAL, &itv, NULL) == -1) {
17                 ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
18                               "setitimer() failed");
19             }
20         }

定时时间到的话,master进程会收到一个SIGALRM信号,然后会把delay*2,再重新设置一个定时器……如此反复,直到delay超过1000毫秒这一阈值。此时会以SIGKILL为参数调用ngx_signal_worker_processes,不再通过channel通知worker,而是直接调用kill函数杀掉worker进程。

1 if (delay > 1000) {
2                 ngx_signal_worker_processes(cycle, SIGKILL);
3             } else {
4                 ngx_signal_worker_processes(cycle,
5                                        ngx_signal_value(NGX_TERMINATE_SIGNAL));
6             }

当子进程正常处理完master发出的信号,并退出后,内核会发出一个SIGCHLD信号,master收到这个信号后调用waitpid来回收子进程。

1 if (signo == SIGCHLD) {
2         ngx_process_get_status();
3     }
 /*
  * Collect the exit status of every child that has terminated.  Called
  * from ngx_signal_handler() when the signal is SIGCHLD.
  *
  * Loops on waitpid(-1, &status, WNOHANG) until it returns 0 or fails.
  * For each reaped pid the matching ngx_processes[] slot (searched in
  * [0, ngx_last_process)) gets its status stored and exited set to 1; the
  * termination is logged either as "exited on signal" or "exited with
  * code"; an exit code of 2 clears the slot's respawn flag; finally
  * ngx_unlock_mutexes(pid) is called.
  *
  * NOTE(review): when the pid is not found in ngx_processes[], the lookup
  * loop ends with i == ngx_last_process, and the respawn check below still
  * reads (and may write) ngx_processes[i] - confirm that slot is always
  * valid and that touching it is intended.
  */
 1 static void
 2 ngx_process_get_status(void)
 3 {
 4     int              status;
 5     char            *process;
 6     ngx_pid_t        pid;
 7     ngx_err_t        err;
 8     ngx_int_t        i;
 9     ngx_uint_t       one;
10 
11     one = 0;   /* becomes 1 once at least one child has been reaped */
12 
13     for ( ;; ) {
14         pid = waitpid(-1, &status, WNOHANG);
15 
16         if (pid == 0) {
17             return;
18         }
19 
20         if (pid == -1) {
21             err = ngx_errno;
22 
23             if (err == NGX_EINTR) {
24                 continue;
25             }
26 
27             if (err == NGX_ECHILD && one) {   /* already reaped something: return without logging */
28                 return;
29             }
30 
31             /*
32              * Solaris always calls the signal handler for each exited process
33              * despite waitpid() may be already called for this process.
34              *
35              * When several processes exit at the same time FreeBSD may
36              * erroneously call the signal handler for exited process
37              * despite waitpid() may be already called for this process.
38              */
39 
40             if (err == NGX_ECHILD) {
41                 ngx_log_error(NGX_LOG_INFO, ngx_cycle->log, err,
42                               "waitpid() failed");
43                 return;
44             }
45 
46             ngx_log_error(NGX_LOG_ALERT, ngx_cycle->log, err,
47                           "waitpid() failed");
48             return;
49         }
50 
51 
52         one = 1;
53         process = "unknown process";   /* kept when pid matches no ngx_processes[] slot */
54 
55         for (i = 0; i < ngx_last_process; i++) {
56             if (ngx_processes[i].pid == pid) {
57                 ngx_processes[i].status = status;
58                 ngx_processes[i].exited = 1;
59                 process = ngx_processes[i].name;
60                 break;
61             }
62         }
63 
64         if (WTERMSIG(status)) {
65 #ifdef WCOREDUMP
66             ngx_log_error(NGX_LOG_ALERT, ngx_cycle->log, 0,
67                           "%s %P exited on signal %d%s",
68                           process, pid, WTERMSIG(status),
69                           WCOREDUMP(status) ? " (core dumped)" : "");
70 #else
71             ngx_log_error(NGX_LOG_ALERT, ngx_cycle->log, 0,
72                           "%s %P exited on signal %d",
73                           process, pid, WTERMSIG(status));
74 #endif
75 
76         } else {
77             ngx_log_error(NGX_LOG_NOTICE, ngx_cycle->log, 0,
78                           "%s %P exited with code %d",
79                           process, pid, WEXITSTATUS(status));
80         }
81 
82         if (WEXITSTATUS(status) == 2 && ngx_processes[i].respawn) {   /* i == ngx_last_process if pid was not found */
83             ngx_log_error(NGX_LOG_ALERT, ngx_cycle->log, 0,
84                           "%s %P exited with fatal code %d "
85                           "and cannot be respawned",
86                           process, pid, WEXITSTATUS(status));
87             ngx_processes[i].respawn = 0;
88         }
89 
90         ngx_unlock_mutexes(pid);
91     }
92 }

worker进程收到信号并处理

worker进程启动时,在ngx_worker_process_cycle-->ngx_worker_process_init中为channel注册了读事件处理函数ngx_channel_handler:

1  if (ngx_add_channel_event(cycle, ngx_channel, NGX_READ_EVENT,
2                               ngx_channel_handler)
3         == NGX_ERROR)
4     {
5         /* fatal */
6         exit(2);
7     }

worker进程从channel读到master发来的terminate命令后,会在ngx_channel_handler中把ngx_terminate设置为1;

而在worker进程主循环中,检测到该标志为1,则调用ngx_worker_process_exit函数。

1 for(;;) {
2     if (ngx_terminate) {
3         ngx_log_error(NGX_LOG_NOTICE, cycle->log, 0, "exiting");
4         ngx_worker_process_exit(cycle);
5     }
6 }

现象(偶发,未解决)

目前遇到2个非必现的现象:

1、执行一段时间后,stop后,master进程残留。master成为僵尸进程。

2、执行一段时间后,stop后,master进程和worker进程残留,不是僵尸进程,但是已经不占用端口了。

ps:目前还需要对现象进行进一步综合日志汇总分析,这两天一直又没出现。

疑问(已解答)

1、执行./nginx -s stop之后,master进程收到好几个SIGALRM信号,但是从代码上来看,其实只发送了一次信号。那么为什么master进程会在收到N次信号后才退出?

答:master进程收到stop之后会设置时长为delay的定时器,定时时间到了master进程就会收到一个SIGALRM信号。定时时间到了还没回收完所有的子进程,就会把delay*2,再重新设置定时器,直到定时时间达到阈值或者所有子进程退出。

2、worker进程在销毁的时候在thread 模块中没出来,但是在master进程收到N次信号后,worker进程出来了,然后master进程接着退出。为什么worker进程的thread 模块的exit process没出来也退出了worker进程。

答:delay时间达到1000之后,会到ngx_signal_worker_processes函数,参数为SIGKILL,这个时候就不会通过write channel让子进程关闭了,就是直接kill子进程了。

1 if (delay > 1000) {
2                 ngx_signal_worker_processes(cycle, SIGKILL);
3             } else {
4                 ngx_signal_worker_processes(cycle,
5                                        ngx_signal_value(NGX_TERMINATE_SIGNAL));
6             }
 1 if (kill(ngx_processes[i].pid, signo) == -1) {
 2             err = ngx_errno;
 3             ngx_log_error(NGX_LOG_ALERT, cycle->log, err,
 4                           "kill(%P, %d) failed", ngx_processes[i].pid, signo);
 5 
 6             if (err == NGX_ESRCH) {
 7                 ngx_processes[i].exited = 1;
 8                 ngx_processes[i].exiting = 0;
 9                 ngx_reap = 1;
10             }
11 
12             continue;
13         }

杀掉子进程后,内核通过SIGCHLD通知master,然后master通过waitpid和ngx_reap_children回收子进程,并设置live标识,最后master退出。

原文地址:https://www.cnblogs.com/micoblog/p/13366190.html