进程保活方法

         mysql-proxy源码中,使用了一种进程保活的方法。这种方法的基本原理是:当父进程完成基本的初始化后,创建子进程,由子进程继续后面的主体逻辑。而父进程wait子进程的退出状态。一旦发现子进程是由于收到信号而退出的,则重启子进程。

         这种方法的实现代码如下:

#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sys/types.h>
#include <sys/resource.h>
#include <sys/wait.h>
#include <unistd.h>


/**
 * Signal handler: relay a received signal to our whole process group.
 *
 * The signal is first set to "ignored" in the calling process, so the copy
 * that kill(0, ...) delivers back to ourselves does not re-enter this handler.
 *
 * @param sig  number of the signal that was delivered
 */
static void signal_forward(int sig)
{
    /* A handler can run between a failing call and the code that inspects
     * errno, so anything signal()/kill() write to errno must not leak out. */
    int saved_errno = errno;

    signal(sig, SIG_IGN); /* we don't want to create a loop here */
    kill(0, sig);         /* pid 0: every process in the caller's process group */

    errno = saved_errno;
}


/**
 * Keep the program alive by supervising a forked child.
 *
 * The calling process forks. The child returns immediately and is expected to
 * carry on with the real work. The parent stays inside this function, waits
 * for the child and looks at how it ended:
 *
 *  - the child exited normally: the parent returns as well;
 *  - the child was killed by a signal (any signal): the parent sleeps for one
 *    minute and then forks a replacement child.
 *
 * While a child is running, SIGINT, SIGTERM, SIGHUP, SIGUSR1 and SIGUSR2 sent
 * to the parent are relayed to the process group through signal_forward().
 * During the one-minute pause SIGINT, SIGTERM and SIGHUP are back at their
 * default disposition.
 *
 * @return  0 in the child process;
 *          1 in the parent after the child exited normally;
 *         -1 in the parent if fork() failed or waiting for the child failed
 *            with an error other than EINTR.
 */
int chassis_unix_proc_keepalive(void)
{
    int nprocs = 0;
    pid_t child_pid = -1;

    /* SIGINT, SIGTERM and friends are not acted upon by the parent itself;
     * they are forwarded so that the child's exit status can still be
     * collected. The child has to set up its own handlers for them.
     */

    for (;;)
    {
        /* try to start the children */
        while (nprocs < 1)
        {
            pid_t pid;

            /* anything still sitting in the stdio buffer would be copied into
             * the child by fork() and printed a second time */
            fflush(stdout);

            pid = fork();

            if (pid == 0)
            {
                /* child */
                printf("we are the child: %d\n", (int)getpid());
                return 0;
            }
            else if (pid < 0)
            {
                /* fork() failed */
                printf("fork() failed: %s[%d]\n", strerror(errno), errno);
                return -1;
            }
            else
            {
                /* we are the angel, let's see what the child did */
                printf("[father]: we try to keep PID=%d alive\n", (int)pid);

                /* forward a few signals that are sent to us to the child instead */
                signal(SIGINT, signal_forward);
                signal(SIGTERM, signal_forward);
                signal(SIGHUP, signal_forward);
                signal(SIGUSR1, signal_forward);
                signal(SIGUSR2, signal_forward);

                child_pid = pid;
                nprocs++;
            }
        }

        if (child_pid != -1)
        {
            struct rusage rusage;
            int exit_status;
            int wait_errno;
            pid_t exit_pid;

            printf("[father]: waiting for %d\n", (int)child_pid);
#ifdef HAVE_WAIT4
            exit_pid = wait4(child_pid, &exit_status, 0, &rusage);
#else
            memset(&rusage, 0, sizeof(rusage)); /* make sure everything is zero'ed out */
            exit_pid = waitpid(child_pid, &exit_status, 0);
#endif
            /* capture errno right away: the printf() below may overwrite it
             * before the EINTR check is reached */
            wait_errno = errno;

            printf("[father]: %d returned: %d\n", (int)child_pid, (int)exit_pid);

            if (exit_pid == child_pid)
            {
                /* our child returned, let's see how it went */
                if (WIFEXITED(exit_status))
                {
                    printf("[father]: PID=%d exited normally with exit-code = %d (it used %ld kBytes max)\n",
                            (int)child_pid,
                            WEXITSTATUS(exit_status),
                            rusage.ru_maxrss / 1024);

                    return 1;
                }
                else if (WIFSIGNALED(exit_status))
                {
                    int time_towait = 60;
                    /* our child died on a signal
                     *
                     * log it and restart */

                    printf("[father]: PID=%d died on signal=%d (it used %ld kBytes max) ... waiting 1min before restart\n",
                            (int)child_pid,
                            WTERMSIG(exit_status),
                            rusage.ru_maxrss / 1024);

                    /**
                     * to make sure we don't loop as fast as we can, sleep a bit between
                     * restarts
                     *
                     * with the default dispositions restored, SIGINT/SIGTERM/SIGHUP
                     * take their default action on the parent during the pause
                     */
                    signal(SIGINT, SIG_DFL);
                    signal(SIGTERM, SIG_DFL);
                    signal(SIGHUP, SIG_DFL);

                    /* sleep() returns the unslept seconds when interrupted */
                    while (time_towait > 0)
                    {
                        time_towait = sleep(time_towait);
                    }

                    nprocs--;
                    child_pid = -1;
                }
                else if (WIFSTOPPED(exit_status))
                {
                    /* nothing to do, keep waiting for the same child */
                }
                else
                {
                    printf("[father]: should not reached\n");
                }
            }
            else if (-1 == exit_pid)
            {
                /* EINTR is ok, all others bad */
                if (EINTR != wait_errno)
                {
                    /* how can this happen ? */
                    printf("[father]: wait4(%d, ...) failed: %s[%d]\n",
                        (int)child_pid,
                        strerror(wait_errno),
                        wait_errno);

                    return -1;
                }
            }
            else
            {
                printf("[father]: should not reached\n");
            }
        }
    }
}


/**
 * Demo entry point for the keep-alive scheme.
 *
 * chassis_unix_proc_keepalive() decides which role this process plays: the
 * supervising parent leaves through exit() once that call returns non-zero,
 * while the child (return value 0) falls through into the work loop, which
 * prints a greeting every 10 seconds and never returns.
 */
int main(void)
{
    int ret = chassis_unix_proc_keepalive();

    if (ret > 0)
    {
        /* parent: the child exited normally */
        exit(0);
    }
    else if (ret < 0)
    {
        /* parent: fork() or waiting for the child failed */
        exit(-1);
    }
    else
    {
        /* we are the child, go on */
    }

    for (;;)
    {
        printf("hello, world\n");
        sleep(10);
    }
}

         这里的主体逻辑,就是每隔10秒打印一次“hello, world”。程序运行结果如下:

[father]: we try to keep PID=1824 alive
[father]: waiting for 1824
we are the child: 1824
hello, world
hello, world
...

(向子进程发送SIGKILL信号)
[father]: 1824 returned: 1824
[father]: PID=1824 died on signal=9 (it used 0 kBytes max) ... waiting 1min before restart

[father]: we try to keep PID=1853 alive
[father]: waiting for 1853
we are the child: 1853
hello, world
hello, world
hello, world
...

(向父进程发送SIGINT信号)
[father]: 1853 returned: 1853
[father]: PID=1853 died on signal=2 (it used 0 kBytes max) ... waiting 1min before restart

[father]: we try to keep PID=1870 alive
[father]: waiting for 1870
we are the child: 1870
hello, world
hello, world
hello, world
hello, world
...

 

原文地址:https://www.cnblogs.com/gqtcgq/p/7247036.html