怎样查找进程被谁频繁干掉

  首先需要弄清楚 进程被干掉的过程是怎样的?

一般使用kill -9 pid 来杀死进程 , 那么进程是怎样收到signal以及处理signal呢?

那么目前有哪些现成的工具可以用来探测 signal 呢? 可以参考 SystemTap 的 tapset 文档:

#! /usr/bin/env stap
# sigkill.stp
# Copyright (C) 2007 Red Hat, Inc., Eugene Teo <eteo@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# /usr/share/systemtap/tapset/signal.stp:
# [...]
# probe signal.send = _signal.send.*
# {
#     sig=$sig
#     sig_name = _signal_name($sig)
#     sig_pid = task_pid(task)
#     pid_name = task_execname(task)
# [...]

# Report every signal at the moment it is sent: the signal number and name,
# the receiving process (name and pid) and the sender (executable name, pid
# and uid). sig, sig_name, pid_name and sig_pid come from the signal.send
# tapset probe; execname(), pid() and uid() describe the sending context.
probe signal.send {

  # The trailing "\n" keeps each event on its own output line.
  printf(" sig[%d] %s was sent to %s (pid:%d) by exec:%s -->:%d uid:%d \n",
    sig, sig_name, pid_name, sig_pid, execname(), pid(), uid())

}

运行上面的脚本,就可以探测到相关信息。

以下代码分析参考魅族团队对 signal 的分析。

/**
 *  sys_kill - send a signal to a process
 *  @pid: the PID of the process
 *  @sig: signal to be sent
 *
 *  Describes the calling task as the sender in a siginfo and hands the
 *  request to kill_something_info(), whose result is returned.
 */
SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
{
    struct siginfo sender;

    /* What is sent: the signal number, tagged as SI_USER. */
    sender.si_signo = sig;
    sender.si_errno = 0;
    sender.si_code = SI_USER;

    /* Who sends it: the values reported for the current task. */
    sender.si_pid = task_tgid_vnr(current);
    sender.si_uid = from_kuid_munged(current_user_ns(), current_uid());

    return kill_something_info(sig, &sender, pid);
}

kill() 系统调用的功能是发送一个信号给线程组,只需要线程组挑出一个线程来响应处理信号。但是对于致命信号,线程组内所有线程都会被杀死,而不仅仅是处理信号的那个线程。

/*
 * kill_something_info() interprets pid in interesting ways just like kill(2).
 *
 * POSIX specifies that kill(-1,sig) is unspecified, but what we have
 * is probably wrong.  Should make it like BSD or SYSV.
 *
 * Dispatch on pid:
 *   pid > 0    - kill_pid_info() on find_vpid(pid)
 *   pid == 0   - __kill_pgrp_info() on task_pgrp(current)
 *   pid < -1   - __kill_pgrp_info() on find_vpid(-pid)
 *   pid == -1  - group_send_sig_info() on every process accepted by the
 *                filter in the loop below
 *
 * Returns the result of the helper that was used. -ESRCH is returned for
 * pid == INT_MIN and when the pid == -1 walk found no process to signal.
 */

static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
{
    int ret;

    if (pid > 0) {// case 1: pid > 0 - signal the target that find_vpid(pid) resolves to
        rcu_read_lock();
        ret = kill_pid_info(sig, info, find_vpid(pid));
        rcu_read_unlock();
        return ret;
    }

    /* -INT_MIN is undefined.  Exclude this case to avoid a UBSAN warning */
    if (pid == INT_MIN)
        return -ESRCH;

    read_lock(&tasklist_lock);
    if (pid != -1) {// case 2: pid <= 0 and pid != -1 - signal a process group: the one found for -pid, or the caller's own group when pid == 0
        ret = __kill_pgrp_info(sig, info,
                pid ? find_vpid(-pid) : task_pgrp(current));
    } else {
        int retval = 0, count = 0;
        struct task_struct * p;
        // case 3: pid == -1 - signal every process except those whose
        // task_pid_vnr() is <= 1 and those in the caller's own thread group
        for_each_process(p) {
            if (task_pid_vnr(p) > 1 &&
                    !same_thread_group(p, current)) {
                int err = group_send_sig_info(sig, info, p);
                ++count;
                // -EPERM results are not allowed to overwrite retval
                if (err != -EPERM)
                    retval = err;
            }
        }
        // nobody was eligible: report -ESRCH instead of retval
        ret = count ? retval : -ESRCH;
    }
    read_unlock(&tasklist_lock);

    return ret;
}

   发送信号的核心函数是 __send_signal(),它的主要目的是把信号挂到信号的 pending 队列中去。pending 队列有两种:一种是线程组共享的 task_struct->signal->shared_pending,发送给线程组的信号会挂载到该队列;另一种是线程私有的队列 task_struct->pending,发送给单个线程的信号会挂载到该队列。

/*
 * send signal info to all the members of a group
 *
 * The permission check runs first, under the RCU read lock. When it fails,
 * or when sig is 0, its result is returned and nothing is sent. Otherwise
 * the signal is passed to do_send_sig_info() with the group argument set
 * to true and that call's result is returned.
 */
int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
{
    int err;

    rcu_read_lock();
    err = check_kill_permission(sig, info, p);
    rcu_read_unlock();

    /* Permission denied, or sig == 0 (check only): stop here. */
    if (err || !sig)
        return err;

    return do_send_sig_info(sig, info, p, true);
}

/*
 * __send_signal - core of signal sending: record sig as pending for task t.
 *
 * The signal is put on one of two pending queues: the one shared by the
 * thread group (t->signal->shared_pending) when group is non-zero,
 * otherwise the private queue of the task (t->pending). Once the signal
 * bit is set, complete_signal() is called for it.
 *
 * Must be called with t->sighand->siglock held (asserted below).
 *
 * Returns 0, or -EAGAIN when no queue entry could be allocated for a
 * real-time signal whose si_code is not SI_USER.
 */

static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
            int group, int from_ancestor_ns)
{
    struct sigpending *pending;
    struct sigqueue *q;
    int override_rlimit;
    int ret = 0, result;

    assert_spin_locked(&t->sighand->siglock);

    result = TRACE_SIGNAL_IGNORED;
    // stop here when prepare_signal() reports that the signal must not be delivered
    if (!prepare_signal(sig, t,
            from_ancestor_ns || (info == SEND_SIG_FORCED)))
        goto ret;
    // pick the pending queue: thread-group shared (t->signal->shared_pending)
    // or private to this task (t->pending)
    pending = group ? &t->signal->shared_pending : &t->pending;
    /*
     * Short-circuit ignored signals and support queuing
     * exactly one non-rt signal, so that we can get more
     * detailed information about the cause of the signal.
     */
    // A non-real-time signal that is already pending is not queued a second
    // time. Real-time signals are still queued when repeated, so every
    // instance can be received.
    result = TRACE_SIGNAL_ALREADY_PENDING;
    if (legacy_queue(pending, sig))
        goto ret;

    result = TRACE_SIGNAL_DELIVERED;
    /*
     * fast-pathed signals for kernel-internal things like SIGSTOP
     * or SIGKILL.
     */
    // Forced signals (SEND_SIG_FORCED) skip the queue-entry allocation and
    // go straight to setting the pending bit.
    if (info == SEND_SIG_FORCED)
        goto out_set;

    /*
     * Real-time signals must be queued if sent by sigqueue, or
     * some other real-time mechanism.  It is implementation
     * defined whether kill() does so.  We attempt to do so, on
     * the principle of least surprise, but since kill is not
     * allowed to fail with EAGAIN when low on memory we just
     * make sure at least one signal gets delivered and don't
     * pass on the info struct.
     */
    // Non-real-time signals with special info or si_code >= 0 may exceed the
    // pending-signal limit; everything else is subject to it.
    // NOTE(review): the limit is presumably the one shown by "ulimit -i" - confirm.
    if (sig < SIGRTMIN)
        override_rlimit = (is_si_special(info) || info->si_code >= 0);
    else
        override_rlimit = 0;

    q = __sigqueue_alloc(sig, t, GFP_ATOMIC | __GFP_NOTRACK_FALSE_POSITIVE,
        override_rlimit);// allocate a queue entry; on success it is appended to the pending list below
    if (q) {
        list_add_tail(&q->list, &pending->list);
        // fill q->info according to which kind of info the sender supplied
        switch ((unsigned long) info) {
        case (unsigned long) SEND_SIG_NOINFO:
            q->info.si_signo = sig;
            q->info.si_errno = 0;
            q->info.si_code = SI_USER;
            q->info.si_pid = task_tgid_nr_ns(current,
                            task_active_pid_ns(t));
            q->info.si_uid = from_kuid_munged(current_user_ns(), current_uid());
            break;
        case (unsigned long) SEND_SIG_PRIV:
            q->info.si_signo = sig;
            q->info.si_errno = 0;
            q->info.si_code = SI_KERNEL;
            q->info.si_pid = 0;
            q->info.si_uid = 0;
            break;
        default:
            copy_siginfo(&q->info, info);
            if (from_ancestor_ns)
                q->info.si_pid = 0;
            break;
        }

        userns_fixup_signal_uid(&q->info, t);

    } else if (!is_si_special(info)) {
        if (sig >= SIGRTMIN && info->si_code != SI_USER) {
            /*
             * Queue overflow, abort.  We may abort if the
             * signal was rt and sent by user using something
             * other than kill().
             */
            result = TRACE_SIGNAL_OVERFLOW_FAIL;
            ret = -EAGAIN;
            goto ret;
        } else {
            /*
             * This is a silent loss of information.  We still
             * send the signal, but the *info bits are lost.
             */
            result = TRACE_SIGNAL_LOSE_INFO;
        }
    }

out_set:
    signalfd_notify(t, sig);// let signalfd listeners on t know about sig
    sigaddset(&pending->signal, sig); // mark sig as pending in the chosen queue's signal set
    complete_signal(sig, t, group);// pick and wake a thread to handle the signal
ret:
    trace_signal_generate(sig, info, t, group, result);
    return ret;
}

具体看下 prepare_signal 以及 complete_signal 的实现

/*
 * Handle magic process-wide effects of stop/continue signals. Unlike
 * the signal actions, these happen immediately at signal-generation
 * time regardless of blocking, ignoring, or handling.  This does the
 * actual continuing for SIGCONT, but not the actual stopping for stop
 * signals. The process stop is done as a signal action for SIG_DFL.
 *
 * Returns true if the signal should be actually delivered, otherwise
 * it should be dropped.
 */
static bool prepare_signal(int sig, struct task_struct *p, bool force)
{
    struct signal_struct *signal = p->signal;
    struct task_struct *t;
    sigset_t flush;

    if (signal->flags & (SIGNAL_GROUP_EXIT | SIGNAL_GROUP_COREDUMP)) {
        if (!(signal->flags & SIGNAL_GROUP_EXIT))
            return sig == SIGKILL;// only SIGNAL_GROUP_COREDUMP is set: SIGKILL is the one signal still delivered
        // SIGNAL_GROUP_EXIT is set: no stop/continue side effects are applied;
        // the decision is left to sig_ignored() at the end
        /*
         * The process is in the middle of dying, nothing to do.
         */
    } else if (sig_kernel_stop(sig)) {
        /*
         * This is a stop signal.  Remove SIGCONT from all queues.
         */
        // i.e. from the shared queue and from the private queue of every thread
        siginitset(&flush, sigmask(SIGCONT));
        flush_sigqueue_mask(&flush, &signal->shared_pending);
        for_each_thread(p, t)
            flush_sigqueue_mask(&flush, &t->pending);
    } else if (sig == SIGCONT) {
        unsigned int why;
        /*
         * Remove all stop signals from all queues, wake all threads.
         */
        // For each thread the pending stop is cancelled; the thread is then
        // woken out of __TASK_STOPPED, or ptrace_trap_notify() is used when
        // PT_SEIZED is set.
        siginitset(&flush, SIG_KERNEL_STOP_MASK);
        flush_sigqueue_mask(&flush, &signal->shared_pending);
        for_each_thread(p, t) {
            flush_sigqueue_mask(&flush, &t->pending);
            task_clear_jobctl_pending(t, JOBCTL_STOP_PENDING);
            if (likely(!(t->ptrace & PT_SEIZED)))
                wake_up_state(t, __TASK_STOPPED);
            else
                ptrace_trap_notify(t);
        }

        /*
         * Notify the parent with CLD_CONTINUED if we were stopped.
         *
         * If we were in the middle of a group stop, we pretend it
         * was already finished, and then continued. Since SIGCHLD
         * doesn't queue we report only CLD_STOPPED, as if the next
         * CLD_CONTINUED was dropped.
         */
        why = 0;
        if (signal->flags & SIGNAL_STOP_STOPPED)
            why |= SIGNAL_CLD_CONTINUED;
        else if (signal->group_stop_count)
            why |= SIGNAL_CLD_STOPPED;

        if (why) {
            /*
             * The first thread which returns from do_signal_stop()
             * will take ->siglock, notice SIGNAL_CLD_MASK, and
             * notify its parent. See get_signal_to_deliver().
             */
            signal_set_stop_flags(signal, why | SIGNAL_STOP_CONTINUED);
            signal->group_stop_count = 0;
            signal->group_exit_code = 0;
        }
    }
    // final decision: deliver unless sig_ignored() says the signal is ignored
    return !sig_ignored(p, sig, force);
}

/*
 * Tell whether a signal with the given handler is ignored.
 *
 * Returns 1 when the handler is SIG_IGN (explicitly ignored), or when the
 * handler is SIG_DFL and sig_kernel_ignore() is true for the signal (the
 * default action is to ignore it; SIG_KERNEL_IGNORE_MASK covers SIGCONT,
 * SIGCHLD, SIGWINCH and SIGURG). Returns 0 otherwise.
 */
static int sig_handler_ignored(void __user *handler, int sig)
{
    /* Explicitly ignored. */
    if (handler == SIG_IGN)
        return 1;

    /* A real handler is installed, so the signal is not ignored. */
    if (handler != SIG_DFL)
        return 0;

    /* Default action: ignored only for the "default is ignore" signals. */
    return sig_kernel_ignore(sig) ? 1 : 0;
}

/*
 * Tell whether task t ignores sig, taking SIGNAL_UNKILLABLE into account.
 *
 * A task flagged SIGNAL_UNKILLABLE that still has the default action for
 * sig ignores the signal, unless the signal is both forced and a
 * sig_kernel_only() signal. In every other case the answer comes from
 * sig_handler_ignored(). Returns 1 when ignored, 0 otherwise.
 */
static int sig_task_ignored(struct task_struct *t, int sig, bool force)
{
    /* The action currently installed for sig on this task. */
    void __user *action = sig_handler(t, sig);

    if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) && action == SIG_DFL) {
        if (!force || !sig_kernel_only(sig))
            return 1;
    }

    return sig_handler_ignored(action, sig);
}
/*
 * complete_signal - finish sending sig: choose a thread to handle it and
 * wake it up.
 *
 * p is the task the signal was sent to; group is non-zero when the signal
 * was sent to the whole thread group. If the signal is fatal and not a
 * core-dumping one, every thread of the group is marked with SIGKILL and
 * woken instead of waking just the chosen thread.
 */
static void complete_signal(int sig, struct task_struct *p, int group)
{
    struct signal_struct *signal = p->signal;
    struct task_struct *t;

    /*
     * Now find a thread we can wake up to take the signal off the queue.
     *
     * If the main thread wants the signal, it gets first crack.
     * Probably the least surprising to the average bear.
     */
    // first choice: p itself, if wants_signal() accepts it
    if (wants_signal(sig, p))
        t = p;
    else if (!group || thread_group_empty(p))
        /*
         * There is just one thread and it does not need to be woken.
         * It will dequeue unblocked signals before it runs again.
         */
        // sent to a single thread (or p has no sibling threads) and p does
        // not want the signal right now: nothing to wake
        return;
    else {
        /*
         * Otherwise try to find a suitable thread.
         */
        // walk the thread group, starting at signal->curr_target, until a
        // thread that wants the signal is found or the walk wraps around
        t = signal->curr_target;
        while (!wants_signal(sig, t)) {
            t = next_thread(t);
            if (t == signal->curr_target)
                /*
                 * No thread needs to be woken.
                 * Any eligible threads will see
                 * the signal in the queue soon.
                 */
                return;
        }
        signal->curr_target = t;
    }

    /*
     * Found a killable thread.  If the signal will be fatal,
     * then start taking the whole group down immediately.
     */
    // applies to any signal that sig_fatal() reports as fatal, provided no
    // group exit is already in progress, the signal is not in
    // t->real_blocked, and p is not ptraced (SIGKILL is exempt from the
    // ptrace condition)
    if (sig_fatal(p, sig) &&
        !(signal->flags & SIGNAL_GROUP_EXIT) &&
        !sigismember(&t->real_blocked, sig) &&
        (sig == SIGKILL || !p->ptrace)) {
        /*
         * This signal will be fatal to the whole group.
         */
        if (!sig_kernel_coredump(sig)) { // no core dump needed: the group can be taken down right here
            /*
             * Start a group exit and wake everybody up.
             * This way we don't have other threads
             * running and doing things after a slower
             * thread has the fatal signal pending.
             */
            signal->flags = SIGNAL_GROUP_EXIT;
            signal->group_exit_code = sig;
            signal->group_stop_count = 0;
            t = p;
            do {
                task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
                sigaddset(&t->pending.signal, SIGKILL); // make SIGKILL pending on this thread's private queue
                signal_wake_up(t, 1);
            } while_each_thread(p, t); // repeat for every thread in p's thread group
            return;
        }
    }

    /*
     * The signal is already in the shared-pending queue.
     * Tell the chosen thread to wake up and dequeue it.
     */
    // the second argument is non-zero only for SIGKILL
    signal_wake_up(t, sig == SIGKILL);
    return;
}


/*
 * Tell a process that it has a new active signal..
 *
 * NOTE! we rely on the previous spin_lock to
 * lock interrupts for us! We can only be called with
 * "siglock" held, and the local interrupt must
 * have been disabled when that got acquired!
 *
 * No need to set need_resched since signal event passing
 * goes through ->blocked
 *
 * t is the task to notify; state holds extra task-state bits that are
 * OR-ed with TASK_INTERRUPTIBLE for the wake-up below.
 */
void signal_wake_up_state(struct task_struct *t, unsigned int state)
{
    // Set the TIF_SIGPENDING thread flag on t.
    // NOTE(review): the return-to-user path presumably tests this flag to
    // decide whether to call do_notify_resume() - confirm for the target arch.
    set_tsk_thread_flag(t, TIF_SIGPENDING);
    /*
     * TASK_WAKEKILL also means wake it up in the stopped/traced/killable
     * case. We don't check t->state here because there is a race with it
     * executing another processor and just now entering stopped state.
     * By using wake_up_state, we ensure the process will wake up and
     * handle its death signal.
     */
    // Wake t if it sleeps in TASK_INTERRUPTIBLE or in one of the states
    // passed in by the caller; if wake_up_state() returns 0, fall back to
    // kick_process().
    if (!wake_up_state(t, state | TASK_INTERRUPTIBLE))
        kick_process(t);
}

/*
 * Test if P wants to take SIG.  After we've checked all threads with this,
 * it's equivalent to finding no threads not blocking SIG.  Any threads not
 * blocking SIG were ruled out because they are not running and already
 * have pending signals.  Such threads will dequeue from the shared queue
 * as soon as they're available, so putting the signal on the shared queue
 * will be equivalent to sending it to one such thread.
 *
 * Returns non-zero when p should be chosen to handle sig, 0 otherwise.
 */
static inline int wants_signal(int sig, struct task_struct *p)
{
    /* A thread that blocks sig or is already exiting never takes it. */
    if (sigismember(&p->blocked, sig) || (p->flags & PF_EXITING))
        return 0;

    /* SIGKILL is taken whatever else the thread is doing. */
    if (sig == SIGKILL)
        return 1;

    /* Stopped or traced threads are passed over. */
    if (task_is_stopped_or_traced(p))
        return 0;

    /* Prefer a thread that is running right now ... */
    if (task_curr(p))
        return 1;

    /* ... or else one that has no signal pending yet. */
    return !signal_pending(p);
}

kill()
   | User Space
=========================================================
   | Kernel Space
sys_kill()
   └→ kill_something_info()
          └→ kill_proc_info()
                 └→ find_task_by_pid()
                 └→ send_sig_info()
                        └→ bad_signal()
                        └→ handle_stop_signal()
                        └→ ignored_signal()
                        └→ deliver_signal()
                               └→ send_signal()
                               |     └→ kmem_cache_alloc()
                               |     └→ sigaddset()
                               └→ signal_wake_up()

什么时候会触发信号相应的处理函数呢?

为了尽快让信号得到处理,Linux把信号处理过程放置在进程从内核态返回到用户态前,也就是在 ret_from_sys_call 处:

// arch/i386/kernel/entry.S
// Excerpt of the system-call return path: before registers are restored,
// the sigpending field is tested and do_signal() is called when it is set.

ENTRY(ret_from_sys_call)
 ...
ret_with_reschedule:
 ...
 cmpl $0, sigpending(%ebx)  // compare the sigpending field with 0 (presumably %ebx points at the current task - confirm)
 jne signal_return          // non-zero: branch to signal_return
restore_all:
 RESTORE_ALL

 ALIGN
signal_return:
 sti                             // re-enable interrupts
 testl $(VM_MASK),EFLAGS(%esp)
 movl %esp,%eax
 jne v86_signal_return
 xorl %edx,%edx
 call SYMBOL_NAME(do_signal)    // let do_signal() deal with the pending signal
 jmp restore_all

信号处理的核心函数就是 do_signal()

/*
 * Note that 'init' is a special process: it doesn't get signals it doesn't
 * want to handle. Thus you cannot kill init even with a SIGKILL even by
 * mistake.
 *
 * Note that we go through the signals twice: once to check the signals that
 * the kernel can handle, and then we build all the user-level signal handling
 * stack-frames in one go after that.
 *
 * regs is the saved user register state. It is modified here to restart an
 * interrupted system call, or (through handle_signal()) to run a user handler.
 */
static void do_signal(struct pt_regs *regs)
{
    unsigned long continue_addr = 0, restart_addr = 0;
    int retval = 0;
    int syscall = (int)regs->syscallno;
    struct ksignal ksig;

    /*
     * If we were from a system call, check for system call restarting...
     */
    if (syscall >= 0) {// (1) we came from a system call: work out whether it has to be restarted
        // restart_addr is the pc moved back by one instruction (2 bytes in
        // compat thumb mode, 4 otherwise)
        continue_addr = regs->pc;
        restart_addr = continue_addr - (compat_thumb_mode(regs) ? 2 : 4);
        retval = regs->regs[0];

        /*
         * Avoid additional syscall restarting via ret_to_user.
         */
        regs->syscallno = ~0UL;

        /*
         * Prepare for system call restart. We do this here so that a
         * debugger will see the already changed PC.
         */
        switch (retval) {
        case -ERESTARTNOHAND:
        case -ERESTARTSYS:
        case -ERESTARTNOINTR:
        case -ERESTART_RESTARTBLOCK:
            regs->regs[0] = regs->orig_x0;
            regs->pc = restart_addr;
            break;
        }
    }

    /*
     * Get the signal to deliver. When running under ptrace, at this point
     * the debugger may change all of our registers.
     */
    // (2) get_signal() takes a signal from the pending queues; signals
    // without a user handler get their default action inside get_signal(),
    // so a non-zero return means a user handler has to run
    if (get_signal(&ksig)) {
        /*
         * Depending on the signal settings, we may need to revert the
         * decision to restart the system call, but skip this if a
         * debugger has chosen to restart at a different PC.
         */
        if (regs->pc == restart_addr &&
            (retval == -ERESTARTNOHAND ||
             retval == -ERESTART_RESTARTBLOCK ||
             (retval == -ERESTARTSYS &&
              !(ksig.ka.sa.sa_flags & SA_RESTART)))) {
            regs->regs[0] = -EINTR;
            regs->pc = continue_addr;
        }
        // (3) set things up so that the user-space handler runs
        handle_signal(&ksig, regs);
        return;
    }

    /*
     * Handle restarting a different system call. As above, if a debugger
     * has chosen to restart at a different PC, ignore the restart.
     */
    // (4) no handler to run: restart the interrupted system call
    if (syscall >= 0 && regs->pc == restart_addr) {
        if (retval == -ERESTART_RESTARTBLOCK)
            setup_restart_syscall(regs);
        user_rewind_single_step(current);
    }

    restore_saved_sigmask();
}

  如果用户没有注册信号处理函数,默认的内核处理动作在 get_signal() 函数中就已经执行完了。对于用户注册了处理函数的信号,因为这些处理函数都运行在用户态,所以内核使用了一个技巧:先构造堆栈,返回用户态去执行自定义的信号处理函数,执行完后再回到内核态,继续完成被信号处理打断的"返回用户态"的动作。

t get_signal(struct ksignal *ksig)
{
    struct sighand_struct *sighand = current->sighand;
    struct signal_struct *signal = current->signal;
    int signr;

    if (unlikely(current->task_works))
        task_work_run();//执行task work机制中的work task_work 会使用 CAS;以无锁的形式添加了一个链表元素。

    if (unlikely(uprobe_deny_signal()))
        return 0;

    /*
     * Do this once, we can't return to user-mode if freezing() == T.
     * do_signal_stop() and ptrace_stop() do freezable_schedule() and
     * thus do not need another check after return.
     */
     // 系统在suspend时会调用suspend_freeze_processes()来freeze线程
    // 实际上也是唤醒线程,让线程在ret_to_user时刻去freeze自己
    try_to_freeze();

relock:
    spin_lock_irq(&sighand->siglock);
    /*
     * Every stopped thread goes here after wakeup. Check to see if
     * we should notify the parent, prepare_signal(SIGCONT) encodes
     * the CLD_ si_code into SIGNAL_CLD_MASK bits.
     */
    if (unlikely(signal->flags & SIGNAL_CLD_MASK)) {
        int why;// (2.3)在子进程状态变化的情况下,发送SIGCHLD信号通知父进程

        if (signal->flags & SIGNAL_CLD_CONTINUED)
            why = CLD_CONTINUED;
        else
            why = CLD_STOPPED;

        signal->flags &= ~SIGNAL_CLD_MASK;

        spin_unlock_irq(&sighand->siglock);

        /*
         * Notify the parent that we're continuing.  This event is
         * always per-process and doesn't make whole lot of sense
         * for ptracers, who shouldn't consume the state via
         * wait(2) either, but, for backward compatibility, notify
         * the ptracer of the group leader too unless it's gonna be
         * a duplicate.
         */
        read_lock(&tasklist_lock);
        do_notify_parent_cldstop(current, false, why);

        if (ptrace_reparented(current->group_leader))
            do_notify_parent_cldstop(current->group_leader,
                        true, why);
        read_unlock(&tasklist_lock);

        goto relock;
    }

    for (;;) {
        struct k_sigaction *ka;

        if (unlikely(current->jobctl & JOBCTL_STOP_PENDING) &&
            do_signal_stop(0))
            goto relock;

        if (unlikely(current->jobctl & JOBCTL_TRAP_MASK)) {
            do_jobctl_trap();
            spin_unlock_irq(&sighand->siglock);
            goto relock;
        }
        // 从信号pending队列中,取出优先级最好的信号

        signr = dequeue_signal(current, &current->blocked, &ksig->info);

        if (!signr)
            break; /* will return 0 */

        if (unlikely(current->ptrace) && signr != SIGKILL) {
            signr = ptrace_signal(signr, &ksig->info);
            if (!signr)
                continue;
        }
//// 从信号处理数组sighand中,取出信号对应的处理函数
        ka = &sighand->action[signr-1];

        /* Trace actually delivered signals. */
        trace_signal_deliver(signr, &ksig->info, ka);
    // (2.6.1)信号处理的第1种方法:忽略
        if (ka->sa.sa_handler == SIG_IGN) /* Do nothing.  */
            continue;
        // (2.6.2)信号处理的第2种方法:调用用户态注册的处理函数
        // 获取到用户态的处理函数指针,返回调用handle_signal()来执行
        if (ka->sa.sa_handler != SIG_DFL) {
            /* Run the handler.  */
            ksig->ka = *ka;

            if (ka->sa.sa_flags & SA_ONESHOT)
                ka->sa.sa_handler = SIG_DFL;

            break; /* will return non-zero "signr" value */
        }

        /*
         * Now we are doing the default action for this signal.
         */// (2.6.3)信号处理的第3种方法:调用默认的内核态处理函数
         // (2.6.3.1)SIG_KERNEL_IGNORE_MASK信号的默认处理方法Ignore:忽略
        // #define SIG_KERNEL_IGNORE_MASK (
        //        rt_sigmask(SIGCONT)   |  rt_sigmask(SIGCHLD)   | 
        //         rt_sigmask(SIGWINCH)  |  rt_sigmask(SIGURG)    )
        if (sig_kernel_ignore(signr)) /* Default is nothing. */
            continue;

        /*
         * Global init gets no signals it doesn't want.
         * Container-init gets no signals it doesn't want from same
         * container.
         *
         * Note that if global/container-init sees a sig_kernel_only()
         * signal here, the signal must have been generated internally
         * or must have come from an ancestor namespace. In either
         * case, the signal cannot be dropped.
         */
        if (unlikely(signal->flags & SIGNAL_UNKILLABLE) &&
                !sig_kernel_only(signr))
            continue;
        // (2.6.3.2)SIG_KERNEL_STOP_MASK信号的默认处理方法Stop:do_signal_stop()
                // #define SIG_KERNEL_STOP_MASK (
                // rt_sigmask(SIGSTOP)   |  rt_sigmask(SIGTSTP)   | 
                // rt_sigmask(SIGTTIN)   |  rt_sigmask(SIGTTOU)   )

        if (sig_kernel_stop(signr)) {
            /*
             * The default action is to stop all threads in
             * the thread group.  The job control signals
             * do nothing in an orphaned pgrp, but SIGSTOP
             * always works.  Note that siglock needs to be
             * dropped during the call to is_orphaned_pgrp()
             * because of lock ordering with tasklist_lock.
             * This allows an intervening SIGCONT to be posted.
             * We need to check for that and bail out if necessary.
             */
            if (signr != SIGSTOP) {
                spin_unlock_irq(&sighand->siglock);

                /* signals can be posted during this window */
                    // 不是SIGSTOP信号,且是孤儿进程组
                if (is_current_pgrp_orphaned())
                    goto relock;

                spin_lock_irq(&sighand->siglock);
            }

            if (likely(do_signal_stop(ksig->info.si_signo))) {
                /* It released the siglock.  */
                goto relock;
            }

            /*
             * We didn't actually stop, due to a race
             * with SIGCONT or something like that.
             */
            continue;
        }

        spin_unlock_irq(&sighand->siglock);

        /*
         * Anything else is fatal, maybe with a core dump.
         */
        current->flags |= PF_SIGNALED;

        // (2.6.3.3)SIG_KERNEL_COREDUMP_MASK信号的默认处理方法Dump:do_coredump() & do_group_exit()
        // #define SIG_KERNEL_COREDUMP_MASK (
        //         rt_sigmask(SIGQUIT)   |  rt_sigmask(SIGILL)    | 
        //     rt_sigmask(SIGTRAP)   |  rt_sigmask(SIGABRT)   | 
        //         rt_sigmask(SIGFPE)    |  rt_sigmask(SIGSEGV)   | 
        //     rt_sigmask(SIGBUS)    |  rt_sigmask(SIGSYS)    | 
        //         rt_sigmask(SIGXCPU)   |  rt_sigmask(SIGXFSZ)   | 
        //     SIGEMT_MASK                       )

        if (sig_kernel_coredump(signr)) {
            if (print_fatal_signals)
                print_fatal_signal(ksig->info.si_signo);
            proc_coredump_connector(current);
            /*
             * If it was able to dump core, this kills all
             * other threads in the group and synchronizes with
             * their demise.  If we lost the race with another
             * thread getting here, it set group_exit_code
             * first and our do_group_exit call below will use
             * that value and ignore the one we pass it.
             */
            do_coredump(&ksig->info);
        }

        /*
         * Death signals, no core dump.
         */// (2.6.3.4)Death signals信号的默认处理方法Terminate:do_group_exit()
        do_group_exit(ksig->info.si_signo);
        /* NOTREACHED */
    }
    spin_unlock_irq(&sighand->siglock);

    ksig->sig = signr;
    return ksig->sig > 0;
}
/*
 * collect_signal - take the queued information for sig off a pending list.
 *
 * Copies the first siginfo queued for sig into info and frees its queue
 * entry. The sig bit in list->signal is cleared only when no second entry
 * for the same signal is queued. When no entry is queued at all, info is
 * filled with a synthetic SI_USER record with pid and uid 0.
 *
 * *resched_timer is written only when a queue entry was found; it is set
 * when that entry is a preallocated one carrying SI_TIMER info with a
 * non-zero si_sys_private.
 */
static void collect_signal(int sig, struct sigpending *list, siginfo_t *info,
               bool *resched_timer)
{
    struct sigqueue *q, *first = NULL;

    /*
     * Collect the siginfo appropriate to this signal.  Check if
     * there is another siginfo for the same signal.
    */
    list_for_each_entry(q, &list->list, list) {
        if (q->info.si_signo == sig) {
            if (first)
                goto still_pending;
            first = q;
        }
    }// walked the whole pending list: at most one entry is queued for sig

    sigdelset(&list->signal, sig);

    if (first) {
still_pending:
        list_del_init(&first->list); // unlink the entry from the pending list
        copy_siginfo(info, &first->info);// hand the info stored at send time to the caller

        *resched_timer =
            (first->flags & SIGQUEUE_PREALLOC) &&
            (info->si_code == SI_TIMER) &&
            (info->si_sys_private);

        __sigqueue_free(first); // release the sigqueue entry created when the signal was sent
    } else {
        /*
         * Ok, it wasn't in the queue.  This must be
         * a fast-pathed signal or we must have been
         * out of queue space.  So zero out the info.
         */
        // nothing was queued for this signal, so build a minimal info record
        info->si_signo = sig;
        info->si_errno = 0;
        info->si_code = SI_USER;
        info->si_pid = 0;
        info->si_uid = 0;
    }
}

/*
 * Dequeue one signal from a single pending queue.
 *
 * next_signal() selects a signal from pending with respect to mask. When
 * one is found, collect_signal() moves its information into info (and may
 * set *resched_timer). Returns the signal number, or 0 when nothing was
 * found.
 */
static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
            siginfo_t *info, bool *resched_timer)
{
    int signr = next_signal(pending, mask);

    /* Nothing deliverable on this queue. */
    if (!signr)
        return 0;

    collect_signal(signr, pending, info, resched_timer);
    return signr;
}

/*
 * Dequeue a signal and return the element to the caller, which is
 * expected to free it.
 *
 * All callers have to hold the siglock.
 *
 * The private queue of tsk is tried first and the shared queue of its
 * thread group second. Returns the signal number with its information
 * stored in info, or 0 when no signal outside mask is pending.
 */
int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
{
    bool resched_timer = false;
    int signr;

    /* We only dequeue private signals from ourselves, we don't let
     * signalfd steal them
     */
    signr = __dequeue_signal(&tsk->pending, mask, info, &resched_timer);
    if (!signr) {// nothing on the private queue: try the thread group's shared queue
        signr = __dequeue_signal(&tsk->signal->shared_pending,
                     mask, info, &resched_timer);
        /*
         * itimer signal ?
         *
         * itimers are process shared and we restart periodic
         * itimers in the signal delivery path to prevent DoS
         * attacks in the high resolution timer case. This is
         * compliant with the old way of self-restarting
         * itimers, as the SIGALRM is a legacy signal and only
         * queued once. Changing the restart behaviour to
         * restart the timer in the signal dequeue path is
         * reducing the timer noise on heavy loaded !highres
         * systems too.
         */
        if (unlikely(signr == SIGALRM)) {
            struct hrtimer *tmr = &tsk->signal->real_timer;

            if (!hrtimer_is_queued(tmr) &&
                tsk->signal->it_real_incr.tv64 != 0) {
                hrtimer_forward(tmr, tmr->base->get_time(),
                        tsk->signal->it_real_incr);
                hrtimer_restart(tmr);
            }
        }
    }
    /*
     * For reference, recalc_sigpending() as quoted by the author of these
     * notes (its inner comment delimiters were dropped so that the code
     * fits inside this comment):
     *
     * void recalc_sigpending(void)
     * {
     *     if (!recalc_sigpending_tsk(current) && !freezing(current))
     *         clear_thread_flag(TIF_SIGPENDING);
     *
     * }
     * static int recalc_sigpending_tsk(struct task_struct *t)
     * {
     *     if ((t->jobctl & JOBCTL_PENDING_MASK) ||
     *         PENDING(&t->pending, &t->blocked) ||
     *         PENDING(&t->signal->shared_pending, &t->blocked)) {
     *         set_tsk_thread_flag(t, TIF_SIGPENDING);
     *         return 1;
     *     }
     *      We must never clear the flag in another thread, or in current
     *      when it's possible the current syscall is returning -ERESTART*.
     *      So we don't clear it here, and only callers who know they should do.
     *
     *     return 0;
     * }
     */
    recalc_sigpending();// a signal may have left the queues, so recompute the "signal pending" flag
    if (!signr)
        return 0;
    /*
#define SIG_KERNEL_STOP_MASK (
        rt_sigmask(SIGSTOP)   |  rt_sigmask(SIGTSTP)   | 
        rt_sigmask(SIGTTIN)   |  rt_sigmask(SIGTTOU)   )*/
    if (unlikely(sig_kernel_stop(signr))) {
        /*
         * Set a marker that we have dequeued a stop signal.  Our
         * caller might release the siglock and then the pending
         * stop signal it is about to process is no longer in the
         * pending bitmasks, but must still be cleared by a SIGCONT
         * (and overruled by a SIGKILL).  So those cases clear this
         * shared flag after we've set it.  Note that this flag may
         * remain set after the signal we return is ignored or
         * handled.  That doesn't matter because its only purpose
         * is to alert stop-signal processing code when another
         * processor has come along and cleared the flag.
         */
        current->jobctl |= JOBCTL_STOP_DEQUEUED;
    }
    if (resched_timer) {
        /*
         * Release the siglock to ensure proper locking order
         * of timer locks outside of siglocks.  Note, we leave
         * irqs disabled here, since the posix-timers code is
         * about to disable them again anyway.
         */
        spin_unlock(&tsk->sighand->siglock);
        do_schedule_next_timer(info);
        spin_lock(&tsk->sighand->siglock);
    }
    return signr;
}

  由于信号处理程序是由用户提供的,所以信号处理程序的代码是在用户态的。而从系统调用返回到用户态前还是属于内核态,CPU是禁止内核态执行用户态代码的,那么怎么办?

答案是:先返回到用户态执行信号处理程序,执行完信号处理程序后再返回到内核态,然后在内核态完成收尾工作。

  从内核态返回到用户态时,CPU要从内核栈中找到返回到用户态的地址(就是调用系统调用的下一条代码指令地址),Linux为了先让信号处理程序执行,所以就需要把这个返回地址修改为信号处理程序的入口,这样当从系统调用返回到用户态时,就可以执行信号处理程序了。

所以,handle_signal() 调用了 setup_frame() 函数来构建这个过程的运行环境-->其实就是修改内核栈和用户栈相应的数据来完成

handle_signal() 函数中的具体实现。

  • arch/arm64/kernel/signal.c:
  • -> ret_to_user() -> do_notify_resume() -> do_signal() -> handle_signal()
/*
 * handle_signal - prepare the current task to run a user signal handler.
 * @ksig: the dequeued signal together with its registered action
 * @regs: the saved user register state of the task
 *
 * Maps the signal number through the exec domain's inverse table when one
 * is present, has the matching frame-setup routine build the signal frame,
 * validates the resulting user registers, and reports the outcome through
 * signal_setup_done().
 */
static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
{
    struct thread_info *ti = current_thread_info();
    struct task_struct *task = current;
    sigset_t *saved_mask = sigmask_to_save();
    int signo = ksig->sig;
    int failed;

    /* Translate the signal number if an inverse signal map is installed. */
    if (signo < 32 && ti->exec_domain && ti->exec_domain->signal_invmap)
        signo = ti->exec_domain->signal_invmap[signo];

    /*
     * Build the stack frame.  This is the step that swaps the user-mode
     * return address for the handler registered in ksig->ka.
     */
    if (!is_compat_task())
        failed = setup_rt_frame(signo, ksig, saved_mask, regs);
    else if (ksig->ka.sa.sa_flags & SA_SIGINFO)
        failed = compat_setup_rt_frame(signo, ksig, saved_mask, regs);
    else
        failed = compat_setup_frame(signo, ksig, saved_mask, regs);

    /* Registers that are not sane after frame setup count as a failure. */
    if (!valid_user_regs(&regs->user_regs))
        failed |= 1;

    /*
     * On success, fast forward the stepping logic so that we step into
     * the signal handler.
     */
    if (failed == 0)
        user_fastforward_single_step(task);

    signal_setup_done(failed, ksig, 0);
}
/*
 * __setup_rt_frame - build an rt signal frame on the user stack and point
 * the saved registers at the signal handler.
 * @sig:  signal number, stored in the frame and loaded into regs->ax
 * @ksig: signal info plus the registered action (handler, flags, restorer)
 * @set:  signal mask copied into the frame's ucontext
 * @regs: saved user registers; rewritten so the return to user mode enters
 *        the handler
 *
 * Returns 0 on success, or -EFAULT if the frame is not writable or any of
 * the copies to user space reported an error.
 *
 * NOTE(review): this looks like the x86 32-bit variant (vdso_image_32,
 * ax/dx/cx, __USER_DS), not arm64 code -- confirm against the kernel tree
 * the snippet was taken from.
 */
static int __setup_rt_frame(int sig, struct ksignal *ksig,
                sigset_t *set, struct pt_regs *regs)
{
    struct rt_sigframe __user *frame;
    void __user *restorer;
    int err = 0;
    void __user *fpstate = NULL;

    /* Obtain the user-space location of the frame (and of fpstate). */
    frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fpstate);

    /* Refuse to continue if the whole frame is not writable user memory. */
    if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
        return -EFAULT;

    /*
     * Batched user-space stores.  err is handed to put_user_catch();
     * presumably it becomes non-zero if any put_user_ex() faulted --
     * confirm against the macro definitions.
     */
    put_user_try {
        put_user_ex(sig, &frame->sig);
        put_user_ex(&frame->info, &frame->pinfo);
        put_user_ex(&frame->uc, &frame->puc);

        /* Create the ucontext.  */
        if (cpu_has_xsave)
            put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
        else
            put_user_ex(0, &frame->uc.uc_flags);
        put_user_ex(0, &frame->uc.uc_link);
        save_altstack_ex(&frame->uc.uc_stack, regs->sp);

        /*
         * Set up to return from userspace: choose the address of the code
         * that runs after the signal handler returns.  The default is the
         * vDSO's __kernel_rt_sigreturn; a handler registered with
         * SA_RESTORER supplies its own.
         */
        restorer = current->mm->context.vdso +
            vdso_image_32.sym___kernel_rt_sigreturn;
        if (ksig->ka.sa.sa_flags & SA_RESTORER)
            restorer = ksig->ka.sa.sa_restorer;
        put_user_ex(restorer, &frame->pretcode);

        /*
         * This is movl $__NR_rt_sigreturn, %ax ; int $0x80
         *
         * WE DO NOT USE IT ANY MORE! It's only left here for historical
         * reasons and because gdb uses it as a signature to notice
         * signal handler stack frames.
         */
        put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
    } put_user_catch(err);
    
    /*
     * Keep in mind what is copied to user space here: while a signal is
     * being handled, uc and the related data can be used to backtrace the
     * process's call stack.
     */
    err |= copy_siginfo_to_user(&frame->info, &ksig->info);
    err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
                regs, set->sig[0]);
    err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));

    /* Any failure above, from either group of copies, aborts the setup. */
    if (err)
        return -EFAULT;

    /* Set up registers for signal handler */
    regs->sp = (unsigned long)frame;
    regs->ip = (unsigned long)ksig->ka.sa.sa_handler; /* regs is the register set saved on the kernel stack; ip holds the address of the next instruction to execute */
    regs->ax = (unsigned long)sig;
    regs->dx = (unsigned long)&frame->info;
    regs->cx = (unsigned long)&frame->uc;
    
    /* Load the user data/stack and code segment selectors. */
    regs->ds = __USER_DS;
    regs->es = __USER_DS;
    regs->ss = __USER_DS;
    regs->cs = __USER_CS;

    return 0;
}

   图中的 eip 就是内核态返回到用户态后开始执行的第一条指令地址,所以把 eip 改成信号处理程序的地址就可以在内核态返回到用户态的时候自动执行信号处理程序了。

   目前内核态返回到用户态时自动执行信号处理程序了,但是当信号处理程序执行完怎么返回到内核态呢?

  Linux的做法就是在用户态栈空间构建一个 Frame(帧),构建这个帧的目的就是为了执行完信号处理程序后返回到内核态,并恢复原来内核栈的内容。返回到内核态的方式是调用一个名为 sigreturn() 的系统调用,然后在 sigreturn() 中恢复原来内核栈的内容。

关于使用如下结构在handle signal 过程中回溯进程调用堆栈可以参考之前的文章: 调试无法coredump的思路

/*
 * Do a signal return; undo the signal stack. These are aligned to 128-bit.
 *
 * Frame holding the signal information and the user context for one
 * signal delivery.
 *
 * NOTE(review): __setup_rt_frame() in this file writes frame->sig, pinfo,
 * puc, pretcode and retcode, none of which exist in this definition, so
 * that function evidently targets a different rt_sigframe layout.
 */
struct rt_sigframe {
    struct siginfo info;    /* information describing the signal */
    struct ucontext uc;     /* user context for the interrupted code */
    u64 fp;                 /* presumably a saved frame pointer -- TODO confirm */
    u64 lr;                 /* presumably a saved link register -- TODO confirm */
};
http代理服务器(3-4-7层代理)-网络事件库公共组件、内核kernel驱动 摄像头驱动 tcpip网络协议栈、netfilter、bridge 好像看过!!!! 但行好事 莫问前程 --身高体重180的胖子
原文地址:https://www.cnblogs.com/codestack/p/15088759.html