struct pid & pid_namespace

struct pid & pid_namespace

alloc_pid() & task_struct插入pid struct tasks[] hash list

fork进程/线程时,copy_process()会给新创建的线程alloc一个struct pid结构体:此时传入copy_process()的pid参数是NULL(不等于&init_struct_pid),所以会调用alloc_pid()

/*
 * Excerpt of copy_process(): only the signature and the PID-allocation step
 * are reproduced here; the remainder of the function body is elided.
 */
static __latent_entropy struct task_struct *copy_process(
                    unsigned long clone_flags,
                    unsigned long stack_start,
                    unsigned long stack_size,
                    int __user *child_tidptr,
                    struct pid *pid,
                    int trace,
                    unsigned long tls,
                    int node)
{
    /*
     * Unless the caller handed in &init_struct_pid, allocate a new
     * struct pid in the namespace given by nsproxy->pid_ns_for_children.
     * NOTE(review): p is not declared in this excerpt; presumably it is
     * the task_struct being created.
     */
    if (pid != &init_struct_pid) {
        pid = alloc_pid(p->nsproxy->pid_ns_for_children);
        /* Failure comes back as an ERR_PTR; convert it to an errno. */
        if (IS_ERR(pid)) {
            retval = PTR_ERR(pid);
            goto bad_fork_cleanup_thread;
        }
    }

看下alloc_pid干了些啥。首先它会alloc一个pid struct,然后设置这个pid struct:

调用idr_alloc_cyclic(),这个函数的返回值就是当前fork线程的pid;

设置pid里numbers成员(nr和ns)

for循环共执行ns->level + 1次(i从ns->level递减到0)。在没有开CONFIG_PID_NS时,pid namespace只有一个level,ns->level始终是0,所以此时只会有一次循环,只会设置pid->numbers[0];index 0对应的即是全局的pid,在整个系统中唯一;

如果开启了CONFIG_PID_NS,此时ns->level将有可能不是0,此时pid->numbers[0]是全局的upid,pid->numbers[1]则对应numbers[0]所在namespace的child namespace,pid->numbers[2]等依次类推。
alloc_pid()的参数ns在没有开启CONFIG_PID_NS的情况下,都是一样的,即指向init_pid_ns

设置完pid struct后,调用idr_replace将此pid struct和alloc的pid作为一对mapping值保存起来:

/*
 * alloc_pid - allocate a struct pid and reserve a PID number for it in @ns
 * and in every ancestor namespace reached through ->parent.
 *
 * @ns: the PID namespace the new pid belongs to; ns->level decides how many
 *      numbers[] entries are filled (indices ns->level down to 0).
 *
 * Returns the new struct pid on success. On failure the visible code returns
 * ERR_PTR(-ENOMEM) when the cache allocation fails, and otherwise jumps to
 * the out_free / out_unlock labels, which are not part of this excerpt.
 */
struct pid *alloc_pid(struct pid_namespace *ns)
{
    struct pid *pid;
    enum pid_type type;
    int i, nr;
    struct pid_namespace *tmp;
    struct upid *upid;
    int retval = -ENOMEM;

    pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL);
    if (!pid)
        return ERR_PTR(retval);

    tmp = ns;
    pid->level = ns->level;

    /*
     * Walk from the namespace of @ns up through its parents, reserving one
     * number per level. The loop body runs ns->level + 1 times.
     */
    for (i = ns->level; i >= 0; i--) {
        int pid_min = 1;

        idr_preload(GFP_KERNEL);
        spin_lock_irq(&pidmap_lock);

        /*
         * init really needs pid 1, but after reaching the maximum
         * wrap back to RESERVED_PIDS
         */
        if (idr_get_cursor(&tmp->idr) > RESERVED_PIDS)
            pid_min = RESERVED_PIDS;

        /*
         * Store a null pointer so find_pid_ns does not find
         * a partially initialized PID (see below).
         */
        nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min,
                      pid_max, GFP_ATOMIC);
        spin_unlock_irq(&pidmap_lock);
        idr_preload_end();

        /* An exhausted ID space (-ENOSPC) is reported as -EAGAIN. */
        if (nr < 0) {
            retval = (nr == -ENOSPC) ? -EAGAIN : nr;
            goto out_free;
        }

        /* Record the (number, namespace) pair for this level. */
        pid->numbers[i].nr = nr;
        pid->numbers[i].ns = tmp;
        tmp = tmp->parent;
    }

    /* retval is still -ENOMEM here if pid_ns_prepare_proc() fails. */
    if (unlikely(is_child_reaper(pid))) {
        if (pid_ns_prepare_proc(ns))
            goto out_free;
    }

    get_pid_ns(ns);
    atomic_set(&pid->count, 1);
    /* Start with an empty task list for every pid type. */
    for (type = 0; type < PIDTYPE_MAX; ++type)
        INIT_HLIST_HEAD(&pid->tasks[type]);

    /* Publish from the deepest level (ns->level) back down to numbers[0]. */
    upid = pid->numbers + ns->level;
    spin_lock_irq(&pidmap_lock);
    /* Bail out if the PIDNS_ADDING bit is no longer set in pid_allocated. */
    if (!(ns->pid_allocated & PIDNS_ADDING))
        goto out_unlock;
    for ( ; upid >= pid->numbers; --upid) {
        /* Make the PID visible to find_pid_ns. */
        idr_replace(&upid->ns->idr, pid, upid->nr);  
        upid->ns->pid_allocated++;
    }
    spin_unlock_irq(&pidmap_lock);

    return pid;

alloc_pid()后,会设置当前fork的task_struct的pid成员,此pid成员就是当前fork出的线程的pid,这个pid数值即是上面alloc_pid()里分配的pid结构体里的numbers[0].nr,即系统全局的线程的pid,具有唯一性

/*
 * pid_nr - return the number stored in numbers[0] of @pid.
 *
 * A NULL @pid yields 0.
 */
static inline pid_t pid_nr(struct pid *pid)
{
    if (!pid)
        return 0;

    return pid->numbers[0].nr;
}

接下来则会将当前fork的task_struct和上面alloc的pid struct关联起来。如果当前fork的线程是进程的主线程(thread group leader),则会将主线程链接到上面alloc给它的struct pid的tasks[PIDTYPE_PID] & tasks[PIDTYPE_TGID] hash list上,以及将它链接到其父进程所链接到的tasks[PIDTYPE_PGID]和tasks[PIDTYPE_SID] hash list上;

如果不是主线程,则只会将此task_struct插入上面给它alloc的pid struct的tasks[PIDTYPE_PID] hash list。

copy_process()
    /*
     * Excerpt of copy_process(): associate the new task p with struct pid
     * objects and attach it to their per-type task lists.
     * NOTE(review): `delayed` and the surrounding locking are defined
     * outside this excerpt.
     */
    init_task_pid_links(p);
    if (likely(p->pid)) {
        ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);

        /* Every task records its own struct pid under PIDTYPE_PID. */
        init_task_pid(p, PIDTYPE_PID, pid);
        if (thread_group_leader(p)) {
            /*
             * A group leader also uses its own pid as TGID, and takes
             * the process group and session pids of current.
             */
            init_task_pid(p, PIDTYPE_TGID, pid);
            init_task_pid(p, PIDTYPE_PGID, task_pgrp(current));
            init_task_pid(p, PIDTYPE_SID, task_session(current));

            /* Register p as its namespace's child reaper and flag it unkillable. */
            if (is_child_reaper(pid)) {
                ns_of_pid(pid)->child_reaper = p;
                p->signal->flags |= SIGNAL_UNKILLABLE;
            }
            p->signal->shared_pending.signal = delayed.signal;
            p->signal->tty = tty_kref_get(current->signal->tty);
            /*
             * Inherit has_child_subreaper flag under the same
             * tasklist_lock with adding child to the process tree
             * for propagate_has_child_subreaper optimization.
             */
            p->signal->has_child_subreaper = p->real_parent->signal->has_child_subreaper ||
                             p->real_parent->signal->is_child_subreaper;
            /* Link into the parent's children list and the global task list. */
            list_add_tail(&p->sibling, &p->real_parent->children);
            list_add_tail_rcu(&p->tasks, &init_task.tasks);
            attach_pid(p, PIDTYPE_TGID);
            attach_pid(p, PIDTYPE_PGID);
            attach_pid(p, PIDTYPE_SID);
            __this_cpu_inc(process_counts);
        } else {
            /* Non-leader thread: update current's signal counters and join the group's thread lists. */
            current->signal->nr_threads++;
            atomic_inc(&current->signal->live);
            atomic_inc(&current->signal->sigcnt);
            task_join_group_stop(p);
            list_add_tail_rcu(&p->thread_group,
                      &p->group_leader->thread_group);
            list_add_tail_rcu(&p->thread_node,
                      &p->signal->thread_head);
        }
        /* Leaders and non-leaders alike are attached under PIDTYPE_PID. */
        attach_pid(p, PIDTYPE_PID);
        nr_threads++;
    }

 setpgid创建进程组或者迁移某个进程到另外一个进程组

1. setpgid创建新的进程组

此时setpgid(pid_t pid, pid_t pgid) pid参数和pgid参数要相等,并且此pid要是thread group leader,比如user space调用setpgid(getpid(), getpid())或者setpgid(0, 0)或者setpgid(getpid(), 0)。此后此进程将脱离其父进程所在的进程组,自己创建了一个独立的进程组。

2. setpgid()迁移一个进程到另外的进程组

此时pgid参数不能为0,setpgid()的pgid参数是另外一个进程组的组长进程的pid,同时要迁移的进程所在的进程组和要迁往的进程组要在同一个session里,此后要迁移的进程将迁入目标进程组,其task_struct将链接到目标进程组组长进程的pid struct的tasks[PIDTYPE_PGID] hash list

/*
 * setpgid(pid, pgid) - put the process identified by @pid into the process
 * group identified by @pgid.
 *
 * @pid:  target process; 0 selects the caller's thread-group leader.
 * @pgid: target process group; 0 means "use @pid", i.e. the target becomes
 *        the leader of its own group.
 *
 * Returns 0 on success or a negative errno:
 *   -EINVAL  @pgid is negative, or the target is not a thread-group leader
 *   -ESRCH   no task matches @pid, or the target is neither the caller's
 *            group leader nor a task whose real_parent is in the caller's
 *            thread group
 *   -EPERM   the target is in a different session than the caller, has
 *            signal->leader set, or @pgid names no process group in the
 *            caller's session
 *   -EACCES  the target is a child whose PF_FORKNOEXEC flag is clear
 *   or whatever security_task_setpgid() returns.
 */
SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
{
    struct task_struct *p;
    struct task_struct *group_leader = current->group_leader;
    struct pid *pgrp;
    int err;

    /* Resolve the "0 means default" conventions before validating. */
    if (!pid)
        pid = task_pid_vnr(group_leader);
    if (!pgid)
        pgid = pid;
    if (pgid < 0)
        return -EINVAL;
    rcu_read_lock();

    /* From this point forward we keep holding onto the tasklist lock
     * so that our parent does not change from under us. -DaveM
     */
    write_lock_irq(&tasklist_lock);

    err = -ESRCH;
    p = find_task_by_vpid(pid);
    if (!p)
        goto out;

    /* Only a thread-group leader can be moved between process groups. */
    err = -EINVAL;
    if (!thread_group_leader(p))
        goto out;

    if (same_thread_group(p->real_parent, group_leader)) {
        /* Target is a child of the calling process. */
        err = -EPERM;
        if (task_session(p) != task_session(group_leader))
            goto out;
        /* NOTE(review): PF_FORKNOEXEC presumably marks "forked but not yet exec'd" - confirm. */
        err = -EACCES;
        if (!(p->flags & PF_FORKNOEXEC))
            goto out;
    } else {
        /* Otherwise the only acceptable target is the caller itself. */
        err = -ESRCH;
        if (p != group_leader)
            goto out;
    }

    /* NOTE(review): signal->leader presumably marks a session leader - confirm. */
    err = -EPERM;
    if (p->signal->leader)
        goto out;

    /* Default: the target's own pid becomes the process group. */
    pgrp = task_pid(p);
    if (pgid != pid) {
        struct task_struct *g;

        /*
         * Joining an existing group: it must have at least one member
         * and belong to the caller's session (err is still -EPERM).
         */
        pgrp = find_vpid(pgid);
        g = pid_task(pgrp, PIDTYPE_PGID);
        if (!g || task_session(g) != task_session(group_leader))
            goto out;
    }

    err = security_task_setpgid(p, pgid);
    if (err)
        goto out;

    /* Skip the relink when the task is already in the requested group. */
    if (task_pgrp(p) != pgrp)
        change_pid(p, PIDTYPE_PGID, pgrp);

    err = 0;
out:
    /* All paths lead to here, thus we are safe. -DaveM */
    write_unlock_irq(&tasklist_lock);
    rcu_read_unlock();
    return err;
}

进程的task_struct所插入的pid struct tasks[] hash list

1. 如果进程没有调用setpgid系统调用,并且其父进程也没有执行此系统调用,则其链接关系如下图,task_struct通过其pid_links[PIDTYPE_PID]/[PIDTYPE_TGID]插入它自己的struct pid的tasks[PIDTYPE_PID]/[PIDTYPE_TGID] hash list,其它pid_links[PIDTYPE_PGID]/[PIDTYPE_SID]应该是插入了init_struct_pid的tasks[PIDTYPE_PGID]/[PIDTYPE_SID] hash list:

2. 如果进程有执行setpgid创建了进程组,则pid_links[]的链接关系如下图。

Struct pid是某个进程fork时分配的,后面通过setpgid(0,0)创建一个进程组,首先将自己的task_struct通过pid_links[PIDTYPE_PGID]链接到自己pid struct的tasks[PIDTYPE_PGID] hash list上。后面此进程创建子进程时子进程也都会类似这样将其task_struct链入此pid struct的tasks[PIDTYPE_PGID] hash list上,这样同一个进程组中的所有进程将会被链接到组长进程的pid struct的tasks[PIDTYPE_PGID] hash list上:

                                (1)进程组struct pid tasks[] hash list链接关系

* 进程组中的成员进程是以进程的主线程的task_struct/struct pid来表示

 从上述两图可以看出,对于主线程,线程自己的pid struct里的tasks[PIDTYPE_PID]/[PIDTYPE_TGID] hash list长度只有1,即只有一个list node,即为自己本身的task_struct.pid_links[PIDTYPE_PID]/[PIDTYPE_TGID]。

3. 非主线程的struct pid.tasks[] hash list链接关系

 

如果是非主线程,则只会用到一个hash list,即tasks[PIDTYPE_PID] hash list,并且此hash list也只有一个node,即此非主线程的task_struct.pid_links[PIDTYPE_PID],同时没有和所在进程内的其它线程以及其它进程有链接关系,所以非主线程的struct pid.tasks[]链接关系很简单

注意:

1. 不管是主线程还是非主线程,如果属于user space的,均会给它alloc一个struct pid;

2. 不管是主线程还是非主线程,因为struct pid.tasks[PIDTYPE_PID] hash list上只有一个node,所以find_task_by_vpid()在tasks[PIDTYPE_PID] hash list上取第一个node就得到了pid_t对应的task_struct

 CONFIG_PID_NS开启条件下的多级pid_namespace

上述level 1是level 2的parent;level 0是level 1的parent.

一个level 2的线程fork时,会从level 2开始alloc pid,一直到level 0,所以这里它会alloc 3个pid,即会alloc3个pid namespace的pid number。

level 0是全局的,在通过pid_nr()设置task_struct pid_t成员时,其就是取的level 0 pid_namespace的pid number。

常用pid struct相关API

  • static inline pid_t task_pid_vnr(struct task_struct *tsk):根据task_struct得到对应的pid

  • struct task_struct *find_task_by_vpid(pid_t vnr):根据pid num得到对应的task_struct

原文地址:https://www.cnblogs.com/aspirs/p/15753834.html