SD卡中的completion实现

 Linux系统提供了一种比信号量更好的同步机制，即completion，它用于一个执行单元等待另一个执行单元执行完某事。
 Linux系统中与completion相关的操作主要有以下4种:
      (1) 定义completion
          struct completion my_completion;
      (2) 初始化completion
          init_completion(&my_completion);
         对my_completion的定义和初始化可以通过如下快捷方式实现
          DECLARE_COMPLETION(my_completion);
      (3) 等待completion
          void wait_for_completion(struct completion *c);
      (4) 唤醒completion
          void complete(struct completion *c);
          void complete_all(struct completion *c);
         前者只唤醒一个等待的执行单元，后者唤醒所有等待同一completion的执行单元。

二、作用：

虽然信号量可以用于实现同步，但往往可能会出现一些不好的结果。例如：进程A分配了一个临时信号量变量，把它初始化为关闭的MUTEX，
并把其地址传递给进程B，然后在该信号量上调用down()，进程A打算一旦被唤醒就撤销该信号量。随后，运行在不同CPU上的进程B在同一个信号量
上调用up()。然而，up()和down()的目前实现还允许这两个函数在同一个信号量上并发执行。因此，进程A可以被唤醒并撤销临时信号量，而进程B
还在运行up()函数。结果up()可能试图访问一个已经不存在的数据结构，这样就会出现错误。为了防止发生这种错误，内核设计了completion机制
专门用于同步。也就是说，在多CPU的情况下，把信号量用于这种场景效果不好。

SD卡中表现为：

void mmc_wait_for_req(struct mmc_host *host, struct mmc_request *mrq)
{
    DECLARE_COMPLETION_ONSTACK(complete);//创建一个completion结构体放在内核堆栈中，如果不加ONSTACK就是静态申请，会存放在全局变量区
    mrq->done_data = &complete;
    mrq->done = mmc_wait_done;
    mmc_start_request(host, mrq);
    wait_for_completion(&complete);
}
/*
 * Done callback installed by mmc_wait_for_req().
 * @mrq: request whose done_data was set to the address of the waiter's
 *       completion (mrq->done_data = &complete in mmc_wait_for_req()).
 *
 * Signals that completion via complete().
 */
static void mmc_wait_done(struct mmc_request *mrq)
{
    struct completion *waiter = mrq->done_data;

    complete(waiter);
}

那么为什么要这样做呢？

  linux/include/linux/completion.h
  13struct completion {
  14        unsigned int done;
  //指示等待的事件是否完成。初始化时为0。如果为0，则表示等待的事件未完成。大于0表示等待的事件已经完成。
  15        wait_queue_head_t wait;
            //一个等待队列wait
  16};

/*
 * init_completion - put a completion into its initial "not done" state.
 * @x: completion to initialise
 *
 * Clears the done counter and initialises the embedded wait queue head.
 */
static inline void init_completion(struct completion *x)
{
        x->done = 0;
        /*
         * Initialise the wait queue head embedded in the completion.
         *
         * NOTE(review): the original annotation said this call also adds
         * the current task to the queue and decides between signal and
         * event wake-up. In this file the enqueue and the sleep state are
         * only visible in do_wait_for_common(), so that claim looks
         * inaccurate -- confirm against init_waitqueue_head().
         *
         * Author's open question: a new wait queue is set up for every
         * completion -- is that wasteful?
         */
        init_waitqueue_head(&x->wait);
}

linux/kernel/sched.c
/*
 * wait_for_completion - wait on a completion via wait_for_common().
 * @x: completion to wait on
 *
 * Passes MAX_SCHEDULE_TIMEOUT as the timeout and TASK_UNINTERRUPTIBLE as
 * the sleep state; the value returned by wait_for_common() is discarded.
 * The author's annotation describes this as "event wake-up", i.e. the
 * sleeper is expected to be woken by the event rather than by a signal.
 */
void __sched wait_for_completion(struct completion *x)
{
        long timeout = MAX_SCHEDULE_TIMEOUT;
        int state = TASK_UNINTERRUPTIBLE;

        (void)wait_for_common(x, timeout, state);
}
/*
 * wait_for_common - locked wrapper around do_wait_for_common().
 * @x:       completion to wait on
 * @timeout: timeout value forwarded to do_wait_for_common()
 * @state:   task state forwarded to do_wait_for_common()
 *
 * Returns whatever do_wait_for_common() returns. Per the author's note,
 * @timeout is the maximum sleep time and an early wake-up yields the time
 * that was left over.
 */
static long __sched wait_for_common(struct completion *x, long timeout, int state)
{
        long remaining;

        /* This function may sleep. */
        might_sleep();

        /* do_wait_for_common() is entered and left with x->wait.lock held. */
        spin_lock_irq(&x->wait.lock);
        remaining = do_wait_for_common(x, timeout, state);
        spin_unlock_irq(&x->wait.lock);

        return remaining;
}
/*
 * do_wait_for_common - sleep until x->done becomes non-zero, the timeout
 * runs out, or a pending signal is reported for the given state.
 * @x:       completion to wait on; x->wait.lock is held by the caller
 *           (see wait_for_common()) and is dropped only around the sleep
 * @timeout: value passed to schedule_timeout() on each iteration
 * @state:   task state to sleep in
 *
 * Returns -ERESTARTSYS or 0 when the wait ended without x->done being set;
 * otherwise consumes one unit of x->done and returns the remaining timeout,
 * or 1 if that remaining value is 0.
 */
static inline long __sched do_wait_for_common(struct completion *x, long timeout, int state)
{
        /* done starts at 0 and is incremented by complete(). */
        if (!x->done) {
                /* Declare a wait-queue entry for the current task and append it to x->wait. */
                DECLARE_WAITQUEUE(wait, current);
                __add_wait_queue_tail_exclusive(&x->wait, &wait);
                do {
                        if (signal_pending_state(state, current)) {
                                /*
                                 * Return value used when the sleep can be ended by a signal;
                                 * per the author's note this does not apply to the
                                 * wait_for_completion() path.
                                 */
                                timeout = -ERESTARTSYS;
                                break;
                        }
                        /*
                         * Set the task state. Author's note: in the wait_for_completion()
                         * case only a wake-up can end the sleep, not a signal; a task is
                         * scheduled only while TASK_RUNNING, so it becomes schedulable
                         * again once woken.
                         */
                        __set_current_state(state);
                        /* Drop the spinlock before going to sleep. */
                        spin_unlock_irq(&x->wait.lock);
                        /*
                         * Let other tasks run for at most the given time; per the author's
                         * note, returning early requires the state to be set back to
                         * TASK_RUNNING.
                         */
                        timeout = schedule_timeout(timeout);
                        spin_lock_irq(&x->wait.lock);
                /* Repeat until x->done is non-zero or the timeout is used up. */
                } while (!x->done && timeout);
                /* Take this task's entry off the wait queue again. */
                __remove_wait_queue(&x->wait, &wait);
                /* Still not done: timeout is 0 or -ERESTARTSYS at this point. */
                if (!x->done)
                        return timeout;
        }
        /* Consume one completion. */
        x->done--;
        /* GNU "?:" extension: yields timeout when non-zero, otherwise 1. */
        return timeout ?: 1;
}

/*
 * complete - record one finished event and wake a waiter.
 * @x: completion to signal
 *
 * With x->wait.lock held, increments x->done and calls __wake_up_common()
 * with nr_exclusive = 1, so the wake-up walk stops after one exclusive
 * waiter has been woken successfully.
 */
void complete(struct completion *x)
{
        unsigned long irq_state;

        spin_lock_irqsave(&x->wait.lock, irq_state);

        /* One more completed event. */
        x->done += 1;
        /* Wake waiters on the queue; the author notes this ends up setting a task to TASK_RUNNING. */
        __wake_up_common(&x->wait, TASK_NORMAL, 1, 0, NULL);

        spin_unlock_irqrestore(&x->wait.lock, irq_state);
}
/*
 * __wake_up_common - walk a wait queue and invoke each entry's wake function.
 * @q:            wait queue head whose task_list is traversed
 * @mode:         passed unchanged to each entry's func (the author describes
 *                it as selecting which tasks are to be woken)
 * @nr_exclusive: number of exclusive waiters to wake before stopping
 * @wake_flags:   passed unchanged to each entry's func
 * @key:          passed unchanged to each entry's func
 *
 * The walk ends early once nr_exclusive entries that carry
 * WQ_FLAG_EXCLUSIVE have had their func return non-zero.
 */
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
                        int nr_exclusive, int wake_flags, void *key)
{
        wait_queue_t *entry, *lookahead;

        list_for_each_entry_safe(entry, lookahead, &q->task_list, task_list) {
                /*
                 * Sample the flags before the callback runs (presumably
                 * because the callback may change or unlink the entry --
                 * confirm).
                 */
                unsigned entry_flags = entry->flags;

                /* Callback reported no wake-up: move on. */
                if (!entry->func(entry, mode, wake_flags, key))
                        continue;
                /* Non-exclusive waiters do not count against the budget. */
                if (!(entry_flags & WQ_FLAG_EXCLUSIVE))
                        continue;
                /* Budget of exclusive wake-ups exhausted: stop. */
                if (--nr_exclusive == 0)
                        break;
        }
}
curr->func执行的函数如下：
/*
 * default_wake_function - wake the task stored in a wait-queue entry.
 * @curr:       wait-queue entry; curr->private is handed to try_to_wake_up()
 * @mode:       forwarded to try_to_wake_up() as its state argument
 * @wake_flags: forwarded to try_to_wake_up()
 * @key:        not used by this function
 *
 * Returns the result of try_to_wake_up().
 */
int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
                          void *key)
{
        struct task_struct *task = curr->private;

        return try_to_wake_up(task, mode, wake_flags);
}

/*
 * try_to_wake_up - wake up a task if its state matches the given mask.
 * @p:          task to wake
 * @state:      mask tested against p->state; if no bit matches, nothing is
 *              done
 * @wake_flags: wake flags, forwarded to select_task_rq() and
 *              check_preempt_curr() and tested for WF_SYNC
 *
 * Returns 1 if the task was activated by this call (activate_task() path),
 * 0 otherwise. When the task is already on a run queue (p->se.on_rq) it is
 * still set to TASK_RUNNING, but 0 is returned.
 */
static int try_to_wake_up(struct task_struct *p, unsigned int state,
                          int wake_flags)
{
        int cpu, orig_cpu, this_cpu, success = 0;
        unsigned long flags;
        unsigned long en_flags = ENQUEUE_WAKEUP;
        struct rq *rq;
        this_cpu = get_cpu();
        smp_wmb(); /* author's note: this is related to multi-core (SMP) operation */
        rq = task_rq_lock(p, &flags);
        /* Task is not in any of the requested states: nothing to wake. */
        if (!(p->state & state))
                goto out;

        /* Already on a run queue: skip activation. */
        if (p->se.on_rq)
                goto out_running;

        cpu = task_cpu(p);
        orig_cpu = cpu;

#ifdef CONFIG_SMP
        if (unlikely(task_running(rq, p)))
                goto out_activate;

        /*
         * In order to handle concurrent wakeups and release the rq->lock
         * we put the task in TASK_WAKING state.
         *
         * First fix up the nr_uninterruptible count:
         */
        if (task_contributes_to_load(p)) {
                if (likely(cpu_online(orig_cpu)))
                        rq->nr_uninterruptible--;
                else
                        this_rq()->nr_uninterruptible--;
        }
        p->state = TASK_WAKING;

        if (p->sched_class->task_waking) {
                p->sched_class->task_waking(rq, p);
                en_flags |= ENQUEUE_WAKING;
        }

        cpu = select_task_rq(rq, p, SD_BALANCE_WAKE, wake_flags);
        if (cpu != orig_cpu)
                set_task_cpu(p, cpu);
        __task_rq_unlock(rq);

        rq = cpu_rq(cpu);
        raw_spin_lock(&rq->lock);

        /*
         * We migrated the task without holding either rq->lock, however
         * since the task is not on the task list itself, nobody else
         * will try and migrate the task, hence the rq should match the
         * cpu we just moved it to.
         */
        WARN_ON(task_cpu(p) != cpu);
        WARN_ON(p->state != TASK_WAKING);

#ifdef CONFIG_SCHEDSTATS
        schedstat_inc(rq, ttwu_count);
        if (cpu == this_cpu)
                schedstat_inc(rq, ttwu_local);
        else {
                struct sched_domain *sd;
                for_each_domain(this_cpu, sd) {
                        if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
                                schedstat_inc(sd, ttwu_wake_remote);
                                break;
                        }
                }
        }
#endif /* CONFIG_SCHEDSTATS */

out_activate:
#endif /* CONFIG_SMP */
        schedstat_inc(p, se.statistics.nr_wakeups);
        if (wake_flags & WF_SYNC)
                schedstat_inc(p, se.statistics.nr_wakeups_sync);
        if (orig_cpu != cpu)
                schedstat_inc(p, se.statistics.nr_wakeups_migrate);
        if (cpu == this_cpu)
                schedstat_inc(p, se.statistics.nr_wakeups_local);
        else
                schedstat_inc(p, se.statistics.nr_wakeups_remote);
        activate_task(rq, p, en_flags);
        success = 1;

out_running:
        trace_sched_wakeup(p, success);
        check_preempt_curr(rq, p, wake_flags);

        /* The line this walk-through was after: the task becomes TASK_RUNNING. */
        p->state = TASK_RUNNING;
#ifdef CONFIG_SMP
        if (p->sched_class->task_woken)
                p->sched_class->task_woken(rq, p);

        if (unlikely(rq->idle_stamp)) {
                u64 delta = rq->clock - rq->idle_stamp;
                u64 max = 2*sysctl_sched_migration_cost;

                if (delta > max)
                        rq->avg_idle = max;
                else
                        update_avg(&rq->avg_idle, delta);
                rq->idle_stamp = 0;
        }
#endif
out:
        task_rq_unlock(rq, &flags);
        put_cpu();

        return success;
}

唤醒进程就是把进程状态改变为TASK_RUNNING，也就是加入内核调度队列。

现在说说为什么sd卡要用completion同步机制：

首先，向sd卡发命令必须等待前个命令完成，就是不能同时执行多条命令，本来对于一个cpu的时候是没有问题的，因为有请求队列。对sd卡的操作是一个接着一个的，但是在多个处理器的情况下，情况就不一样了，所以为了防止出错，使用同步机制，而信号量不适合多cpu的场合，所以就选择了completion同步机制。