调度程序schedule()注释

调度时机：
　　1、进程终止或进入睡眠：这通常是进程自身的主动行为，也可能由运行异常引起；
　　2、时间片用完：时间片的更新是在时钟中断的驱动下完成的；
　　3、设备驱动程序在执行耗时操作或等待资源时，可能主动调用schedule()让出cpu；
　　4、进程从异常、中断及系统调用返回时，会检测need_resched标志（见下文代码中的prev->need_resched），若已置位则触发调度。
问题：多cpu下，时钟中断是如何处理的？时钟中断应该只被一个cpu捕获并处理，那么其他cpu靠什么来驱动时间片更新？又是怎么触发调度的？（注：就i386 SMP而言，每个cpu应有各自的本地APIC定时器中断来驱动本cpu上的时间片更新，跨cpu的调度则可通过处理器间中断（IPI）触发；此结论有待对照内核源码确认。）
调度具体实现
  1 asmlinkage void schedule(void)  // pick the best runnable task for this CPU and switch to it
  2 {
  3     struct schedule_data * sched_data;
  4     struct task_struct *prev, *next, *p;
  5     struct list_head *tmp;
  6     int this_cpu, c;
  7 
  8 
  9     spin_lock_prefetch(&runqueue_lock);  // NOTE(review): presumably only prefetches the lock's cache line;
 10     // the runqueue lock itself is taken by spin_lock_irq() at listing line 36 - TODO confirm
 11     if (!current->active_mm) BUG();  // a task without its own mm borrows prev's active_mm at switch time
 12 need_resched_back:  // (listing line 142), so the running task must always have one; re-entry point for line 167
 13     prev = current;  // the task that is about to be switched out
 14     // NOTE(review): original note says task_struct and the kernel stack share one
 15     // 8 KB union (task_struct ~1 KB at the low end, stack ~7 KB above it), so current
 16     // is presumably obtained by masking esp with 0xffffe000 - TODO confirm
 17     this_cpu = prev->processor;  // CPU recorded in the task being switched out
 18 
 19     if (unlikely(in_interrupt())) {  // unlikely(): compiler hint (gcc >= 2.96 per the original note)
 20         // that the branch is rarely taken, so the common path can be laid out
 21         // first for better instruction prefetch; likely() is the opposite hint
 22 
 23         printk("Scheduling in interrupt\n");  // scheduling from interrupt context is treated as a bug
 24         BUG();
 25     }
 26 
 27     release_kernel_lock(prev, this_cpu);  // NOTE(review): presumably drops the global kernel lock if prev holds it;
 28     // its counterpart reacquire_kernel_lock() is called at listing line 165 - TODO confirm
 29 
 30     /*
 31     * 'sched_data' is protected by the fact that we can run
 32     * only one process per CPU.
 33     */
 34     sched_data = &aligned_data[this_cpu].schedule_data;  // this CPU's scheduling record; curr is written at listing
 35     // line 97 and, under CONFIG_SMP, last_schedule at listing line 115
 36     spin_lock_irq(&runqueue_lock);  // take the runqueue lock (_irq variant: local interrupts off)
 37 
 38     /* move an exhausted RR process to be last.. */
 39     if (unlikely(prev->policy == SCHED_RR))  // prev uses the round-robin policy
 40         if (!prev->counter) {  // its time slice is used up
 41             prev->counter = NICE_TO_TICKS(prev->nice);  // refill the slice from nice; nice is the traditional UNIX
 42             // inverse priority (-20..19 per the original note): a larger value
 43             // yields more readily, a smaller value means higher priority
 44             move_last_runqueue(prev);  // move prev to the tail of the runqueue
 45         }
 46 
 47     switch (prev->state) {  // act on the state prev is leaving the CPU in
 48     case TASK_INTERRUPTIBLE:  // interruptible sleep
 49         if (signal_pending(prev)) {  // a signal is pending: make prev runnable again instead of dequeuing it
 50             prev->state = TASK_RUNNING;
 51             break;
 52         }
 53     default:  // also reached by fall-through from TASK_INTERRUPTIBLE with no pending signal
 54         del_from_runqueue(prev);  // every state other than TASK_RUNNING: take prev off the runqueue (original note
 55         // lists TASK_STOPPED, TASK_ZOMBIE, TASK_UNINTERRUPTIBLE, e.g. after exit()/wait4())
 56     case TASK_RUNNING:;  // still runnable: nothing to do (also the fall-through target of default)
 57     }
 58     prev->need_resched = 0;  // clear the pending reschedule request
 59 
 60     /*
 61     * this is the scheduler proper:
 62     */
 63 
 64 repeat_schedule:
 65     /*
 66     * Default process to select..
 67     */
 68     next = idle_task(this_cpu);  // fall back to this CPU's idle task if nothing better is found
 69     c = -1000;  // starting value for the maximum-weight search
 70     list_for_each(tmp, &runqueue_head) {  // walk the runqueue
 71         p = list_entry(tmp, struct task_struct, run_list);  // task_struct that embeds this run_list node
 72         if (can_schedule(p, this_cpu)) {  // skip tasks that can_schedule() rejects for this CPU
 73             int weight = goodness(p, this_cpu, prev->active_mm);  // scheduling weight of candidate p
 74             if (weight > c)
 75                 c = weight, next = p;  // remember the best so far; strict '>' keeps the first task on ties
 76         }
 77     }
 78 
 79     /* Do we need to re-calculate counters? */
 80     if (unlikely(!c)) {  // best weight is exactly 0: per the original note, all candidates have used up their slices
 81         struct task_struct *p;  // shadows the outer p
 82 
 83         spin_unlock_irq(&runqueue_lock);  // drop the runqueue lock before taking tasklist_lock
 84         read_lock(&tasklist_lock);  // read-lock the task list
 85         for_each_task(p)
 86             p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice);  // new slice = half of what is left + nice-based quantum
 87         read_unlock(&tasklist_lock);  // release the task list lock
 88         spin_lock_irq(&runqueue_lock);  // re-take the runqueue lock (interrupts off again)
 89         goto repeat_schedule;  // redo the selection with the refreshed counters
 90     }
 91 
 92     /*
 93     * from this point on nothing can prevent us from
 94     * switching to the next task, save this fact in
 95     * sched_data.
 96     */
 97     sched_data->curr = next;  // record next as the task running on this CPU
 98     task_set_cpu(next, this_cpu);  // presumably updates next's processor and cpus_runnable fields - TODO confirm
 99     spin_unlock_irq(&runqueue_lock);  // release the runqueue lock (_irq variant: interrupts back on)
100 
101     if (unlikely(prev == next)) {  // the selected task is the one already running
102         /* We won't go through the normal tail, so do this by hand */
103         prev->policy &= ~SCHED_YIELD;  // clear the yield flag here because the normal tail is skipped
104         goto same_process;
105     }
106 
107 #ifdef CONFIG_SMP
108     /*
109     * maintain the per-process 'last schedule' value.
110     * (this has to be recalculated even if we reschedule to
111     * the same process) Currently this is only used on SMP,
112     * and it's approximate, so we do not have to maintain
113     * it while holding the runqueue spinlock.
114     */
115     sched_data->last_schedule = get_cycles();  // timestamp of this switch; original note: a reference for other CPUs on SMP
116 
117     /*
118     * We drop the scheduler lock early (it's a global spinlock),
119     * thus we have to lock the previous process from getting
120     * rescheduled during switch_to().
121     */
122 
123 #endif /* CONFIG_SMP */
124 
125     kstat.context_swtch++;  // count this context switch
126     /*
127     * there are 3 processes which are affected by a context switch:
128     *
129     * prev == .... ==> (last => next)
130     *
131     * It's the 'much more previous' 'prev' that is on next's stack,
132     * but prev is set to (the just run) 'last' process by switch_to().
133     * This might sound slightly confusing but makes tons of sense.
134     */
135     prepare_to_switch();
136     {
137         struct mm_struct *mm = next->mm;  // next's own address space (NULL is handled as a kernel thread below)
138         struct mm_struct *oldmm = prev->active_mm;  // address space prev has been running on
139         if (!mm) {  // next has no address space of its own: a kernel thread
140             if (next->active_mm) BUG();  // a kernel thread gives up its borrowed mm when switched out
141             // (listing line 152), so finding one still set here is a bug
142             next->active_mm = oldmm;  // borrow prev's address space
143             atomic_inc(&oldmm->mm_count);  // take a reference on the borrowed mm
144             enter_lazy_tlb(oldmm, next, this_cpu);  // NOTE(review): presumably enters lazy-TLB mode - TODO confirm
145         }
146         else {  // next has its own address space: a user process
147             if (next->active_mm != mm) BUG();  // for such a task mm and active_mm must be the same
148             switch_mm(oldmm, mm, next, this_cpu);  // switch to next's address space
149         }
150 
151         if (!prev->mm) {  // prev was a kernel thread
152             prev->active_mm = NULL;  // give up the address space it had borrowed
153             mmdrop(oldmm);  // drop the reference on it (presumably pairs with the mm_count increment)
154         }
155     }
156 
157     /*
158     * This just switches the register state and the
159     * stack.
160     */
161     switch_to(prev, next, prev);  // switch register state and stack
162     __schedule_tail(prev);  // NOTE(review): post-switch handling of prev; original note claims it requeues prev at the runqueue tail - unverified here
163 
164 same_process:
165     reacquire_kernel_lock(current);  // NOTE(review): presumably re-takes the kernel lock dropped by release_kernel_lock() - TODO confirm
166     if (current->need_resched)  // another reschedule was requested in the meantime: run the scheduler again
167         goto need_resched_back;
168     return;
169 }