sched misc

sched misc

查看一个线程的调度详情(sched count等)

/proc/pid/sched里的nr_switches反映的是这个线程到目前为止已经经历了多少次sched switch

cat /proc/pid/sched

console:/ # cat /proc/3201/sched                                               
thread1  (3201, #threads: 1)
-------------------------------------------------------------------
se.exec_start                                :       3649568.375043
se.vruntime                                  :         35988.132407
se.sum_exec_runtime                          :          2257.807624
se.nr_migrations                             :                  262
nr_switches                                  :                 7472
nr_voluntary_switches                        :                 7067
nr_involuntary_switches                      :                  405
se.load.weight                               :               140288
se.runnable_weight                           :               140288
se.avg.load_sum                              :                   21
se.avg.runnable_load_sum                     :                   21
se.avg.util_sum                              :                17408
se.avg.load_avg                              :                    0
se.avg.runnable_load_avg                     :                    0
se.avg.util_avg                              :                    0
se.avg.last_update_time                      :        3649568374784
se.avg.util_est.ewma                         :                   10
se.avg.util_est.enqueued                     :                    0
policy                                       :                    0
prio                                         :                  129
clock-delta                                  :                  250

/proc/pid/sched结果里的policy值的含义

#define SCHED_NORMAL    0 //CFS
#define SCHED_FIFO        1 //RT
#define SCHED_RR        2 //RT
#define SCHED_BATCH        3 //CFS
#define SCHED_IDLE        5 //CFS
#define SCHED_DEADLINE    6 //DL

sched priority范围

/*
 * Priority of a process goes from 0..MAX_PRIO-1, valid RT
 * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
 * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
 * values are inverted: lower p->prio value means higher priority.
 *
 * The MAX_USER_RT_PRIO value allows the actual maximum
 * RT priority to be separate from the value exported to
 * user-space.  This allows kernel threads to set their
 * priority to a value higher than any user task. Note:
 * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
 */

#define MAX_USER_RT_PRIO     100
#define MAX_RT_PRIO          MAX_USER_RT_PRIO

#define MAX_PRIO            (MAX_RT_PRIO + 40)
#define DEFAULT_PRIO        (MAX_RT_PRIO + 20)    // 默认优先级,对应 nice 值为 0 的静态优先级

RT priority范围:0..MAX_RT_PRIO-1,即0-99;

非RT priority范围:MAX_RT_PRIO..MAX_PRIO-1,即100-139

priority值与实际priority是反相的,即priority值越大,priority越低

/proc/sched_debug字段说明

The first being tree-key column, it indicates the task's virtual runtime, and its name comes from the kernel sorting all runnable tasks by this key in a red-black tree. The switches column indicates the total number of switches (involuntary or not), and naturally the prio refers to the process priority. The wait-time value indicates the amount of time the task waited to be scheduled. Finally both sum-exec and sum-sleep account for the total amount of time (in nanoseconds) the task was running on the processor or asleep, respectively.

from:

https://documentation.suse.com/sles/15-SP1/html/SLES-all/cha-tuning-taskscheduler.html

task_struct.thread_info.preempt_count

/*
 *         PREEMPT_MASK: 0x000000ff
 *         SOFTIRQ_MASK: 0x0000ff00
 *         HARDIRQ_MASK: 0x000f0000
 *             NMI_MASK: 0x00100000
 * PREEMPT_NEED_RESCHED: 0x80000000
 */

#define PREEMPT_BITS 8

#define SOFTIRQ_BITS 8

#define HARDIRQ_BITS 4

#define NMI_BITS 1

#define PREEMPT_SHIFT 0

#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) //=8

#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS) //=16

#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS) //=20

#define __IRQ_MASK(x) ((1UL << (x))-1)

#define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT) //(1<<8) - 1

#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) //((1<<8)-1) <<8

#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT) //((1<<4)-1) <<16

#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT) //((1<<1)-1) << 20

#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT) //1<<0

#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT) //1<<8

#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) //1<<16

#define NMI_OFFSET (1UL << NMI_SHIFT) //1<<20

interrupt处理程序完成后,会去检查是否要执行sched switch

interrupt处理程序完成后,它会去检查preempt_count:如果它不为0,则会直接跳过sched switch;如果它为0,再检查TIF_NEED_RESCHED flag是否被置起来了,如果置起来了再去执行sched switch。

关了抢占、关了软中断等均会使preempt_count不为0。所以如果在这之前关了软中断,此时将不会去执行sched switch,(硬)中断处理完后会直接返回被中断打断的thread继续执行:

arch/arm64/kernel/entry.S

/*
 * el1_irq - IRQ entry path quoted from arch/arm64/kernel/entry.S.
 *
 * Flow visible in this excerpt:
 *   1. kernel_entry / enable_da_f / irq_handler are macros defined elsewhere
 *      in entry.S (presumably: save context, unmask some exceptions, and run
 *      the interrupt handler -- confirm against the macro definitions).
 *   2. With CONFIG_PREEMPT, after the handler returns, decide whether to
 *      preempt the interrupted context:
 *        - preempt count != 0           -> skip rescheduling
 *        - TIF_NEED_RESCHED flag clear  -> skip rescheduling
 *        - otherwise                    -> call el1_preempt
 *   3. kernel_exit (macro, defined elsewhere) ends the path.
 *
 * `tsk` is a register alias defined elsewhere; it is used here as the base
 * for the TSK_TI_PREEMPT and TSK_TI_FLAGS loads (presumably the current
 * task pointer -- confirm).
 */
el1_irq:
    kernel_entry 1
    enable_da_f
#ifdef CONFIG_TRACE_IRQFLAGS
    bl    trace_hardirqs_off
#endif

    irq_handler

#ifdef CONFIG_PREEMPT
    // Preemption check: both conditions below must pass before el1_preempt
    // is called; either failing branches forward to local label 1.
    ldr    w24, [tsk, #TSK_TI_PREEMPT]    // get preempt count
    cbnz    w24, 1f                // preempt count != 0
    ldr    x0, [tsk, #TSK_TI_FLAGS]    // get flags
    tbz    x0, #TIF_NEED_RESCHED, 1f    // needs rescheduling?
    // Preempt count is zero and TIF_NEED_RESCHED is set: go reschedule.
    bl    el1_preempt
1:
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
    bl    trace_hardirqs_on
#endif
    kernel_exit 1
ENDPROC(el1_irq)

#ifdef CONFIG_PREEMPT
/*
 * el1_preempt - reschedule helper called from el1_irq with `bl`.
 *
 * Calls preempt_schedule_irq repeatedly for as long as TIF_NEED_RESCHED is
 * still set in the flags word loaded from [tsk, #TSK_TI_FLAGS], then returns
 * to the caller.
 *
 * Clobbers: x0, x24, lr (as visible here; preempt_schedule_irq is an external
 * routine and may clobber more).
 */
el1_preempt:
    // Stash the return address: the `bl` below overwrites lr.
    mov    x24, lr
1:    bl    preempt_schedule_irq        // irq en/disable is done inside
    ldr    x0, [tsk, #TSK_TI_FLAGS]    // get new tasks TI_FLAGS
    tbnz    x0, #TIF_NEED_RESCHED, 1b    // needs rescheduling?
    // Flag is clear: return through the saved link register.
    ret    x24
#endif

renice调整已经存在的进程的优先级

renice -p -n -20 $pid    #调整进程号为$pid的进程的nice值为-20,-p的含义是以进程为单位进行调整

原文地址:https://www.cnblogs.com/aspirs/p/15705633.html