调度器27—Freq Qos 和限频流程

基于 Linux-5.10

一、概述

Freq QoS 主要用于 CPU 调频时的频率限制，它基于 PM QoS 的约束（pm_qos_constraints）机制实现。与 PM QoS 不同的是，Freq QoS 只有系统级实现，代码位于 kernel/power/qos.c 中。

二、相关结构

/* Operation that pm_qos_update_target() performs on a request node. */
enum pm_qos_req_action {
    PM_QOS_ADD_REQ,        /* Add a new request */
    PM_QOS_UPDATE_REQ,    /* Update an existing request */
    PM_QOS_REMOVE_REQ    /* Remove an existing request */
};

/*
 * Values used by freq_constraints_init() for the target, default and
 * "no constraint" value of the min and max frequency constraints.
 */
#define FREQ_QOS_MIN_DEFAULT_VALUE    0
#define FREQ_QOS_MAX_DEFAULT_VALUE    S32_MAX

/* Aggregation rule pm_qos_get_value() applies to a non-empty request list. */
enum pm_qos_type {
    PM_QOS_UNITIALIZED,
    PM_QOS_MAX,        /* return the largest value */
    PM_QOS_MIN,        /* return the smallest value */
};

/*
 * Which side of the frequency range a request or notifier refers to.
 * Values start at 1; the add/remove request paths reset req->type to 0.
 */
enum freq_qos_req_type {
    FREQ_QOS_MIN = 1,
    FREQ_QOS_MAX,
};

/* One aggregated constraint: a priority list of requests plus the value currently in effect. */
struct pm_qos_constraints {
    struct plist_head list; /* every freq_qos_request for this constraint is linked here through its pnode */
    /* Do not change to 64 bit */
    s32 target_value; /* effective (aggregated) value of this constraint */
    s32 default_value; /* substituted when a request passes PM_QOS_DEFAULT_VALUE */
    s32 no_constraint_value; /* reported by pm_qos_get_value() when the list is empty */
    enum pm_qos_type type; /* whether the largest or the smallest request wins */
    struct blocking_notifier_head *notifiers; /* chain called when the effective value changes */
};

/*
 * Frequency limits as a [min, max] pair: each bound is its own
 * pm_qos_constraints with its own notifier chain, so changes of the
 * lower and the upper bound are notified separately.
 */
struct freq_constraints {
    struct pm_qos_constraints min_freq;
    struct blocking_notifier_head min_freq_notifiers;
    struct pm_qos_constraints max_freq;
    struct blocking_notifier_head max_freq_notifiers;
};

/* A single client's request against one bound of a freq_constraints object. */
struct freq_qos_request {
    enum freq_qos_req_type type; /* FREQ_QOS_MIN or FREQ_QOS_MAX; 0 when not in use */
    struct plist_node pnode; /* the requested value is stored in pnode.prio */
    struct freq_constraints *qos; /* constraints this request is attached to; NULL when not in use */
    ANDROID_OEM_DATA_ARRAY(1, 2); /* NOTE(review): presumably Android vendor-reserved storage - confirm */
};

对频点的限制是一个区间，有最大值和最小值，所以 freq_constraints 中使用两个 pm_qos_constraints 成员表示。由结构中的两个notifier head可知，极大值和极小值改变后也是分别通知的。

三、相关函数

1. freq_constraints_init

/**
 * freq_constraints_init - Initialize frequency QoS constraints.
 * @qos: Frequency QoS constraints to initialize.
 *
 * Both bounds start with an empty request list and their default value.
 * The lower bound aggregates with PM_QOS_MAX (the largest requested floor
 * wins) and the upper bound with PM_QOS_MIN (the smallest requested
 * ceiling wins).
 */
void freq_constraints_init(struct freq_constraints *qos)
{
    struct pm_qos_constraints *lo = &qos->min_freq;
    struct pm_qos_constraints *hi = &qos->max_freq;

    /* Lower bound of the frequency range. */
    lo->type = PM_QOS_MAX;
    lo->no_constraint_value = FREQ_QOS_MIN_DEFAULT_VALUE;
    lo->default_value = FREQ_QOS_MIN_DEFAULT_VALUE;
    lo->target_value = FREQ_QOS_MIN_DEFAULT_VALUE;
    plist_head_init(&lo->list);
    lo->notifiers = &qos->min_freq_notifiers;
    BLOCKING_INIT_NOTIFIER_HEAD(lo->notifiers);

    /* Upper bound of the frequency range. */
    hi->type = PM_QOS_MIN;
    hi->no_constraint_value = FREQ_QOS_MAX_DEFAULT_VALUE;
    hi->default_value = FREQ_QOS_MAX_DEFAULT_VALUE;
    hi->target_value = FREQ_QOS_MAX_DEFAULT_VALUE;
    plist_head_init(&hi->list);
    hi->notifiers = &qos->max_freq_notifiers;
    BLOCKING_INIT_NOTIFIER_HEAD(hi->notifiers);
}

新分配一个 freq_constraints 结构后可以直接调用此函数，函数中分别对min_freq和max_freq进行初始化，这个函数没有export导出来。

注意其type的初始化。min_freq 这个限制赋值的type竟然是PM_QOS_MAX，而 max_freq 这个限制赋值的type竟然是PM_QOS_MIN！这样当限制最大频点的时候，pm_qos判断是PM_QOS_MIN，那么plist链表上生效的就是最小值，也就是说对最大频点的限制，谁限制的小谁生效。当限制最小频点的时候，pm_qos判断是PM_QOS_MAX，那么plist链表上生效的就是最大值，也就是说对最小频点的限制，谁限制的大谁生效。这是反着来利用Qos机制的！

2. freq_qos_add_notifier

/**
 * freq_qos_add_notifier - Add frequency QoS change notifier.
 * @qos: List of requests to add the notifier to.
 * @type: Request type.
 * @notifier: Notifier block to add.
 *
 * Return: result of blocking_notifier_chain_register(), or -EINVAL for a
 * bad @qos, a NULL @notifier or an unknown @type.
 */
int freq_qos_add_notifier(struct freq_constraints *qos, enum freq_qos_req_type type, struct notifier_block *notifier)
{
    struct blocking_notifier_head *chain;

    if (IS_ERR_OR_NULL(qos) || !notifier)
        return -EINVAL;

    /* Pick the chain that belongs to the requested bound. */
    if (type == FREQ_QOS_MIN) {
        chain = qos->min_freq.notifiers;
    } else if (type == FREQ_QOS_MAX) {
        chain = qos->max_freq.notifiers;
    } else {
        WARN_ON(1);
        return -EINVAL;
    }

    return blocking_notifier_chain_register(chain, notifier);
}
EXPORT_SYMBOL_GPL(freq_qos_add_notifier);

注册一个notifier，根据参数 type，决定使用max的或min的 pm_qos_constraints::notifiers，notifier 参数中有指定优先级，优先级数值大的插入在链表前面，优先级数值小的插入后面，优先级数值若相同，先插入的在前面。notifier->notifier_call()里面会对感兴趣的action进行响应。

其中，Qos最终频点限制值改变了，也是通过这个notifier机制更新通知修改的，也就是说cpufreq驱动必须注册两个notifier根据Qos来设置频点值，一个是设置MAX限制，一个是设置MIN限制。

3. freq_qos_remove_notifier

/**
 * freq_qos_remove_notifier - Remove frequency QoS change notifier.
 * @qos: List of requests to remove the notifier from.
 * @type: Request type.
 * @notifier: Notifier block to remove.
 *
 * Return: result of blocking_notifier_chain_unregister(), or -EINVAL for a
 * bad @qos, a NULL @notifier or an unknown @type.
 */
int freq_qos_remove_notifier(struct freq_constraints *qos, enum freq_qos_req_type type, struct notifier_block *notifier)
{
    struct blocking_notifier_head *chain;

    if (IS_ERR_OR_NULL(qos) || !notifier)
        return -EINVAL;

    /* Pick the chain that belongs to the requested bound. */
    if (type == FREQ_QOS_MIN) {
        chain = qos->min_freq.notifiers;
    } else if (type == FREQ_QOS_MAX) {
        chain = qos->max_freq.notifiers;
    } else {
        WARN_ON(1);
        return -EINVAL;
    }

    return blocking_notifier_chain_unregister(chain, notifier);
}
EXPORT_SYMBOL_GPL(freq_qos_remove_notifier);

将此 notifier_block 结构从指定的 constraints 的 notifiers 链表上删除。任何对Qos的此限制感兴趣的都需要注册notifier，不再感兴趣时删除。在不需要对频点做要求时需要删除自己的 freq_qos_request 结构，否则它可能持续在生效，导致其它 freq_qos_request 表示的频点无法生效。

4. freq_qos_apply

/**
 * freq_qos_apply - Add/modify/remove frequency QoS request.
 * @req: Constraint request to apply.
 * @action: Action to perform (add/update/remove).
 * @value: Value to assign to the QoS request.
 *
 * This is only meant to be called from inside pm_qos, not drivers.
 *
 * Return: result of pm_qos_update_target() on the constraint selected by
 * @req->type, or -EINVAL when the type is neither MIN nor MAX.
 */
int freq_qos_apply(struct freq_qos_request *req, enum pm_qos_req_action action, s32 value)
{
    struct pm_qos_constraints *target;

    if (req->type == FREQ_QOS_MIN)
        target = &req->qos->min_freq;
    else if (req->type == FREQ_QOS_MAX)
        target = &req->qos->max_freq;
    else
        return -EINVAL;

    return pm_qos_update_target(target, &req->pnode, action, value);
}

只是在qos.c内部使用，没有导出。

/**
 * pm_qos_update_target - Update a list of PM QoS constraint requests.
 * @c: List of PM QoS requests.
 * @node: Target list entry.
 * @action: Action to carry out (add, update or remove).
 * @value: New request value for the target list entry.
 *
 * Update the given list of PM QoS constraint requests, @c, by carrying an
 * @action involving the @node list entry and @value on it.
 *
 * The recognized values of @action are PM_QOS_ADD_REQ (store @value in @node
 * and add it to the list), PM_QOS_UPDATE_REQ (remove @node from the list, store
 * @value in it and add it to the list again), and PM_QOS_REMOVE_REQ (remove
 * @node from the list, ignore @value).
 *
 * Return: 1 if the aggregate constraint value has changed, 0  otherwise.
 */
int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node, enum pm_qos_req_action action, int value)
{
    int prev_value, curr_value, new_value;
    unsigned long flags;

    spin_lock_irqsave(&pm_qos_lock, flags);

    /*
     * Aggregate before the change: depending on c->type this is the prio of
     * the last (MAX) or the first (MIN) list entry; larger requests sit
     * further back in the list.
     */
    prev_value = pm_qos_get_value(c);
    if (value == PM_QOS_DEFAULT_VALUE) /* -1 asks for the constraint's default value */
        new_value = c->default_value;
    else
        new_value = value;

    switch (action) {
    case PM_QOS_REMOVE_REQ:
        plist_del(node, &c->list); /* unlink this request from the plist */
        break;
    case PM_QOS_UPDATE_REQ:
        /*
         * To change the list, atomically remove, reinit with new value
         * and add, then see if the aggregate has changed.
         */
        plist_del(node, &c->list); /* unlink this request; it is re-inserted below */
        fallthrough;
    case PM_QOS_ADD_REQ:
        plist_node_init(node, new_value); /* node->prio = new_value */
        plist_add(node, &c->list); /* (re)insert sorted by prio: larger values go further back, smaller ones further front */
        break;
    default:
        /* no action */
        ;
    }

    curr_value = pm_qos_get_value(c);
    pm_qos_set_value(c, curr_value); /* stores the aggregate in c->target_value, which readers return */

    spin_unlock_irqrestore(&pm_qos_lock, flags);

    trace_pm_qos_update_target(action, prev_value, curr_value);

    if (prev_value == curr_value)
        return 0;

    /* The aggregate changed: the only visible outcome is a notification. */
    if (c->notifiers)
        blocking_notifier_call_chain(c->notifiers, curr_value, NULL);

    return 1;
}

/*
 * Compute the aggregate value of constraint @c from its request list.
 *
 * An empty list yields c->no_constraint_value.  Otherwise PM_QOS_MIN yields
 * the prio of the first list entry (the smallest request) and PM_QOS_MAX the
 * prio of the last entry (the largest request).  Any other type warns and
 * yields PM_QOS_DEFAULT_VALUE.
 */
static int pm_qos_get_value(struct pm_qos_constraints *c)
{
    if (plist_head_empty(&c->list))
        return c->no_constraint_value;

    if (c->type == PM_QOS_MIN)
        return plist_first(&c->list)->prio;

    if (c->type == PM_QOS_MAX)
        return plist_last(&c->list)->prio;

    WARN(1, "Unknown PM QoS type in %s\n", __func__);
    return PM_QOS_DEFAULT_VALUE;
}

注意，在add request时，prio越大(value值越大)，越插入靠后，prio值越小，越插入靠前。这里获取PM_QOS_MIN值，返回的是plist链表第一个元素的值，返回的是最小值。获取PM_QOS_MAX值，返回的是plist链表最后一个元素的值，返回的是最大值。

freq_qos_add_request(&qos, &req, FREQ_QOS_MAX, FREQ_QOS_MAX_DEFAULT_VALUE/*S32_MAX*/) 就表示该请求不限制最大频点了，之后再通过 freq_qos_update_request(&req, new_value) 来更新限制。注意不要把 S32_MAX 用在 FREQ_QOS_MIN 类型的请求上：min_freq 的聚合类型是 PM_QOS_MAX（取链表上的最大值），那样任何其它人对最小频点的设置都无效了，因为这里已经设置了最大值，没有更大的合法值可以去设置了。

有个trace: trace_pm_qos_update_target，但是没有太大帮助，没有注明是哪个cluster的。

kthread-270     [004] .... 220691.970715: pm_qos_update_target: action=UPDATE_REQ prev_value=150 curr_value=2000000000
kthread-270     [004] .... 220691.970846: pm_qos_update_target: action=UPDATE_REQ prev_value=2000000000 curr_value=150

freq_qos_apply() 函数最终只是判断当前最终体现值 curr_value 和之前最终体现值 prev_value 是否相等，若是相等返回0，不相等就通过 pm_qos_constraints::notifiers 发出一个通知，这里只是一个通知而已。

5. freq_qos_add_request

/**
 * freq_qos_add_request - Insert new frequency QoS request into a given list.
 * @qos: Constraints to update.
 * @req: Preallocated request object.
 * @type: Request type.
 * @value: Request value.
 *
 * Bind @req to @qos, link it into the request list selected by @type and
 * recompute the effective constraint value for that list.  The caller must
 * keep @req around: it is the handle for later updates and removal.
 *
 * Return 1 if the effective constraint value has changed, 0 if the effective
 * constraint value has not changed, or a negative error code on failures.
 */
int freq_qos_add_request(struct freq_constraints *qos, struct freq_qos_request *req, enum freq_qos_req_type type, s32 value)
{
    int status;

    if (!req || IS_ERR_OR_NULL(qos))
        return -EINVAL;

    if (WARN(freq_qos_request_active(req), "%s() called for active request\n", __func__))
        return -EINVAL;

    req->type = type;
    req->qos = qos;

    status = freq_qos_apply(req, PM_QOS_ADD_REQ, value);
    if (status < 0) {
        /* Insertion failed: return the request to its unused state. */
        req->type = 0;
        req->qos = NULL;
    }

    trace_android_vh_freq_qos_add_request(qos, req, type, value, status);

    return status;
}
EXPORT_SYMBOL_GPL(freq_qos_add_request);

req 是新分配直接使用的。调用结果就是向指定的 pm_qos_constraints::list 链表上插入一个 freq_qos_request 成员，高优先级(数值大，prio=value)的插入在plist后面，低优先级的插入在前面。若是这个 request 值最终使 Qos 的值改变通过notifier发出通知的话，就返回1，否则返回0，失败返回负的错误码。可以看出添加request是实时生效的。

6. freq_qos_update_request

/**
 * freq_qos_update_request - Modify existing frequency QoS request.
 * @req: Request to modify.
 * @new_value: New request value.
 *
 * Update an existing frequency QoS request along with the effective constraint
 * value for the list of requests it belongs to.
 *
 * Return 1 if the effective constraint value has changed, 0 if the effective
 * constraint value has not changed, or a negative error code on failures.
 */
int freq_qos_update_request(struct freq_qos_request *req, s32 new_value)
{
    if (!req)
        return -EINVAL;

    if (WARN(!freq_qos_request_active(req), "%s() called for unknown object\n", __func__))
        return -EINVAL;

    /*
     * Android vendor hook.  Per the author's note, the handler attached on
     * MTK platforms is mtk_freq_qos_update_request(), which only prints.
     */
    trace_android_vh_freq_qos_update_request(req, new_value);

    /* Same value as before: the aggregate cannot change, skip the list update. */
    if (req->pnode.prio == new_value)
        return 0;

    return freq_qos_apply(req, PM_QOS_UPDATE_REQ, new_value);
}
EXPORT_SYMBOL_GPL(freq_qos_update_request);

使用新值替换 pm_qos_constraints::list 上对应 freq_qos_request::pnode::prio 的旧值。若最终Qos值改变了，发出通知并返回1，若最终Qos的值没有变，返回0。

7. freq_qos_remove_request

/**
 * freq_qos_remove_request - Remove frequency QoS request from its list.
 * @req: Request to remove.
 *
 * Unlink @req from the constraint list it belongs to, recompute the effective
 * constraint value for that list and mark @req as unused.
 *
 * Return 1 if the effective constraint value has changed, 0 if the effective
 * constraint value has not changed, or a negative error code on failures.
 */
int freq_qos_remove_request(struct freq_qos_request *req)
{
    int changed;

    if (!req)
        return -EINVAL;

    if (WARN(!freq_qos_request_active(req), "%s() called for unknown object\n", __func__))
        return -EINVAL;

    trace_android_vh_freq_qos_remove_request(req);

    /* The value argument is ignored by pm_qos_update_target() on removal. */
    changed = freq_qos_apply(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE);

    /* Detach only after the removal has used req->qos and req->type. */
    req->type = 0;
    req->qos = NULL;

    return changed;
}
EXPORT_SYMBOL_GPL(freq_qos_remove_request);

移除 pm_qos_constraints::list 上对应 freq_qos_request ，移除后，若最终Qos值改变了，发出通知并返回1，若最终Qos的值没有变，返回0。

8. freq_qos_read_value

/*
 * freq_qos_read_value - Get frequency QoS constraint for a given list.
 * @qos: Constraints to evaluate.
 * @type: QoS request type.
 *
 * Returns the effective value of the bound selected by @type.  When @qos is
 * NULL or an error pointer, the default for that bound is returned instead.
 * An unknown @type warns and yields 0.
 */
s32 freq_qos_read_value(struct freq_constraints *qos, enum freq_qos_req_type type)
{
    if (type == FREQ_QOS_MIN) {
        if (IS_ERR_OR_NULL(qos))
            return FREQ_QOS_MIN_DEFAULT_VALUE;
        return pm_qos_read_value(&qos->min_freq);
    }

    if (type == FREQ_QOS_MAX) {
        if (IS_ERR_OR_NULL(qos))
            return FREQ_QOS_MAX_DEFAULT_VALUE;
        return pm_qos_read_value(&qos->max_freq);
    }

    WARN_ON(1);
    return 0;
}

此函数没有导出来。返回指定 pm_qos_constraints 的 target_value 值。它是在 pm_qos_update_target() 中更新的。若qos参数传null，就可以得到默认的最大最小值。

四、逻辑介绍

1. 调频模块先注册频点设置notifier函数

/*
 * Abridged excerpt ("..." marks omitted code): policy allocation sets up the
 * Freq QoS constraints and registers one notifier per bound.
 */
static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) /* cpufreq.c */
{
    ...
    freq_constraints_init(&policy->constraints);

    /* Callbacks invoked when the effective min/max limit changes. */
    policy->nb_min.notifier_call = cpufreq_notifier_min;
    policy->nb_max.notifier_call = cpufreq_notifier_max;
    
    freq_qos_add_notifier(&policy->constraints, FREQ_QOS_MIN, &policy->nb_min);
    freq_qos_add_notifier(&policy->constraints, FREQ_QOS_MAX, &policy->nb_max);
    ...
}

/*
 * Min-limit notifier.  Both the min and the max notifier funnel into the same
 * work item, so limit changes are processed serially by handle_update().
 */
static int cpufreq_notifier_min(struct notifier_block *nb, unsigned long freq, void *data)
{
    /* nb is embedded in the policy as nb_min; queue that policy's update work. */
    schedule_work(&container_of(nb, struct cpufreq_policy, nb_min)->update);
    return 0;
}
/* Max-limit notifier: queues the owning policy's update work item. */
static int cpufreq_notifier_max(struct notifier_block *nb, unsigned long freq, void *data)
{
    /* nb is embedded in the policy as nb_max. */
    schedule_work(&container_of(nb, struct cpufreq_policy, nb_max)->update);
    return 0;
}

/*
 * Work handler for policy->update: re-evaluates the frequency limits of the
 * owning policy while holding the policy rwsem for writing.
 */
static void handle_update(struct work_struct *work)
{
    struct cpufreq_policy *policy = container_of(work, struct cpufreq_policy, update);

    down_write(&policy->rwsem);
    refresh_frequency_limits(policy);
    up_write(&policy->rwsem);
}

/*
 * Re-apply the current governor and policy so that new min/max limits are
 * picked up.  Inactive policies are left untouched.
 */
void refresh_frequency_limits(struct cpufreq_policy *policy)
{
    /* Per the author's note, policy_is_inactive() is cpumask_empty(policy->cpus). */
    if (policy_is_inactive(policy))
        return;

    cpufreq_set_policy(policy, policy->governor, policy->policy);
}

/*
 * Abridged excerpt ("..." marks omitted code): reads the effective limits from
 * Freq QoS, lets the driver verify them, stores them in the policy and, when
 * the governor is unchanged, asks the governor to honour the new limits.
 */
static int cpufreq_set_policy(struct cpufreq_policy *policy, struct cpufreq_governor *new_gov, unsigned int new_pol)
{
    ...
    new_data.freq_table = policy->freq_table;
    new_data.cpu = policy->cpu;

    /* Fetch the effective min/max frequency limits from Freq QoS. */
    new_data.min = freq_qos_read_value(&policy->constraints, FREQ_QOS_MIN);
    new_data.max = freq_qos_read_value(&policy->constraints, FREQ_QOS_MAX);

    cpufreq_driver->verify(&new_data);

    /* Store the limits in the policy. */
    policy->min = new_data.min;
    policy->max = new_data.max;
    trace_cpu_frequency_limits(policy);

    if (new_gov == policy->governor) {
        cpufreq_governor_limits(policy); /* ends up in policy->governor->limits(policy) */
        return 0;
    }
    ...
}

/* Abridged schedutil ->limits() callback: it only records that limits changed. */
static void sugov_limits(struct cpufreq_policy *policy)
{
    struct sugov_policy *sg_policy = policy->governor_data;

    sg_policy->limits_changed = true; /* just sets a flag; consumed by sugov_should_update_freq() */
}


/*
 * Decide whether schedutil may issue a frequency update at @time.
 *
 * A pending limits change bypasses the rate limit: the flag is consumed,
 * need_freq_update is set and the update is allowed immediately, so the next
 * frequency gets clamped by the new limits.  Otherwise the update is allowed
 * once min_rate_limit_ns has elapsed since the last update.
 */
static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
{
    s64 elapsed_ns;

    if (likely(!sg_policy->limits_changed)) {
        elapsed_ns = time - sg_policy->last_freq_update_time;
        return elapsed_ns >= sg_policy->min_rate_limit_ns;
    }

    sg_policy->limits_changed = false;
    sg_policy->need_freq_update = true; /* per the author's note, the only place this is set */
    return true;
}

/*
 * Frequency update hook (abridged excerpt: the declarations of sg_cpu,
 * sg_policy and next_f are omitted).
 */
static void sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
{
    if (sugov_should_update_freq(sg_policy, time)) { /* only act when an update is due */
        next_f = sugov_next_freq_shared(sg_cpu, time);

        if (sg_policy->policy->fast_switch_enabled)
            sugov_fast_switch(sg_policy, time, next_f); /* set the frequency */
        else
            sugov_deferred_update(sg_policy, time, next_f);
    }
}


/*
 * Fast-switch path: apply @next_freq through the driver and record the
 * frequency the driver reports back as the policy's current frequency.
 */
static void sugov_fast_switch(struct sugov_policy *sg_policy, u64 time, unsigned int next_freq)
{
    unsigned int applied;

    /* Per the author's note, this check is also affected by sg_policy->need_freq_update. */
    if (!sugov_update_next_freq(sg_policy, time, next_freq))
        return;

    applied = cpufreq_driver_fast_switch(sg_policy->policy, next_freq);
    if (applied)
        sg_policy->policy->cur = applied;
}

/*
 * Abridged excerpt of the fast-switch entry point: clamp the requested
 * frequency to the policy limits, hand it to the driver and return the
 * frequency the driver reports.  Code the excerpt does not discuss is
 * omitted.
 */
unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, unsigned int target_freq)
{
    unsigned int freq;

    /* Honour the QoS limits stored in policy->min / policy->max. */
    target_freq = clamp_val(target_freq, policy->min, policy->max);
    /* Ask the driver to program the frequency. */
    freq = cpufreq_driver->fast_switch(policy, target_freq);

    /* sugov_fast_switch() relies on this value (0 means nothing was applied). */
    return freq;
}

可以看到，频率限制时对频点的设置其实并不是完全实时的，它只是设置一个标志位而已。然后需要等到有调频调用，即cpufreq_update_util --> sugov_update_shared/sugov_update_single --> 判断有pending的限频导致的设置，就不用等，立即设置。

2. 其它模块使用 freq_qos_add_request() / freq_qos_update_request() 来限制频点值

void set_each_cluster_maxfreq_to_2G() {
    struct freq_qos_request req;
    struct cpufreq_policy *policy;

    //每个cpu都限制到2GHz
    for_each_possible_cpu(cpu) {
        policy = cpufreq_cpu_get(cpu);
        freq_qos_add_request(&policy->constraints, &req, FREQ_QOS_MAX, 2000000000);
        cpu = cpumask_last(policy->related_cpus);//just cpu0 4 7
        cpufreq_cpu_put(policy);
    }
}

五、限制生效流程

由 Freq Qos 实现可知，当 qos_update 使 Qos 的最终限制结果改变时，会发出notifier，因此需要注册notifier block，并在接收到notifier通知后更新限频值，若当前频点不在新的限制范围内的话，还要设置当前频点。

/*
 * drivers/cpufreq/cpufreq.c
 *
 * Abridged excerpt ("..." marks omitted code, including the declaration of
 * ret): wires a new policy to Freq QoS.
 */
static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
{
    struct cpufreq_policy *policy;
    struct device *dev = get_cpu_device(cpu);

    ...
    freq_constraints_init(&policy->constraints);

    /* Callbacks run when a notification arrives. */
    policy->nb_min.notifier_call = cpufreq_notifier_min;
    policy->nb_max.notifier_call = cpufreq_notifier_max;

    /* Register the handlers for Freq QoS limit-change notifications. */
    ret = freq_qos_add_notifier(&policy->constraints, FREQ_QOS_MIN, &policy->nb_min);
    ret = freq_qos_add_notifier(&policy->constraints, FREQ_QOS_MAX, &policy->nb_max);

    /* The actual processing function; it runs asynchronously from a work item. */
    INIT_WORK(&policy->update, handle_update);
    ...
}

响应回调函数：

/* Min-limit notifier: queues the owning policy's update work item. */
static int cpufreq_notifier_min(struct notifier_block *nb, unsigned long freq, void *data)
{
    /* nb is embedded in the policy as nb_min. */
    schedule_work(&container_of(nb, struct cpufreq_policy, nb_min)->update);
    return 0;
}

/* Max-limit notifier: queues the owning policy's update work item. */
static int cpufreq_notifier_max(struct notifier_block *nb, unsigned long freq, void *data)
{
    /* nb is embedded in the policy as nb_max. */
    schedule_work(&container_of(nb, struct cpufreq_policy, nb_max)->update);
    return 0;
}

/* Queue @work on system_wq and return queue_work()'s result. */
static inline bool schedule_work(struct work_struct *work)
{
    return queue_work(system_wq, work); /* for policy->update this ends up running handle_update() */
}

异步执行策略变更设置：

/*
 * Work handler for policy->update: re-evaluates the frequency limits of the
 * owning policy while holding the policy rwsem for writing.
 */
static void handle_update(struct work_struct *work)
{
    struct cpufreq_policy *policy = container_of(work, struct cpufreq_policy, update);

    pr_debug("handle_update for cpu %u called\n", policy->cpu);
    down_write(&policy->rwsem);
    refresh_frequency_limits(policy);
    up_write(&policy->rwsem);
}


/*
 * Re-apply the current governor and policy so that new min/max limits are
 * picked up.  Inactive policies are left untouched.
 */
void refresh_frequency_limits(struct cpufreq_policy *policy)
{
    if (policy_is_inactive(policy))
        return;

    pr_debug("updating policy for CPU %u\n", policy->cpu);
    cpufreq_set_policy(policy, policy->governor, policy->policy);
}
EXPORT_SYMBOL(refresh_frequency_limits);

实际上是调用 cpufreq_set_policy() 来使限频策略更新的。

/*
 * Abridged excerpt ("..." marks omitted code, including the declaration of
 * ret): refresh policy->min / policy->max from Freq QoS and propagate the new
 * limits to the driver or the governor.
 */
static int cpufreq_set_policy(struct cpufreq_policy *policy, struct cpufreq_governor *new_gov, unsigned int new_pol)
{
    struct cpufreq_policy_data new_data;

    memcpy(&new_data.cpuinfo, &policy->cpuinfo, sizeof(policy->cpuinfo));
    new_data.freq_table = policy->freq_table;
    new_data.cpu = policy->cpu;


    /* Read the min and max values from Freq QoS. */
    new_data.min = freq_qos_read_value(&policy->constraints, FREQ_QOS_MIN);
    new_data.max = freq_qos_read_value(&policy->constraints, FREQ_QOS_MAX);

    /* Let the driver validate the range, making sure min <= max. */
    ret = cpufreq_driver->verify(&new_data);

    /* policy->min / policy->max hold the effective frequency limits. */
    policy->min = new_data.min;
    policy->max = new_data.max;

    /* Tracepoint emitted whenever the limits take effect. */
    trace_cpu_frequency_limits(policy);

    policy->cached_target_freq = UINT_MAX;

    pr_debug("new min and max freqs are %u - %u kHz\n", policy->min, policy->max);

    /* Called only if the driver provides ->setpolicy; the author notes drivers usually do not. */
    if (cpufreq_driver->setpolicy) {
        policy->policy = new_pol;
        pr_debug("setting range\n");
        return cpufreq_driver->setpolicy(policy);
    }

    /* A limit change with an unchanged governor takes this path. */
    if (new_gov == policy->governor) {
        pr_debug("governor limits update\n");
        cpufreq_governor_limits(policy); /* same governor: only refresh the limits */
        return 0;
    }

    ...
}

上面从Freq Qos获取的最大最小频点限制已经保存到 policy->min 和 policy->max 中了，限制的任务已经完成，之后的频点设置都会和policy的min和max比较，将频点钳位到min和max之间。之后就是实时的使限制值生效了，这是通过调用governor->limits()回调函数来完成的。

/* Invoke the governor's ->limits() callback, if it has one, so the new limits get applied. */
static void cpufreq_governor_limits(struct cpufreq_policy *policy)
{
    if (!policy->governor->limits)
        return;

    policy->governor->limits(policy);
}


/*
 * schedutil ->limits() callback.  Without fast switching the current
 * frequency is pulled into the new range right away under work_lock; in all
 * cases limits_changed is set so the next update bypasses the rate limit.
 */
static void sugov_limits(struct cpufreq_policy *policy)
{
    struct sugov_policy *sg_policy = policy->governor_data;

    if (!policy->fast_switch_enabled) {
        mutex_lock(&sg_policy->work_lock);
        cpufreq_policy_apply_limits(policy);
        mutex_unlock(&sg_policy->work_lock);
    }

    sg_policy->limits_changed = true;
}

/*
 * Pull the current frequency back into [policy->min, policy->max].  Nothing
 * is done when policy->cur already lies inside the range.
 */
static inline void cpufreq_policy_apply_limits(struct cpufreq_policy *policy)
{
    if (policy->max < policy->cur) {
        /* Above the ceiling: go down to max. */
        __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H);
        return;
    }

    if (policy->min > policy->cur) {
        /* Below the floor: go up to min. */
        __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L);
    }
}

这里判断了 policy->fast_switch_enabled 的值，若是为false就会调用下面函数直接设置频点。若是为true则不会立即设置，而是延后到下一次频点变更的时候进行设置，在 sugov_update_shared/sugov_update_single 中判断 sg_policy->limits_changed 为真时会立即更新频点，忽视up/down_rate_limit_us文件设置的值。

六、实验

1. 实验代码

/* 放到 kernel/sched 下面 */

#define pr_fmt(fmt) "freq_qos_debug: " fmt

#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/string.h>
#include <linux/printk.h>
#include <asm/topology.h>
#include <linux/cpumask.h>
#include <linux/cpufreq.h>
#include <linux/pm_qos.h>
#include <linux/plist.h>
#include <linux/sched/topology.h>
#include "sched.h"


/*
 * Debug requests, indexed [bound][cluster]: bound 0 holds the FREQ_QOS_MIN
 * requests, bound 1 the FREQ_QOS_MAX requests; room for three clusters.
 */
static struct freq_qos_request qos_req[2][3];

static int freq_qos_debug_show(struct seq_file *m, void *v)
{
    int cpu = 4;
    struct freq_qos_request    *pos;
    struct cpufreq_policy *policy;
    struct plist_head *plh_max, *plh_min;

    for_each_possible_cpu(cpu) {
        policy = cpufreq_cpu_get(cpu);
        if (!policy) {
            pr_info("cpufreq_cpu_get return null!\n");
            return -EFAULT;
        }
        seq_printf(m, "policy->max=%u, policy->min=%u, policy->cur=%u\n", policy->max, policy->min, policy->cur);

        plh_max = &policy->constraints.max_freq.list;
        seq_printf(m, "max freq limit:\n");
        plist_for_each_entry(pos, plh_max, pnode) {
            seq_printf(m, "pos->type=%d, pos->pnode.prio=%d\n", pos->type, pos->pnode.prio);
        }

        plh_min = &policy->constraints.min_freq.list;
        seq_printf(m, "min freq limit:\n");
        plist_for_each_entry(pos, plh_min, pnode) {
            seq_printf(m, "pos->type=%d, pos->pnode.prio=%d\n", pos->type, pos->pnode.prio);
        }
        seq_printf(m, "\n");

        cpu = cpumask_last(policy->related_cpus);
        cpufreq_cpu_put(policy);
    }

    return 0;
}

/* proc open callback: binds the file to freq_qos_debug_show() via single_open(). */
static int freq_qos_debug_open(struct inode *inode, struct file *file)
{
    return single_open(file, freq_qos_debug_show, NULL);
}

static int freq_qos_debug_update_request(int cluster, int freq_req, int choose)
{
    int ret = freq_qos_update_request(&qos_req[choose][cluster], freq_req);
    if (ret == 1) {
        pr_info("new freq_req=%u take effect.\n", freq_req);
    } else if (ret == 0) {
        pr_info("new freq_req=%u not take effect.\n", freq_req);
    } else if (ret < 0) {
        pr_err("new freq_req=%u update failed.\n", freq_req);
    }

    return ret;
}

/*
 * proc write callback.  Expects "<cluster> <min_max> <freq>" where cluster
 * is a valid index into qos_req[0], min_max is 0 (min limit) or 1 (max
 * limit) and freq is the new request value.
 *
 * Returns @count on success, -EFAULT when the user buffer cannot be copied,
 * -EINVAL for over-long or malformed commands, or the negative error
 * reported by the request update.
 */
static ssize_t freq_qos_debug_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
{
    int ret;
    int cluster, min_max, freq_req;
    char buffer[64] = {0};

    /*
     * Reject commands that do not fit: truncating would report a short
     * write and the remainder would be parsed as a second command.
     */
    if (count >= sizeof(buffer)) {
        pr_info("cmd too long: %zu bytes\n", count);
        return -EINVAL;
    }
    if (copy_from_user(buffer, buf, count)) {
        pr_info("copy_from_user failed\n");
        return -EFAULT;
    }

    ret = sscanf(buffer, "%d %d %d", &cluster, &min_max, &freq_req);
    if (ret != 3) {
        pr_info("sscanf failed, ret=%d\n", ret);
        return -EINVAL;
    }

    /* Both values index qos_req, so an invalid command must stop here. */
    if (cluster < 0 || cluster >= (int)ARRAY_SIZE(qos_req[0]) || (min_max != 0 && min_max != 1)) {
        pr_info("cmd error: cluster=%d, freq=%d, choose=%d\n", cluster, freq_req, min_max);
        return -EINVAL;
    }
    pr_info("set: cluster=%d, freq=%d, choose=%s\n", cluster, freq_req, min_max==1 ? "max":"min");

    ret = freq_qos_debug_update_request(cluster, freq_req, min_max);
    if (ret < 0)
        return ret;

    return count;
}

/*
 * Linux 5.10 uses struct proc_ops instead of struct file_operations for proc
 * entries.  Reads go through the seq_file helpers; writes are handled by
 * freq_qos_debug_write().
 */
static const struct proc_ops freq_qos_debug_fops = {
    .proc_open    = freq_qos_debug_open,
    .proc_read    = seq_read,
    .proc_write   = freq_qos_debug_write,
    .proc_lseek  = seq_lseek,
    .proc_release = single_release,
};

/*
 * Register one max-frequency and one min-frequency request per cpufreq policy
 * (cluster), both at their "no limit" defaults, so they can later be changed
 * through freq_qos_update_request().
 *
 * Returns 0 on success or a negative error code.  The policy reference taken
 * by cpufreq_cpu_get() is dropped on every path.
 */
static int freq_qos_debug_add_request(void)
{
    struct cpufreq_policy *policy;
    int ret = 0, cpu;
    int i = 0;

    for_each_possible_cpu(cpu) {
        policy = cpufreq_cpu_get(cpu);
        if (!policy) {
            pr_info("cpufreq_cpu_get return null\n");
            return -EFAULT;
        }

        /* qos_req only has room for a fixed number of clusters. */
        if (i >= (int)ARRAY_SIZE(qos_req[0])) {
            pr_err("more clusters than request slots. cpu=%d\n", cpu);
            ret = -ENOSPC;
            goto err_put;
        }

        ret = freq_qos_add_request(&policy->constraints, &qos_req[1][i], FREQ_QOS_MAX, FREQ_QOS_MAX_DEFAULT_VALUE);
        if (ret < 0) {
            pr_err("add qos request max failed. cpu=%d\n", cpu);
            goto err_put;
        }
        ret = freq_qos_add_request(&policy->constraints, &qos_req[0][i], FREQ_QOS_MIN, FREQ_QOS_MIN_DEFAULT_VALUE);
        if (ret < 0) {
            pr_err("add qos request min failed. cpu=%d\n", cpu);
            /* Do not leave a half-registered pair for this cluster behind. */
            freq_qos_remove_request(&qos_req[1][i]);
            goto err_put;
        }

        /* Jump to the last CPU of this policy so the loop continues with the next cluster. */
        cpu = cpumask_last(policy->related_cpus);
        cpufreq_cpu_put(policy);

        i++;
    }

    return 0;

err_put:
    cpufreq_cpu_put(policy);
    return ret;
}


/*
 * Module init: create /proc/freq_qos_debug and register the debug requests.
 *
 * A failure to register the requests is only logged.  The author observed
 * that, when built into the kernel, cpufreq_cpu_get() still returns NULL at
 * late_initcall time, and the proc file remains useful for reading.
 */
static int __init freq_qos_debug_init(void)
{
    int ret;

    /* NOTE(review): the entry is world-writable; consider restricting it outside of experiments. */
    if (!proc_create("freq_qos_debug", S_IRUGO | S_IWUGO, NULL, &freq_qos_debug_fops)) {
        pr_err("create /proc/freq_qos_debug failed\n");
        return -ENOMEM;
    }

    ret = freq_qos_debug_add_request();
    if (ret < 0)
        pr_err("add qos requests failed, ret=%d\n", ret);

    pr_info("freq_qos_debug probed\n");

    /*
     * Built into the kernel (not a .ko): prints "MODULE not defined!".
     * Built as a .ko: prints "MODULE defined!".
     */
#ifdef MODULE
    pr_info("MODULE defined!\n");
#else
    pr_info("MODULE not defined!\n");
#endif

    return 0;
}
/* When not built as a module, even late_initcall still hits "cpufreq_cpu_get return null". */
late_initcall(freq_qos_debug_init);

MODULE_DESCRIPTION("Freq Qos Debug");
MODULE_LICENSE("GPL v2"); /* mandatory */

补充：若编译成ko, MODULE宏就是定义的，此时各种 XXX_initcall(fn) 都为 module_init(fn)，是不考虑插入优先级的，见include/linux/module.h。若编译进内核，则MODULE宏是没有定义的，是考虑优先级的，XXX_initcall(fn)分别对应各自的优先级，见include/linux/init.h。也比较容易理解，比如一个模块编译成了ko，那么它就是在insmod时单独加载的，指定优先级也没有意义。

2. 实验结果

# cat /proc/freq_qos_debug
policy->max=1100000, policy->min=500000, policy->cur=500000
max freq limit:
pos->type=2, pos->pnode.prio=1100000
pos->type=2, pos->pnode.prio=1800000
pos->type=2, pos->pnode.prio=2147483647
min freq limit:
pos->type=1, pos->pnode.prio=0
pos->type=1, pos->pnode.prio=200000
pos->type=1, pos->pnode.prio=500000

policy->max=1800000, policy->min=200000, policy->cur=1400000
max freq limit:
pos->type=2, pos->pnode.prio=1800000
pos->type=2, pos->pnode.prio=2850000
pos->type=2, pos->pnode.prio=2147483647
min freq limit:
pos->type=1, pos->pnode.prio=0
pos->type=1, pos->pnode.prio=200000

policy->max=2300000, policy->min=1300000, policy->cur=1300000
max freq limit:
pos->type=2, pos->pnode.prio=2300000
pos->type=2, pos->pnode.prio=3050000
pos->type=2, pos->pnode.prio=2147483647
min freq limit:
pos->type=1, pos->pnode.prio=0
pos->type=1, pos->pnode.prio=1300000

3. 实验总结

通过设置实验可以看出，max选最小的，min选最大的，实验和理论对的上。/sys/devices/system/cpu/cpuX/cpufreq 下的 scaling_min_freq 文件，写它是 freq_qos_update_request() 一个对最小频点的限制值，cat它显示的是policy->min.

通过echo设置可以发现，若是设置并生效的min的限制值比当前max的限制值还大，min取max的值，相当于定频了，可以叫它向上定频。若设置并生效的max限制值比当前的min的限制值还小，此时max和min的频点都取设置并生效的max值，也相当于将频点定到设置的max值上，可以叫它向下定频。也可以理解为冲突后以对max的设定为准。

注：prio值越大越插入后面是正确的，通过移植plist链表验证过了。