tracer ftrace笔记(3)——宏展开和hook和注册 Hello

一、ftrace的宏

1. struct tracepoint 结构

使用 struct tracepoint 变量来描述一个 trace point。

//include/linux/tracepoint-defs.h
struct tracepoint {
    const char *name; // Name of the trace point; used to identify it (e.g. matched with strcmp() when walking all tracepoints). NOTE(review): the original note says the kernel keeps tracepoints in a hash table keyed by name - confirm for this kernel version.
    struct static_key key; // Enable state: 0 means disabled, 1 means enabled; static_key_false(&key) simply tests whether the key is true.
    struct static_call_key *static_call_key; // Initialised to &__SCK__tp_func_<name> by DEFINE_TRACE_FN.
    void *static_call_tramp;
    void *iterator; // Initialised to &__traceiter_<name>, the function that walks funcs[].
    int (*regfunc)(void); // Optional callback (NULL for DEFINE_TRACE); presumably run when a probe is added - TODO confirm in tracepoint.c.
    void (*unregfunc)(void); // Optional callback (NULL for DEFINE_TRACE); presumably run when a probe is removed - TODO confirm in tracepoint.c.
    struct tracepoint_func __rcu *funcs; // All probe functions attached to this trace point: an array (not a linked list) terminated by an entry whose func is NULL.
};

/* One probe (hook) attached to a trace point; struct tracepoint::funcs is an array of these. */
struct tracepoint_func {
    void *func; /* probe function; cast to the tracepoint's probe prototype before it is called */
    void *data; /* private pointer passed to the probe as its first argument */
    int prio; /* ordering priority; per the notes below, larger values are placed earlier in funcs[] */
};

static key使用见:https://www.cnblogs.com/hellokitty2/p/15026568.html

2. DEFINE_TRACE_FN 展开后是

/*
 * include/linux/tracepoint.h
 * Defines a struct tracepoint named __tracepoint_##_name (placed in the
 * "__tracepoints" section), then defines the function __traceiter_##_name,
 * which calls every function stored in the struct tracepoint::funcs[] array
 * in turn; the array must be terminated by an entry whose func is NULL.
 * Finally defines the static-call key __SCK__tp_func_##_name whose .func is
 * that iterator.
 */
#define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args)        \
    static const char __tpstrtab_##_name[]                \
    __section("__tracepoints_strings") = #_name;            \
    extern struct static_call_key __SCK__tp_func_##_name;    \
    int __traceiter_##_name(void *__data, proto);            \
    struct tracepoint __tracepoint_##_name    __used __section("__tracepoints") = {    \
        .name = __tpstrtab_##_name,                \
        .key = STATIC_KEY_INIT_FALSE,                \
        .static_call_key = &__SCK__tp_func_##_name,    \
        .static_call_tramp = NULL, \
        .iterator = &__traceiter_##_name,            \
        .regfunc = _reg,                    \
        .unregfunc = _unreg,                    \
        .funcs = NULL    \
    };                    \
    __TRACEPOINT_ENTRY(_name);                    \
    int __nocfi __traceiter_##_name(void *__data, proto)            \
    {                                \
        struct tracepoint_func *it_func_ptr;            \
        void *it_func;                        \
        it_func_ptr = rcu_dereference_raw((&__tracepoint_##_name)->funcs); \
        if (it_func_ptr) {                    \
            do {                        \
                it_func = (it_func_ptr)->func;        \
                __data = (it_func_ptr)->data;        \
                ((void(*)(void *, proto))(it_func))(__data, args); \
            } while ((++it_func_ptr)->func);        \
        }                            \
        return 0;                        \
    }                                \
    extern struct static_call_key __SCK__tp_func_##_name;  \
    extern typeof(__traceiter_##_name) __SCT__tp_func_##_name;         \
    struct static_call_key __SCK__tp_func_##_name = {      \
        .func = __traceiter_##_name,                        \
    }

3. __DECLARE_TRACE 宏展开后就是:

/*
 * include/linux/tracepoint.h
 * Declares the per-tracepoint helpers: trace_##name(), trace_##name##_rcuidle(),
 * register_trace_##name(), register_trace_prio_##name() (registration with an
 * explicit priority), unregister_trace_##name(),
 * check_trace_callback_type_##name() and trace_##name##_enabled().
 *
 * The do { ... } while (0) bodies below are the tracing helper written out
 * inline; each one is terminated with ';' so that it forms a complete
 * statement inside the enclosing function.
 */
#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
    extern int __traceiter_##name(data_proto);            \
    extern struct static_call_key __SCK__tp_func_##name;        \
    extern typeof(__traceiter_##name) __SCT__tp_func_##name;    \
    extern struct tracepoint __tracepoint_##name;            \
    static inline void __nocfi trace_##name(proto)                \
    {                                \
        /* only do any work once the tracepoint key has been enabled */ \
        if (static_key_false(&__tracepoint_##name.key))        \
            do {                                \
                struct tracepoint_func *it_func_ptr;            \
                int __maybe_unused __idx = 0;                \
                void *__data;                        \
                                            \
                if (!(cond))                        \
                    return;                     \
                /* keep srcu and sched-rcu usage consistent */        \
                preempt_disable_notrace();                \
                it_func_ptr = rcu_dereference_raw((&__tracepoint_##name)->funcs); \
                if (it_func_ptr) {                    \
                    __data = (it_func_ptr)->data;            \
                    __traceiter_##name(data_args);            \
                }                            \
                preempt_enable_notrace();                \
            } while (0);    \
        if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) {        \
            rcu_read_lock_sched_notrace();            \
            rcu_dereference_sched(__tracepoint_##name.funcs);\
            rcu_read_unlock_sched_notrace();        \
        }                            \
    }                                \
    static inline void trace_##name##_rcuidle(proto)        \
    {                                \
        if (static_key_false(&__tracepoint_##name.key))     \
            do {                                \
                struct tracepoint_func *it_func_ptr;            \
                int __maybe_unused __idx = 0;                \
                void *__data;                        \
                                            \
                if (!(cond))                        \
                    return;                     \
                                            \
                /* srcu can't be used from NMI */            \
                WARN_ON_ONCE(in_nmi());            \
                                            \
                /* keep srcu and sched-rcu usage consistent */        \
                preempt_disable_notrace();                \
                                            \
                /*                            \
                 * For rcuidle callers, use srcu since sched-rcu    \
                 * doesn't work from the idle path.         \
                 */                         \
                __idx = srcu_read_lock_notrace(&tracepoint_srcu);\
                rcu_irq_enter_irqson();             \
                                            \
                it_func_ptr = rcu_dereference_raw((&__tracepoint_##name)->funcs); \
                if (it_func_ptr) {                    \
                    __data = (it_func_ptr)->data;            \
                    __traceiter_##name(data_args);            \
                }                            \
                                            \
                rcu_irq_exit_irqson();                \
                srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
                                            \
                preempt_enable_notrace();                \
            } while (0);    \
    }    \
    static inline int register_trace_##name(void (*probe)(data_proto), void *data)    \
    {                                \
        return tracepoint_probe_register(&__tracepoint_##name, (void *)probe, data);    \
    }                                \
    static inline int register_trace_prio_##name(void (*probe)(data_proto), void *data, int prio) \
    {                                \
        return tracepoint_probe_register_prio(&__tracepoint_##name, (void *)probe, data, prio); \
    }                                \
    static inline int unregister_trace_##name(void (*probe)(data_proto), void *data)    \
    {                                \
        return tracepoint_probe_unregister(&__tracepoint_##name, (void *)probe, data);    \
    }                                \
    static inline void check_trace_callback_type_##name(void (*cb)(data_proto))    \
    {                                \
    }                                \
    static inline bool trace_##name##_enabled(void)                    \
    {                                \
        return static_key_false(&__tracepoint_##name.key);    \
    }

trace_##name(proto) 中判断 __tracepoint_##name.key 的值为真才会调用执行各个钩子函数,在下面路径中会将这个key设置为真。

register_trace_##name() //具体tracepoint的define位置
    tracepoint_probe_register //tracepoint.c
        tracepoint_probe_register_prio //tracepoint.c
            tracepoint_add_func //tracepoint.c
                static_key_enable(&tp->key);

也就是说注册了 hook 才会真,否则为假。 

4. 使用 DECLARE_TRACE 的宏

#define DEFINE_TRACE(name, proto, args)    DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args));

//为空
#define TRACE_EVENT_FLAGS(event, flag)

//为空
#define TRACE_EVENT_PERF_PERM(event, expr...)

/*
 * include/linux/tracepoint-defs.h
 * 不建议直接使用,此头文件是包含在最head位置的
 */
#define DECLARE_TRACEPOINT(tp) extern struct tracepoint __tracepoint_##tp

/*
 * 建议使用,它的作用和 trace_##name##_enabled(void) 一致,但是在头文件中
 * 使用是安全的,然而 trace_##name##_enabled(void) 在头文件中是不安全的,应
 * 该是因为不能重复定义一个函数。
 */
#define tracepoint_enabled(tp) static_key_false(&(__tracepoint_##tp).key)

/*
 * include/linux/tracepoint.h
 * 就是上面的一组函数集合,包含register_trace_##name、trace_##name##_enabled 等
 */
#define DECLARE_TRACE(name, proto, args)                \
    __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), cpu_online(raw_smp_processor_id()), PARAMS(void *__data, proto),    PARAMS(__data, args))
/*
 * 两个宏之间的区别就是后者 arg4 逻辑与上了 cond 参数,主要是 trace_##name、trace_##name##_rcuidle 两个函数中使用,若是判断 cond 为假,
 * 就直接返回了。
 */
#define DECLARE_TRACE_CONDITION(name, proto, args, cond)        \
    __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), PARAMS(void *__data, proto), PARAMS(__data, args))

/* include/linux/tracepoint.h */
#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print)

#define DEFINE_EVENT(template, name, proto, args)    DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))

#define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg)    DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))

#define DEFINE_EVENT_PRINT(template, name, proto, args, print)    DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))

#define DEFINE_EVENT_CONDITION(template, name, proto, args, cond) DECLARE_TRACE_CONDITION(name, PARAMS(proto), PARAMS(args), PARAMS(cond))

#define TRACE_EVENT(name, proto, args, struct, assign, print)    DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))

#define TRACE_EVENT_FN(name, proto, args, struct, assign, print, reg, unreg)    DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))

#define TRACE_EVENT_FN_COND(name, proto, args, cond, struct, assign, print, reg, unreg) DECLARE_TRACE_CONDITION(name, PARAMS(proto), PARAMS(args), PARAMS(cond))

#define TRACE_EVENT_CONDITION(name, proto, args, cond, struct, assign, print) DECLARE_TRACE_CONDITION(name, PARAMS(proto), PARAMS(args), PARAMS(cond))

#define TRACE_EVENT_FLAGS(event, flag)

#define TRACE_EVENT_PERF_PERM(event, expr...)

#define DECLARE_EVENT_NOP(name, proto, args)                \
    static inline void trace_##name(proto)                \
    { }                                \
    static inline bool trace_##name##_enabled(void)            \
    {                                \
        return false;                        \
    }

#define TRACE_EVENT_NOP(name, proto, args, struct, assign, print)    DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args))

#define DECLARE_EVENT_CLASS_NOP(name, proto, args, tstruct, assign, print)

#define DEFINE_EVENT_NOP(template, name, proto, args)    DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args))

tracepoint.h 中的定义可能不是最终的,因为有的文件(例如下面的 include/trace/define_trace.h)会先执行 #undef XXX,然后重新 define。观察可以发现,上面这些宏主要使用的是 DECLARE_TRACE,对照展开后的函数,显然是不完整的,因为缺少 DEFINE_TRACE 相关的部分。因此每个trace应该还存在使用 DEFINE_TRACE 的部分。两者都存在,一个trace才完整。


5. 使用 DEFINE_TRACE 的部分

/* include/trace/define_trace.h */
#undef TRACE_EVENT
#define TRACE_EVENT(name, proto, args, tstruct, assign, print)    DEFINE_TRACE(name, PARAMS(proto), PARAMS(args))

#undef TRACE_EVENT_CONDITION
#define TRACE_EVENT_CONDITION(name, proto, args, cond, tstruct, assign, print) \
    TRACE_EVENT(name, PARAMS(proto), PARAMS(args), PARAMS(tstruct), PARAMS(assign),    PARAMS(print))

#undef TRACE_EVENT_FN
#define TRACE_EVENT_FN(name, proto, args, tstruct, assign, print, reg, unreg)    \
    DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args))

#undef TRACE_EVENT_FN_COND
#define TRACE_EVENT_FN_COND(name, proto, args, cond, tstruct, assign, print, reg, unreg)    \
    DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args))

#undef TRACE_EVENT_NOP
#define TRACE_EVENT_NOP(name, proto, args, struct, assign, print)

#undef DEFINE_EVENT_NOP
#define DEFINE_EVENT_NOP(template, name, proto, args)

#undef DEFINE_EVENT
#define DEFINE_EVENT(template, name, proto, args) DEFINE_TRACE(name, PARAMS(proto), PARAMS(args))

#undef DEFINE_EVENT_FN
#define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg) \
    DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args))

#undef DEFINE_EVENT_PRINT
#define DEFINE_EVENT_PRINT(template, name, proto, args, print)    \
    DEFINE_TRACE(name, PARAMS(proto), PARAMS(args))

#undef DEFINE_EVENT_CONDITION
#define DEFINE_EVENT_CONDITION(template, name, proto, args, cond) \
    DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))

#undef DECLARE_TRACE
#define DECLARE_TRACE(name, proto, args)    DEFINE_TRACE(name, PARAMS(proto), PARAMS(args))

6. EXPORT_TRACEPOINT_SYMBOL_GPL 和 EXPORT_TRACEPOINT_SYMBOL

导出这些trace符号后,才能在模块中使用。

/*
 * include/linux/tracepoint.h
 * After expansion this exports the three per-tracepoint symbols
 * (__tracepoint_<name>, __traceiter_<name>, __SCK__tp_func_<name>) as GPL-only.
 * The last line carries no ';' because the caller supplies it, matching
 * EXPORT_TRACEPOINT_SYMBOL.
 */
#define EXPORT_TRACEPOINT_SYMBOL_GPL(name)                \
    EXPORT_SYMBOL_GPL(__tracepoint_##name);                \
    EXPORT_SYMBOL_GPL(__traceiter_##name);                \
    EXPORT_SYMBOL_GPL(__SCK__tp_func_##name)

#define EXPORT_TRACEPOINT_SYMBOL(name)                    \
    EXPORT_SYMBOL(__tracepoint_##name);                \
    EXPORT_SYMBOL(__traceiter_##name);                \
    EXPORT_SYMBOL(__SCK__tp_func_##name)

7. 定义一个trace,TRACE_EVENT 各个成员使用的宏

/* include/linux/tracepoint.h */
#define PARAMS(args...) args
#define TP_PROTO(args...)    args
#define TP_ARGS(args...)    args
#define TP_CONDITION(args...)    args
//include/trace/trace_events.h
#define TP_STRUCT__entry(args...) args
#define TP_fast_assign(args...) args
#define TP_printk(fmt, args...) "\"" fmt "\", "  __stringify(args)

include/trace/events/sched.h 文件中定义了大量的CPU调度相关的trace,但是它只include了 linux/tracepoint.h 文件,说明其使用的宏全部都是来自linux/tracepoint.h 文件的,但是 tracepoint.h 中又包含了其它头文件,不排除其它头文件中又包含了其它头文件,比如 include/trace/trace_events.h 。

8. 以 sched_migrate_task 为例来看 TRACE_EVENT

//include/trace/events/sched.h
TRACE_EVENT(sched_migrate_task,

    TP_PROTO(struct task_struct *p, int dest_cpu),

    TP_ARGS(p, dest_cpu),

    TP_STRUCT__entry(
        __array(    char,    comm,    TASK_COMM_LEN    )
        __field(    pid_t,    pid            )
        __field(    int,    prio            )
        __field(    int,    orig_cpu        )
        __field(    int,    dest_cpu        )
        __field(    int,    running            )
    ),

    TP_fast_assign(
        memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
        __entry->pid        = p->pid;
        __entry->prio        = p->prio; /* XXX SCHED_DEADLINE */
        __entry->orig_cpu    = task_cpu(p);
        __entry->dest_cpu    = dest_cpu;
        __entry->running    = (p->state == TASK_RUNNING);
    ),

    TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d running=%d",
          __entry->comm, __entry->pid, __entry->prio,
          __entry->orig_cpu, __entry->dest_cpu,
          __entry->running)
);

include/linux/tracepoint.h 中有注释:__field(pid_t, prev_pid) 等于 pid_t prev_pid; __array(char, prev_comm, TASK_COMM_LEN) 等于 char prev_comm[TASK_COMM_LEN];
声明的 'local variable' 叫做 '__entry',可以在 TP_fast_assign 中使用 __entry->XX 来引用。TP_STRUCT__entry 指定环形缓冲区中的存储格式,也是 /sys/kernel/debug/tracing/events/<*>/format 导出到用户空间的格式。

按照如下宏定义进行展开:

#define TRACE_EVENT(name, proto, args, struct, assign, print)    DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define DECLARE_TRACE(name, proto, args)                \
    __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), cpu_online(raw_smp_processor_id()), PARAMS(void *__data, proto),    PARAMS(__data, args))
//直接映射也就是:
#define TRACE_EVENT(name, proto, args, struct, assign, print) \
    __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), cpu_online(raw_smp_processor_id()), PARAMS(void *__data, proto),    PARAMS(__data, args))


#define TRACE_EVENT(name, proto, args, struct, assign, print)    DEFINE_TRACE(name, PARAMS(proto), PARAMS(args))
#define DEFINE_TRACE(name, proto, args)        DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args));
//直接映射也就是:
#define TRACE_EVENT(name, proto, args, struct, assign, print)    DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args));

全部展开后为:

/*
 * __DECLARE_TRACE written out for sched_migrate_task: the macro parameters
 * (name, proto, args, cond, data_proto, data_args) are already substituted
 * in the body below. Each inlined do { ... } while (0) is terminated with
 * ';' so that it is a complete statement.
 */
#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
    extern int __traceiter_sched_migrate_task(void *__data, struct task_struct *p, int dest_cpu);            \
    extern struct static_call_key __SCK__tp_func_sched_migrate_task;        \
    extern typeof(__traceiter_sched_migrate_task) __SCT__tp_func_sched_migrate_task;    \
    extern struct tracepoint __tracepoint_sched_migrate_task;            \
    static inline void __nocfi trace_sched_migrate_task(struct task_struct *p, int dest_cpu)                \
    {                                \
        if (static_key_false(&__tracepoint_sched_migrate_task.key))        \
            do {                                \
                struct tracepoint_func *it_func_ptr;            \
                int __maybe_unused __idx = 0;                \
                void *__data;                        \
                                            \
                if (!cpu_online(raw_smp_processor_id()))                        \
                    return;                     \
                /* keep srcu and sched-rcu usage consistent */        \
                preempt_disable_notrace();                \
                it_func_ptr = rcu_dereference_raw((&__tracepoint_sched_migrate_task)->funcs); \
                if (it_func_ptr) {                    \
                    __data = (it_func_ptr)->data;            \
                    __traceiter_sched_migrate_task(__data, p, dest_cpu);            \
                }                            \
                preempt_enable_notrace();                \
            } while (0);    \
        if (IS_ENABLED(CONFIG_LOCKDEP) && cpu_online(raw_smp_processor_id())) {        \
            rcu_read_lock_sched_notrace();            \
            rcu_dereference_sched(__tracepoint_sched_migrate_task.funcs);\
            rcu_read_unlock_sched_notrace();        \
        }                            \
    }                                \
    static inline void trace_sched_migrate_task_rcuidle(struct task_struct *p, int dest_cpu)        \
    {                                \
        if (static_key_false(&__tracepoint_sched_migrate_task.key))     \
            do {                                \
                struct tracepoint_func *it_func_ptr;            \
                int __maybe_unused __idx = 0;                \
                void *__data;                        \
                                            \
                if (!cpu_online(raw_smp_processor_id()))                        \
                    return;                     \
                                            \
                /* srcu can't be used from NMI */            \
                WARN_ON_ONCE(in_nmi());            \
                                            \
                /* keep srcu and sched-rcu usage consistent */        \
                preempt_disable_notrace();                \
                                            \
                /*                            \
                 * For rcuidle callers, use srcu since sched-rcu    \
                 * doesn't work from the idle path.         \
                 */                         \
                __idx = srcu_read_lock_notrace(&tracepoint_srcu);\
                rcu_irq_enter_irqson();             \
                                            \
                it_func_ptr = rcu_dereference_raw((&__tracepoint_sched_migrate_task)->funcs); \
                if (it_func_ptr) {                    \
                    __data = (it_func_ptr)->data;            \
                    __traceiter_sched_migrate_task(__data, p, dest_cpu);            \
                }                            \
                                            \
                rcu_irq_exit_irqson();                \
                srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
                                            \
                preempt_enable_notrace();                \
            } while (0);    \
    }    \
    static inline int register_trace_sched_migrate_task(void (*probe)(void *__data, struct task_struct *p, int dest_cpu), void *data)    \
    {                                \
        return tracepoint_probe_register(&__tracepoint_sched_migrate_task, (void *)probe, data);    \
    }                                \
    static inline int register_trace_prio_sched_migrate_task(void (*probe)(void *__data, struct task_struct *p, int dest_cpu), void *data, int prio) \
    {                                \
        return tracepoint_probe_register_prio(&__tracepoint_sched_migrate_task, (void *)probe, data, prio); \
    }                                \
    static inline int unregister_trace_sched_migrate_task(void (*probe)(void *__data, struct task_struct *p, int dest_cpu), void *data)    \
    {                                \
        return tracepoint_probe_unregister(&__tracepoint_sched_migrate_task, (void *)probe, data);    \
    }                                \
    static inline void check_trace_callback_type_sched_migrate_task(void (*cb)(void *__data, struct task_struct *p, int dest_cpu))    \
    {                                \
    }                                \
    static inline bool trace_sched_migrate_task_enabled(void)                    \
    {                                \
        return static_key_false(&__tracepoint_sched_migrate_task.key);    \
    }

#define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args)        \
        static const char __tpstrtab_sched_migrate_task[]                \
        __section("__tracepoints_strings") = "sched_migrate_task";            \
        extern struct static_call_key __SCK__tp_func_sched_migrate_task;    \
        int __traceiter_sched_migrate_task(void *__data, struct task_struct *p, int dest_cpu);            \
        struct tracepoint __tracepoint_sched_migrate_task    __used __section("__tracepoints") = {    \
            .name = __tpstrtab_sched_migrate_task,             \
            .key = STATIC_KEY_INIT_FALSE,                \
            .static_call_key = &__SCK__tp_func_sched_migrate_task, \
            .static_call_tramp = NULL, \
            .iterator = &__traceiter_sched_migrate_task,            \
            .regfunc = NULL,                    \
            .unregfunc = NULL,                    \
            .funcs = NULL    \
        };                    \
        __TRACEPOINT_ENTRY(sched_migrate_task);                    \
        int __nocfi __traceiter_sched_migrate_task(void *__data, struct task_struct *p, int dest_cpu)            \
        {                                \
            struct tracepoint_func *it_func_ptr;            \
            void *it_func;                        \
            it_func_ptr = rcu_dereference_raw((&__tracepoint_sched_migrate_task)->funcs); \
            if (it_func_ptr) {                    \
                do {                        \
                    it_func = (it_func_ptr)->func;        \
                    __data = (it_func_ptr)->data;        \
                    ((void(*)(void *, struct task_struct *p, int dest_cpu))(it_func))(__data, p, dest_cpu); \
                } while ((++it_func_ptr)->func);        \
            }                            \
            return 0;                        \
        }                                \
        extern struct static_call_key __SCK__tp_func_sched_migrate_task;  \
        extern typeof(__traceiter_sched_migrate_task) __SCT__tp_func_sched_migrate_task;        \
        struct static_call_key __SCK__tp_func_sched_migrate_task = {      \
            .func = __traceiter_sched_migrate_task,                        \
        }

TODO: 其它部分是怎么起作用的?

从展开后的内容可以看到,当调用 trace_sched_migrate_task() 进行trace的时候,会调用 __traceiter_sched_migrate_task() 来遍历 struct tracepoint::funcs 数组中的每一个函数进行trace,也就是说一个trace上可以注册多个hook函数

若使用 EXPORT_TRACEPOINT_SYMBOL_GPL(sched_migrate_task) 导出,上面展开内容中的 __tracepoint_sched_migrate_task、__traceiter_sched_migrate_task、__SCK__tp_func_sched_migrate_task 三个符号会被导出。

9. 一个trace上注册多个hook

既然一个trace上可以注册多个hook,那么一定会涉及到这些hook函数的调用次序的问题,见 tracepoint_probe_register 实现可知,有一个默认优先级 TRACEPOINT_DEFAULT_PRIO=10,注册函数中会传递给 struct tracepoint_func::prio,在插入到 struct tracepoint::funcs 数组时会判断优先级,优先级数值越大,越插在靠前的位置,相同优先级的话,后注册的插在后面。 比如此例子中,注册默认优先级的使用函数 register_trace_sched_migrate_task,自己指定优先级使用函数 register_trace_prio_sched_migrate_task。

/*
 * Attach @probe (with its private @data pointer) to tracepoint @tp using the
 * default priority: simply forwards to tracepoint_probe_register_prio() with
 * TRACEPOINT_DEFAULT_PRIO and returns that function's result.
 */
int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data)
{
    return tracepoint_probe_register_prio(tp, probe, data, TRACEPOINT_DEFAULT_PRIO);
}
EXPORT_SYMBOL_GPL(tracepoint_probe_register);

二、Google搞的vendor hook

1. hook 的 DEFINE_HOOK_FN 解析后是

//include/trace/hooks/vendor_hooks.h
/*
 * Vendor-hook counterpart of DEFINE_TRACE_FN: defines __tracepoint_##_name,
 * the iterator __traceiter_##_name that calls every entry of funcs[] until a
 * NULL func is reached, and the static-call key __SCK__tp_func_##_name.
 *
 * Annotations inside the macro use block comments on purpose: a "//" comment
 * followed by a line-continuation backslash would swallow the rest of the
 * macro, because lines are spliced before comments are removed.
 */
#define DEFINE_HOOK_FN(_name, _reg, _unreg, proto, args)        \
    static const char __tpstrtab_##_name[]                \
    __section("__tracepoints_strings") = #_name;            \
    extern struct static_call_key __SCK__tp_func_##_name;    \
    int __traceiter_##_name(void *__data, proto);            \
    struct tracepoint __tracepoint_##_name    __used __section("__tracepoints") = {    \
        .name = __tpstrtab_##_name,             \
        .key = STATIC_KEY_INIT_FALSE,                \
        .static_call_key = &__SCK__tp_func_##_name,    \
        .static_call_tramp = NULL,    \
        .iterator = &__traceiter_##_name,            \
        .regfunc = _reg,                    \
        .unregfunc = _unreg,                    \
        .funcs = NULL };                    \
    __TRACEPOINT_ENTRY(_name);                    \
    int __nocfi __traceiter_##_name(void *__data, proto)            \
    {                                \
        struct tracepoint_func *it_func_ptr;            \
        void *it_func;                        \
                                    \
        /* differs from ftrace: funcs is read directly, ftrace uses rcu_dereference_raw() */ \
        it_func_ptr = (&__tracepoint_##_name)->funcs;        \
        /* differs from ftrace: the first func is fetched up front, ftrace checks it_func_ptr first */ \
        it_func = (it_func_ptr)->func;                \
        do {                            \
            __data = (it_func_ptr)->data;            \
            ((void(*)(void *, proto))(it_func))(__data, args); \
            it_func = READ_ONCE((++it_func_ptr)->func); \
        } while (it_func);    \
        return 0;                        \
    }                                   \
    extern struct static_call_key __SCK__tp_func_##_name;  \
    extern typeof(__traceiter_##_name) __SCT__tp_func_##_name;         \
    struct static_call_key __SCK__tp_func_##_name = {      \
        .func = __traceiter_##_name,                        \
    }

注意备注上的一些和ftrace之间的不同点。

2. hook 的 __DECLARE_HOOK 解析后是:

//include/trace/hooks/vendor_hooks.h
/*
 * Vendor-hook counterpart of __DECLARE_TRACE. Declares trace_##name(),
 * trace_##name##_enabled() and register_trace_##name(); the latter registers
 * through android_rvh_probe_register(). No rcuidle, priority or unregister
 * variants are generated.
 *
 * Every line of the macro body must end with a continuation backslash, and
 * the inlined do { ... } while (0) needs its terminating ';'.
 */
#define __DECLARE_HOOK(name, proto, args, cond, data_proto, data_args)    \
    extern int __traceiter_##name(data_proto);            \
    extern struct static_call_key __SCK__tp_func_##name;        \
    extern typeof(__traceiter_##name) __SCT__tp_func_##name;    \
    extern struct tracepoint __tracepoint_##name;            \
                                                            \
    static inline void __nocfi trace_##name(proto)            \
    {                                \
        if (static_key_false(&__tracepoint_##name.key))     \
            do {                                \
                struct tracepoint_func *it_func_ptr;            \
                void *__data;                        \
                                            \
                if (!(cond))                        \
                    return;                        \
                                            \
                it_func_ptr = (&__tracepoint_##name)->funcs;        \
                if (it_func_ptr) {                    \
                    __data = (it_func_ptr)->data;            \
                    __traceiter_##name(data_args);     \
                }                            \
            } while (0);    \
    }                                \
    static inline bool trace_##name##_enabled(void)        \
    {                                \
        return static_key_false(&__tracepoint_##name.key);    \
    }                                \
    static inline int register_trace_##name(void (*probe)(data_proto), void *data)    \
    {                                \
        return android_rvh_probe_register(&__tracepoint_##name, (void *)probe, data);    \
    }                                \
    /* vendor hooks cannot be unregistered */

相比于ftrace,vendor hook 的宏删除了 trace_##name##_rcuidle()、register_trace_prio_##name()、unregister_trace_##name()、check_trace_callback_type_##name() 这几个函数。

3. 其它宏

/*
 * Definition-side variant: expands to DEFINE_HOOK_FN, i.e. it emits the
 * struct tracepoint and its iterator. The cond argument is not used here.
 * NOTE(review): in the upstream header this and the definition below appear
 * to sit in alternative preprocessor branches (selected by
 * TRACE_HEADER_MULTI_READ) rather than one after the other - confirm against
 * include/trace/hooks/vendor_hooks.h.
 */
#undef DECLARE_RESTRICTED_HOOK
#define DECLARE_RESTRICTED_HOOK(name, proto, args, cond) \
    DEFINE_HOOK_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args))

/*
 * Declaration-side variant: expands to __DECLARE_HOOK, i.e. it emits the
 * trace_<name>() / register_trace_<name>() inline helpers, prefixing the
 * prototype and argument list with the void *__data parameter.
 */
#undef DECLARE_RESTRICTED_HOOK
#define DECLARE_RESTRICTED_HOOK(name, proto, args, cond) \
    __DECLARE_HOOK(name, PARAMS(proto), PARAMS(args), cond, PARAMS(void *__data, proto),PARAMS(__data, args))

4. 总结

Google的vendor hook在ftrace的基础上做了改动,由于Google的Hook宏删除了ftrace中的 register_trace_prio_##name(),因此不能注册带有优先级的钩子函数了。

三、实验

1. 对5.10内核中的 util_est_update 中的trace添加hook

static inline void util_est_update(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep) //fair.c
{
    ...
    //Google 搞的 vendor hook
    trace_android_rvh_util_est_update(cfs_rq, p, task_sleep, &ret);
    if (ret)
        return;
    ...
    //普通的ftrace
    trace_sched_util_est_se_tp(&p->se);
}

这两个trace符号Google已经导出来了:

EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_util_est_update); //vendor_hooks.c
EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_se_tp); //core.c

2. 实验代码

/* 1. 包含头文件 */
#include <trace/events/sched.h>


/* 2. 实现handler钩子函数,类型要与 register_trace_##name() 的 probe 参数类型相同,即比 trace_##name() 的参数列表多一个位于首位的 void *data */
/*
 * First probe attached to the android_rvh_util_est_update vendor hook, which
 * is invoked from util_est_update() in fair.c.
 *
 * Logs the task's and the cfs_rq's util_est values plus task_sleep through
 * trace_printk(), then stores 0 in *ret_o so that the caller (which returns
 * early when the value is non-zero) carries on with its normal path.
 *
 * @data:       private pointer given at registration time (unused here)
 * @cfs_rq:     runqueue whose avg.util_est is reported
 * @p:          task whose se.avg.util_est is reported
 * @task_sleep: forwarded from util_est_update()
 * @ret_o:      out-parameter read by the caller after the hook returns
 */
void android_rvh_util_est_update_handler(void *data, struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep, int *ret_o)
{
    const struct util_est *task_est = &p->se.avg.util_est;
    const struct util_est *rq_est = &cfs_rq->avg.util_est;

    /* enqueued/ewma are unsigned int in struct util_est, hence %u instead of %d */
    trace_printk("start: first_register: se_ue->enqueued=%u, se_ue->ewma=%u, rq_ue->enqueued=%u, rq_ue->ewma=%u, task_sleep=%d\n",
            task_est->enqueued, task_est->ewma, rq_est->enqueued, rq_est->ewma, task_sleep);
    *ret_o = 0;
}

/*
 * Second probe attached to the same android_rvh_util_est_update vendor hook;
 * it exists to show the order in which several probes on one hook are called.
 *
 * Logs the same values as the first probe (tagged "second_register") and
 * stores 0 in *ret_o so the caller continues normally.
 *
 * @data:       private pointer given at registration time (unused here)
 * @cfs_rq:     runqueue whose avg.util_est is reported
 * @p:          task whose se.avg.util_est is reported
 * @task_sleep: forwarded from util_est_update()
 * @ret_o:      out-parameter read by the caller after the hook returns
 */
void android_rvh_util_est_update_handler_second(void *data, struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep, int *ret_o)
{
    const struct util_est *task_est = &p->se.avg.util_est;
    const struct util_est *rq_est = &cfs_rq->avg.util_est;

    /* enqueued/ewma are unsigned int in struct util_est, hence %u instead of %d */
    trace_printk("start: second_register: se_ue->enqueued=%u, se_ue->ewma=%u, rq_ue->enqueued=%u, rq_ue->ewma=%u, task_sleep=%d\n",
            task_est->enqueued, task_est->ewma, rq_est->enqueued, rq_est->ewma, task_sleep);
    *ret_o = 0;
}

//只改变这一个debug优先级, 默认优先级是10
void sched_util_est_se_tp_handler(void *data, struct sched_entity *se)
{
    static int count = 0;
    int prio = 10;

    if (entity_is_task(se)) {
        struct task_struct *p = container_of(se, struct task_struct, se);
        struct rq *rq = cpu_rq(task_cpu(p));
        struct cfs_rq *cfs_rq = &rq->cfs;
        struct util_est *se_ue = &p->se.avg.util_est;
        struct util_est *rq_ue = &cfs_rq->avg.util_est;
        trace_printk("end: count=%d, prio=%d, se_ue->enqueued=%d, se_ue->ewma=%d, rq_ue->enqueued=%d, rq_ue->ewma=%d\n",
                count++, prio, se_ue->enqueued, se_ue->ewma, rq_ue->enqueued, rq_ue->ewma);
    } else {
        trace_printk("end: se is not task\n");
    }
}

void sched_util_est_se_tp_handler_prio_12(void *data, struct sched_entity *se)
{
    static int count = 0;
    int prio = 12;

    if (entity_is_task(se)) {
        struct task_struct *p = container_of(se, struct task_struct, se);
        struct rq *rq = cpu_rq(task_cpu(p));
        struct cfs_rq *cfs_rq = &rq->cfs;
        struct util_est *se_ue = &p->se.avg.util_est;
        struct util_est *rq_ue = &cfs_rq->avg.util_est;
        trace_printk("end: count=%d, prio=%d, se_ue->enqueued=%d, se_ue->ewma=%d, rq_ue->enqueued=%d, rq_ue->ewma=%d\n",
                count++, prio, se_ue->enqueued, se_ue->ewma, rq_ue->enqueued, rq_ue->ewma);
    } else {
        trace_printk("end: se is not task\n");
    }
}


/* 3. 注册handler */
/*
 * Plain registration: one probe on the vendor hook and one on the ftrace
 * tracepoint, both without an explicit priority.
 * NOTE(review): these calls are shown as bare statements; they have to live
 * inside a function (e.g. the module's init routine), and their int return
 * values are ignored here.
 */
register_trace_android_rvh_util_est_update(android_rvh_util_est_update_handler, NULL);
register_trace_sched_util_est_se_tp(sched_util_est_se_tp_handler, NULL);
/* The vendor-hook macros define no register_trace_prio_*() helper, so the second hook is added with the same plain call. */
register_trace_android_rvh_util_est_update(android_rvh_util_est_update_handler_second, NULL);
/* ftrace tracepoints can be registered with an explicit priority (12 here). */
register_trace_prio_sched_util_est_se_tp(sched_util_est_se_tp_handler_prio_12, NULL, 12);

3. 实验结果,打印的前后关系:

# echo 1 > tracing_on
# cat trace_pipe
<...>-338     [005] d..3    32.158404: sched_util_est_se_tp_handler_prio_12: end: count=28494, prio=12, se_ue->enqueued=39, se_ue->ewma=48, rq_ue->enqueued=87, rq_ue->ewma=0
<...>-338     [005] d..3    32.158404: sched_util_est_se_tp_handler: end: count=28493, prio=10, se_ue->enqueued=39, se_ue->ewma=48, rq_ue->enqueued=87, rq_ue->ewma=0

<...>-338     [005] d..2    32.158410: android_rvh_util_est_update_handler: start: first_register: se_ue->enqueued=39, se_ue->ewma=48, rq_ue->enqueued=87, rq_ue->ewma=0, task_sleep=1
<...>-338     [005] d..2    32.158410: android_rvh_util_est_update_handler_second: start: second_register: se_ue->enqueued=39, se_ue->ewma=48, rq_ue->enqueued=87, rq_ue->ewma=0, task_sleep=1

普通ftrace,注册时指定的优先级数值越大,越先调用。vendor hook 没有带有优先级注册的钩子函数,先注册的钩子函数调用在前,后注册的钩子函数调用在后。

看代码实现,就算是不执行 “echo 1 > tracing_on” 这些钩子函数应该也会被调用执行,只不过不会打印出来。

4. 另一种注册trace hook的方法

/* One row of the name-based registration table used below. */
struct tracepoints_table {
    const char *name; /* tracepoint name to search for; compared with tracepoint::name */
    void *func; /* probe function to attach to that tracepoint */
    struct tracepoint *tp; /* matching tracepoint, stored by lookup_tracepoints() */
    bool registered; /* set by register_tracepoints_table(), cleared by tracepoint_cleanup() */
};

/*
 * Tracepoints to hook and the handler for each. Only name and func are
 * initialised here; tp and registered are left zero-initialised and filled
 * in during registration.
 */
static struct tracepoints_table g_tracepoints_table[] = {
    {.name = "android_rvh_util_est_update", .func = android_rvh_util_est_update_handler},
    {.name = "sched_util_est_se_tp", .func = sched_util_est_se_tp_handler},
};

static void lookup_tracepoints(struct tracepoint *tp, void *ignore)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(g_tracepoints_table); i++) {
        if (!strcmp(g_tracepoints_table[i].name, tp->name))
            g_tracepoints_table[i].tp = tp;
    }
}

/*
 * Unregister the probes of table rows [0, index) that are currently marked
 * as registered and clear their flag. Defined ahead of
 * register_tracepoints_table() so that it is declared before its first use.
 *
 * NOTE(review): the table contains an android_rvh_* restricted hook, and the
 * vendor-hook macro states that such hooks cannot be unregistered - confirm
 * that unregistering it here is acceptable.
 */
void tracepoint_cleanup(int index)
{
    int i;

    for (i = 0; i < index; i++) {
        struct tracepoints_table *tt = &g_tracepoints_table[i];

        if (!tt->registered)
            continue;
        tracepoint_probe_unregister(tt->tp, tt->func, NULL);
        tt->registered = false;
    }
}

/*
 * Resolve every row of g_tracepoints_table to its struct tracepoint by name
 * and attach the row's probe with the default priority.
 *
 * If a registration fails, the rows registered so far are rolled back and
 * the function stops; the failing row is never marked as registered. Rows
 * whose tracepoint could not be found are reported and skipped.
 */
static void register_tracepoints_table(void)
{
    int i, ret;
    struct tracepoints_table *tt;

    for_each_kernel_tracepoint(lookup_tracepoints, NULL); /* find the matching struct tracepoint for each row */
    for (i = 0; i < ARRAY_SIZE(g_tracepoints_table); i++) {
        tt = &g_tracepoints_table[i];
        if (!tt->tp) {
            pr_warn("tracepoint %s not found\n", tt->name);
            continue;
        }

        ret = tracepoint_probe_register(tt->tp, tt->func, NULL);
        if (ret) {
            /* %ps prints the symbol name of the probe; %pf no longer does */
            pr_err("couldn't activate tracepoint %s (%ps): %d\n", tt->name, tt->func, ret);
            tracepoint_cleanup(i);
            return;
        }
        tt->registered = true;
    }
}

可见这种注册方式需要遍历 tracepoint 区域,并对 name 逐个进行比较,效率比较低,优点是涉及的文件比较少。

原文地址:https://www.cnblogs.com/hellokitty2/p/15522289.html