Flower Classifier

TC routine: htb

    qdisc_enqueue_root(sch_generic.h) -> qdisc_enqueue(sch_generic.h) -> htb_enqueue(sch_htb.c) -> htb_classify(sch_htb.c) -> flow_classify(cls_flow.c) -> tcf_exts_exec(pkt_cls.h) -> tcf_action_exec(act_api.c) -> tcf_act_police(act_police.c)

(With the flower classifier attached instead of cls_flow, the classify step is fl_classify() in cls_flower.c; see tcf_classify --> fl_classify below.)
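To make the dispatch concrete, here is a minimal sketch (assumed, not the verbatim kernel source; the exact tcf_classify() signature varies by kernel version, and htb_classify_sketch is a hypothetical name) of how htb_classify() hands the skb to the attached classifier chain:

static struct htb_class *htb_classify_sketch(struct sk_buff *skb,
                                             struct Qdisc *sch)
{
        struct htb_sched *q = qdisc_priv(sch);
        struct tcf_proto *tcf = rcu_dereference_bh(q->filter_list);
        struct tcf_result res;

        /* tcf_classify() walks the tcf_proto chain and calls each
         * filter's ->classify() hook (fl_classify for flower,
         * flow_classify for cls_flow); on a match the filter has
         * already run its actions via tcf_exts_exec(). */
        if (tcf_classify(skb, tcf, &res, false) < 0)
                return NULL;            /* no filter matched */

        return (struct htb_class *)res.class;   /* class picked via res.classid */
}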

Mellanox: flow_indr_block_cb_register

drivers/net/ethernet/mellanox/mlx5/core/en_rep.c:837:   err = __flow_indr_block_cb_register(netdev, rpriv,
include/net/flow_offload.h:396:int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
include/net/flow_offload.h:404:int flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
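A heavily hedged sketch of the registration side (based on the ~v5.4 flow_offload API whose prototype the grep above shows; my_indr_setup_cb and my_indr_register are hypothetical names): the driver asks the core to notify it when a TC block is bound to a device it does not own, e.g. a tunnel netdev whose flows the NIC can nevertheless offload.

static int my_indr_setup_cb(struct net_device *netdev, void *cb_priv,
                            enum tc_setup_type type, void *type_data)
{
        /* invoked on TC block bind/unbind for netdev; for TC_SETUP_BLOCK
         * the driver would attach its flow block callback here */
        return type == TC_SETUP_BLOCK ? 0 : -EOPNOTSUPP;
}

static int my_indr_register(struct net_device *netdev, void *my_priv)
{
        /* cb_priv and cb_ident both point at the driver's private state,
         * mirroring the mlx5 en_rep.c call site above */
        return __flow_indr_block_cb_register(netdev, my_priv,
                                             my_indr_setup_cb, my_priv);
}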

TC Flower supports three operating modes: skip_sw, skip_hw, and unspecified.

In skip_sw mode, TC Flower only offloads the rule to the NIC driver. If the hardware offload fails, the rule is not added at all.

In skip_hw mode, TC Flower ignores the underlying hardware completely and installs the rule in software only.

If no operating mode is specified, TC Flower tries to install the rule in both hardware and software; only a software failure is fatal, so the rule survives even if the hardware cannot accept it.

                if (f && !tc_skip_sw(f->flags)) {   /* not executed when the rule is hardware-only (skip_sw) */
                        *res = f->res;
                        return tcf_exts_exec(skb, &f->exts, res);
                }
static inline bool tc_skip_hw(u32 flags)
{
        return (flags & TCA_CLS_FLAGS_SKIP_HW) ? true : false;
}

static inline bool tc_skip_sw(u32 flags)
{
        return (flags & TCA_CLS_FLAGS_SKIP_SW) ? true : false;
}
net/sched/cls_bpf.c:422:        ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, true,
net/sched/cls_flower.c does not call tcf_exts_validate directly (in the version inspected here)
 tcf_exts_validate
     tcf_action_init
           tcf_action_init_1

TC_SETUP_BLOCK / TC_SETUP_CLSFLOWER

 err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
 tc_setup_cb_add(block, tp, TC_SETUP_CLSFLOWER, &cls_flower,
                              skip_sw, &f->flags, &f->in_hw_count, rtnl_held);
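On the driver side, a hedged sketch (my_block_cb is a hypothetical name; the types follow the ~v5.4 flow_offload API) of the callback that the two calls above end up invoking: ndo_setup_tc(dev, TC_SETUP_BLOCK, ...) lets the driver register a block callback, and tc_setup_cb_add() later calls it once per rule with TC_SETUP_CLSFLOWER.

static int my_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
{
        struct flow_cls_offload *f = type_data;

        if (type != TC_SETUP_CLSFLOWER)
                return -EOPNOTSUPP;

        switch (f->command) {
        case FLOW_CLS_REPLACE:
                /* translate f->rule (match + actions) into a HW rule */
                return 0;
        case FLOW_CLS_DESTROY:
                /* delete the HW rule identified by f->cookie */
                return 0;
        case FLOW_CLS_STATS:
                /* fill f->stats with HW counters */
                return 0;
        default:
                return -EOPNOTSUPP;
        }
}

A non-zero return from the FLOW_CLS_REPLACE branch is what makes a skip_sw insert fail, since there is no software fallback in that mode.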

struct tcf_exts

struct tcf_exts {
#ifdef CONFIG_NET_CLS_ACT
    __u32    type; /* for backward compat(TCA_OLD_COMPAT) */
    int nr_actions;
    struct tc_action **actions;
#endif
    /* Map to export classifier specific extension TLV types to the
     * generic extensions API. Unsupported extensions must be set to 0.
     */
    int action;
    int police;
};
struct tc_action {
        /* private data */
        void                    *priv;
        /* ops structure */
        struct tc_action_ops    *ops;
        /* type */
        __u32                   type; /* for backward compat(TCA_OLD_COMPAT) */
        /* order within the action chain */
        __u32                   order;
        /* next action in the chain */
        struct tc_action        *next;
};
#define TCA_CAP_NONE 0

/* Action ops structure: it defines the action's operations. Each action
 * type is normally described by one static tc_action_ops structure,
 * implemented as a kernel module and registered on a global list at
 * module init time. */
struct tc_action_ops {
        /* next entry in the registered-ops list */
        struct tc_action_ops *next;
        struct tcf_hashinfo *hinfo;
        /* name */
        char    kind[IFNAMSIZ];
        __u32   type;   /* TBD to match kind */
        __u32   capab;  /* capabilities includes 4 bit version */
        struct module   *owner;
        /* per-packet action handler */
        int     (*act)(struct sk_buff *, struct tc_action *, struct tcf_result *);
        /* fetch statistics */
        int     (*get_stats)(struct sk_buff *, struct tc_action *);
        /* dump configuration to netlink */
        int     (*dump)(struct sk_buff *, struct tc_action *, int, int);
        /* teardown */
        int     (*cleanup)(struct tc_action *, int bind);
        /* lookup by index */
        int     (*lookup)(struct tc_action *, u32);
        /* initialization */
        int     (*init)(struct rtattr *, struct rtattr *, struct tc_action *, int, int);
        /* iterate over all instances */
        int     (*walk)(struct sk_buff *, struct netlink_callback *, int, struct tc_action *);
};
/*
 * net/sched/cls_flower.c               Flower classifier
 *
 * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/rhashtable.h>

#include <linux/if_ether.h>
#include <linux/in6.h>
#include <linux/ip.h>

#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/ip.h>
#include <net/flow_dissector.h>

struct fl_flow_key {
        int     indev_ifindex;
        struct flow_dissector_key_control control;
        struct flow_dissector_key_basic basic;
        struct flow_dissector_key_eth_addrs eth;
        struct flow_dissector_key_addrs ipaddrs;
        union {
                struct flow_dissector_key_ipv4_addrs ipv4;
                struct flow_dissector_key_ipv6_addrs ipv6;
        };
        struct flow_dissector_key_ports tp;
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */

struct fl_flow_mask_range {
        unsigned short int start;
        unsigned short int end;
};

struct fl_flow_mask {
        struct fl_flow_key key;
        struct fl_flow_mask_range range;
        struct rcu_head rcu;
};

struct cls_fl_head {
        struct rhashtable ht;
        struct fl_flow_mask mask;
        struct flow_dissector dissector;
        u32 hgen;
        bool mask_assigned;
        struct list_head filters;
        struct rhashtable_params ht_params;
        struct rcu_head rcu;
};

struct cls_fl_filter {
        struct rhash_head ht_node;
        struct fl_flow_key mkey;
        struct tcf_exts exts;
        struct tcf_result res;
        struct fl_flow_key key;
        struct list_head list;
        u32 handle;
        u32 flags;
        struct rcu_head rcu;
};

static unsigned short int fl_mask_range(const struct fl_flow_mask *mask)
{
        return mask->range.end - mask->range.start;
}

static void fl_mask_update_range(struct fl_flow_mask *mask)
{
        const u8 *bytes = (const u8 *) &mask->key;
        size_t size = sizeof(mask->key);
        size_t i, first = 0, last = size - 1;

        for (i = 0; i < sizeof(mask->key); i++) {
                if (bytes[i]) {
                        if (!first && i)
                                first = i;
                        last = i;
                }
        }
        mask->range.start = rounddown(first, sizeof(long));
        mask->range.end = roundup(last + 1, sizeof(long));
}

static void *fl_key_get_start(struct fl_flow_key *key,
                              const struct fl_flow_mask *mask)
{
        return (u8 *) key + mask->range.start;
}

static void fl_set_masked_key(struct fl_flow_key *mkey, struct fl_flow_key *key,
                              struct fl_flow_mask *mask)
{
        const long *lkey = fl_key_get_start(key, mask);
        const long *lmask = fl_key_get_start(&mask->key, mask);
        long *lmkey = fl_key_get_start(mkey, mask);
        int i;

        for (i = 0; i < fl_mask_range(mask); i += sizeof(long))
                *lmkey++ = *lkey++ & *lmask++;
}

static void fl_clear_masked_range(struct fl_flow_key *key,
                                  struct fl_flow_mask *mask)
{
        memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask));
}

 tcf_exts_exec

  • TCA_CLS_FLAGS_SKIP_HW: add the rule in software only (the kernel TC module), not in hardware. If the rule cannot be added, an error is returned.
  • TCA_CLS_FLAGS_SKIP_SW: add the rule in hardware only (the NIC the rule is attached to), not in software. If the rule cannot be added, an error is returned.
  • Default (no flag): try to install the rule in both hardware and software; an error is returned only if the software add fails.

When listing rules with the tc command, a rule that has been offloaded to hardware shows the in_hw flag.
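The mutual exclusion of the two flags is enforced at insert time. This is the early tc_flags_valid() helper from include/net/pkt_cls.h (later kernels add further flag bits such as TCA_CLS_FLAGS_VERBOSE):

static inline bool tc_flags_valid(u32 flags)
{
        /* only the two skip bits may be set ... */
        if (flags & ~(TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW))
                return false;

        /* ... and not both at once, or the rule could never match anywhere */
        if (!(flags ^ (TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW)))
                return false;

        return true;
}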


static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
                       struct tcf_result *res)
{
        struct cls_fl_head *head = rcu_dereference_bh(tp->root);
        struct cls_fl_filter *f;
        struct fl_flow_key skb_key;
        struct fl_flow_key skb_mkey;

        if (!atomic_read(&head->ht.nelems))
                return -1;

        fl_clear_masked_range(&skb_key, &head->mask);
        skb_key.indev_ifindex = skb->skb_iif;
        /* skb_flow_dissect() does not set n_proto in case an unknown protocol,
         * so do it rather here.
         */
        skb_key.basic.n_proto = skb->protocol;
        skb_flow_dissect(skb, &head->dissector, &skb_key, 0);

        fl_set_masked_key(&skb_mkey, &skb_key, &head->mask);

        f = rhashtable_lookup_fast(&head->ht,
                                   fl_key_get_start(&skb_mkey, &head->mask),
                                   head->ht_params);
        if (f && !tc_skip_sw(f->flags)) {
                *res = f->res;
                return tcf_exts_exec(skb, &f->exts, res);
        }
        return -1;
}

static inline int
tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
           struct tcf_result *res)
{
    if (exts->nr_actions)
        return tcf_action_exec(skb, exts->actions, exts->nr_actions,
                       res);
    return 0;
}

int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
            int nr_actions, struct tcf_result *res)
{
    int ret = -1, i;

    if (skb->tc_verd & TC_NCLS) {
        skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
        ret = TC_ACT_OK;
        goto exec_done;
    }
    for (i = 0; i < nr_actions; i++) {
        const struct tc_action *a = actions[i];

repeat:
        ret = a->ops->act(skb, a, res);
        if (ret == TC_ACT_REPEAT)
            goto repeat;    /* we need a ttl - JHS */
        if (ret != TC_ACT_PIPE)
            goto exec_done;
    }
exec_done:
    return ret;
}
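What the caller does with the returned verdict is qdisc-specific; a hedged sketch (handle_verdict is a hypothetical helper, not kernel code) of the usual interpretation:

static int handle_verdict(struct sk_buff *skb, int verdict,
                          struct tcf_result *res)
{
        switch (verdict) {
        case TC_ACT_SHOT:
                kfree_skb(skb);         /* drop (a real qdisc uses qdisc_drop()) */
                return -1;
        case TC_ACT_STOLEN:
        case TC_ACT_QUEUED:
                return -1;              /* skb consumed elsewhere */
        case TC_ACT_OK:
        default:
                return res->classid;    /* proceed with the matched class */
        }
}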
net/sched/cls_flower.c initialization

tcf_exts_init(&fnew->exts, net, TCA_FLOWER_ACT, 0);

General information

The Linux kernel configuration item CONFIG_MLX5_CLS_ACT enables TC classifier action support (flower rule offload) in the mlx5 driver.

When the action is police, the corresponding callback is tcf_act_police(); it is analyzed separately later.

net/sched/cls_flow.c:441:       err = tcf_exts_init(&fnew->exts, net, TCA_FLOW_ACT, TCA_FLOW_POLICE);
net/sched/cls_flower.c:1577:    err = tcf_exts_init(&fnew->exts, net, TCA_FLOWER_ACT, 0);
static inline int tcf_exts_init(struct tcf_exts *exts, struct net *net,
                                int action, int police)
{
#ifdef CONFIG_NET_CLS_ACT
        exts->type = 0;
        exts->nr_actions = 0; /* zero actions: tcf_exts_exec() will not run tcf_action_exec() */
        exts->net = net;
        exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *),
                                GFP_KERNEL);
        if (!exts->actions)
                return -ENOMEM;
#endif
        exts->action = action;
        exts->police = police;
        return 0;
}
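For completeness, a small usage sketch (assumed, not from the original post; struct my_filter and my_set_parms are hypothetical, and the tcf_exts_validate() signature follows the 8-argument form seen in the cls_bpf grep above): tcf_exts_init() only allocates the empty actions array; it is tcf_exts_validate() that parses the netlink action attributes and bumps nr_actions, and every error path must pair the init with tcf_exts_destroy().

struct my_filter {
        struct tcf_exts exts;
};

static int my_set_parms(struct net *net, struct tcf_proto *tp,
                        struct my_filter *f, struct nlattr **tb,
                        struct nlattr *est, bool ovr)
{
        int err;

        err = tcf_exts_init(&f->exts, net, TCA_FLOWER_ACT, 0);
        if (err < 0)
                return err;

        /* parse TCA_*_ACT/TCA_RATE attributes into tc_action objects;
         * on success exts.nr_actions > 0 and tcf_exts_exec() has work */
        err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, NULL);
        if (err < 0)
                tcf_exts_destroy(&f->exts);
        return err;
}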

fl_classify

 tcf_classify -->  fl_classify

static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
                       struct tcf_result *res)
{
        struct cls_fl_head *head = rcu_dereference_bh(tp->root);
        struct fl_flow_key skb_mkey;
        struct fl_flow_key skb_key;
        struct fl_flow_mask *mask;
        struct cls_fl_filter *f;

        list_for_each_entry_rcu(mask, &head->masks, list) {
                flow_dissector_init_keys(&skb_key.control, &skb_key.basic);
                fl_clear_masked_range(&skb_key, mask);

                skb_flow_dissect_meta(skb, &mask->dissector, &skb_key);
                /* skb_flow_dissect() does not set n_proto in case an unknown
                 * protocol, so do it rather here.
                 */
                skb_key.basic.n_proto = skb_protocol(skb, false);
                skb_flow_dissect_tunnel_info(skb, &mask->dissector, &skb_key);
                skb_flow_dissect_ct(skb, &mask->dissector, &skb_key,
                                    fl_ct_info_to_flower_map,
                                    ARRAY_SIZE(fl_ct_info_to_flower_map));
                skb_flow_dissect(skb, &mask->dissector, &skb_key, 0);

                fl_set_masked_key(&skb_mkey, &skb_key, mask);

                f = fl_lookup(mask, &skb_mkey, &skb_key);
                if (f && !tc_skip_sw(f->flags)) {
                        *res = f->res;
                        return tcf_exts_exec(skb, &f->exts, res);
                }
        }
        return -1;
}
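In this newer layout each fl_flow_mask owns its own hash table, so fl_lookup() only hashes the byte range the mask covers. A sketch modeled on __fl_lookup() from the same kernel era (port-range handling elided; fl_lookup_sketch is a hypothetical name):

static struct cls_fl_filter *fl_lookup_sketch(struct fl_flow_mask *mask,
                                              struct fl_flow_key *mkey)
{
        /* key the lookup on the masked bytes only, exactly as
         * fl_set_masked_key() produced them */
        return rhashtable_lookup_fast(&mask->ht,
                                      fl_key_get_start(mkey, mask),
                                      mask->filter_ht_params);
}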
static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
                                          u32 prio, struct tcf_chain *chain,
                                          bool rtnl_held,
                                          struct netlink_ext_ack *extack)
{
        struct tcf_proto *tp;
        int err;

        tp = kzalloc(sizeof(*tp), GFP_KERNEL);
        if (!tp)
                return ERR_PTR(-ENOBUFS);

        tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack);
        if (IS_ERR(tp->ops)) {
                err = PTR_ERR(tp->ops);
                goto errout;
        }
        tp->classify = tp->ops->classify;
        tp->protocol = protocol;
        tp->prio = prio;
        tp->chain = chain;
        spin_lock_init(&tp->lock);
        refcount_set(&tp->refcnt, 1);

        err = tp->ops->init(tp);
        if (err) {
                module_put(tp->ops->owner);
                goto errout;
        }
        return tp;

errout:
        kfree(tp);
        return ERR_PTR(err);
}



static int fl_init(struct tcf_proto *tp)
{
        struct cls_fl_head *head;

        head = kzalloc(sizeof(*head), GFP_KERNEL);
        if (!head)
                return -ENOBUFS;

        INIT_LIST_HEAD_RCU(&head->filters);
        rcu_assign_pointer(tp->root, head);

        return 0;
}

static void fl_destroy_filter(struct rcu_head *head)
{
        struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu);

        tcf_exts_destroy(&f->exts);
        kfree(f);
}

static void fl_hw_destroy_filter(struct tcf_proto *tp, unsigned long cookie)
{
        struct net_device *dev = tp->q->dev_queue->dev;
        struct tc_cls_flower_offload offload = {0};
        struct tc_to_netdev tc;

        if (!tc_should_offload(dev, tp, 0))
                return;

        offload.command = TC_CLSFLOWER_DESTROY;
        offload.cookie = cookie;

        tc.type = TC_SETUP_CLSFLOWER;
        tc.cls_flower = &offload;

        dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
}

static int fl_hw_replace_filter(struct tcf_proto *tp,
                                struct flow_dissector *dissector,
                                struct fl_flow_key *mask,
                                struct fl_flow_key *key,
                                struct tcf_exts *actions,
                                unsigned long cookie, u32 flags)
{
        struct net_device *dev = tp->q->dev_queue->dev;
        struct tc_cls_flower_offload offload = {0};
        struct tc_to_netdev tc;
        int err;

        if (!tc_should_offload(dev, tp, flags))
                return tc_skip_sw(flags) ? -EINVAL : 0;

        offload.command = TC_CLSFLOWER_REPLACE;
        offload.cookie = cookie;
        offload.dissector = dissector;
        offload.mask = mask;
        offload.key = key;
        offload.exts = actions;

        tc.type = TC_SETUP_CLSFLOWER;
        tc.cls_flower = &offload;

        err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);

        if (tc_skip_sw(flags))
                return err;

        return 0;
}

static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
{
        struct net_device *dev = tp->q->dev_queue->dev;
        struct tc_cls_flower_offload offload = {0};
        struct tc_to_netdev tc;

        if (!tc_should_offload(dev, tp, 0))
                return;

        offload.command = TC_CLSFLOWER_STATS;
        offload.cookie = (unsigned long)f;
        offload.exts = &f->exts;

        tc.type = TC_SETUP_CLSFLOWER;
        tc.cls_flower = &offload;

        dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
}

static bool fl_destroy(struct tcf_proto *tp, bool force)
{
        struct cls_fl_head *head = rtnl_dereference(tp->root);
        struct cls_fl_filter *f, *next;

        if (!force && !list_empty(&head->filters))
                return false;

        list_for_each_entry_safe(f, next, &head->filters, list) {
                fl_hw_destroy_filter(tp, (unsigned long)f);
                list_del_rcu(&f->list);
                call_rcu(&f->rcu, fl_destroy_filter);
        }
        RCU_INIT_POINTER(tp->root, NULL);
        if (head->mask_assigned)
                rhashtable_destroy(&head->ht);
        kfree_rcu(head, rcu);
        return true;
}

static unsigned long fl_get(struct tcf_proto *tp, u32 handle)
{
        struct cls_fl_head *head = rtnl_dereference(tp->root);
        struct cls_fl_filter *f;

        list_for_each_entry(f, &head->filters, list)
                if (f->handle == handle)
                        return (unsigned long) f;
        return 0;
}

static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
        [TCA_FLOWER_UNSPEC]             = { .type = NLA_UNSPEC },
        [TCA_FLOWER_CLASSID]            = { .type = NLA_U32 },
        [TCA_FLOWER_INDEV]              = { .type = NLA_STRING,
                                            .len = IFNAMSIZ },
        [TCA_FLOWER_KEY_ETH_DST]        = { .len = ETH_ALEN },
        [TCA_FLOWER_KEY_ETH_DST_MASK]   = { .len = ETH_ALEN },
        [TCA_FLOWER_KEY_ETH_SRC]        = { .len = ETH_ALEN },
        [TCA_FLOWER_KEY_ETH_SRC_MASK]   = { .len = ETH_ALEN },
        [TCA_FLOWER_KEY_ETH_TYPE]       = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_IP_PROTO]       = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_IPV4_SRC]       = { .type = NLA_U32 },
        [TCA_FLOWER_KEY_IPV4_SRC_MASK]  = { .type = NLA_U32 },
        [TCA_FLOWER_KEY_IPV4_DST]       = { .type = NLA_U32 },
        [TCA_FLOWER_KEY_IPV4_DST_MASK]  = { .type = NLA_U32 },
        [TCA_FLOWER_KEY_IPV6_SRC]       = { .len = sizeof(struct in6_addr) },
        [TCA_FLOWER_KEY_IPV6_SRC_MASK]  = { .len = sizeof(struct in6_addr) },
        [TCA_FLOWER_KEY_IPV6_DST]       = { .len = sizeof(struct in6_addr) },
        [TCA_FLOWER_KEY_IPV6_DST_MASK]  = { .len = sizeof(struct in6_addr) },
        [TCA_FLOWER_KEY_TCP_SRC]        = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_TCP_DST]        = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_UDP_SRC]        = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_UDP_DST]        = { .type = NLA_U16 },
};

static void fl_set_key_val(struct nlattr **tb,
                           void *val, int val_type,
                           void *mask, int mask_type, int len)
{
        if (!tb[val_type])
                return;
        memcpy(val, nla_data(tb[val_type]), len);
        if (mask_type == TCA_FLOWER_UNSPEC || !tb[mask_type])
                memset(mask, 0xff, len);
        else
                memcpy(mask, nla_data(tb[mask_type]), len);
}

static int fl_set_key(struct net *net, struct nlattr **tb,
                      struct fl_flow_key *key, struct fl_flow_key *mask)
{
#ifdef CONFIG_NET_CLS_IND
        if (tb[TCA_FLOWER_INDEV]) {
                int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]);
                if (err < 0)
                        return err;
                key->indev_ifindex = err;
                mask->indev_ifindex = 0xffffffff;
        }
#endif

        fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
                       mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
                       sizeof(key->eth.dst));
        fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
                       mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
                       sizeof(key->eth.src));

        fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
                       &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
                       sizeof(key->basic.n_proto));

        if (key->basic.n_proto == htons(ETH_P_IP) ||
            key->basic.n_proto == htons(ETH_P_IPV6)) {
                fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
                               &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
                               sizeof(key->basic.ip_proto));
        }

        if (tb[TCA_FLOWER_KEY_IPV4_SRC] || tb[TCA_FLOWER_KEY_IPV4_DST]) {
                key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
                fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
                               &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
                               sizeof(key->ipv4.src));
                fl_set_key_val(tb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
                               &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
                               sizeof(key->ipv4.dst));
        } else if (tb[TCA_FLOWER_KEY_IPV6_SRC] || tb[TCA_FLOWER_KEY_IPV6_DST]) {
                key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
                fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
                               &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
                               sizeof(key->ipv6.src));
                fl_set_key_val(tb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
                               &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
                               sizeof(key->ipv6.dst));
        }

        if (key->basic.ip_proto == IPPROTO_TCP) {
                fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
                               &mask->tp.src, TCA_FLOWER_UNSPEC,
                               sizeof(key->tp.src));
                fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
                               &mask->tp.dst, TCA_FLOWER_UNSPEC,
                               sizeof(key->tp.dst));
        } else if (key->basic.ip_proto == IPPROTO_UDP) {
                fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
                               &mask->tp.src, TCA_FLOWER_UNSPEC,
                               sizeof(key->tp.src));
                fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
                               &mask->tp.dst, TCA_FLOWER_UNSPEC,
                               sizeof(key->tp.dst));
        }

        return 0;
}

static bool fl_mask_eq(struct fl_flow_mask *mask1,
                       struct fl_flow_mask *mask2)
{
        const long *lmask1 = fl_key_get_start(&mask1->key, mask1);
        const long *lmask2 = fl_key_get_start(&mask2->key, mask2);

        return !memcmp(&mask1->range, &mask2->range, sizeof(mask1->range)) &&
               !memcmp(lmask1, lmask2, fl_mask_range(mask1));
}

static const struct rhashtable_params fl_ht_params = {
        .key_offset = offsetof(struct cls_fl_filter, mkey), /* base offset */
        .head_offset = offsetof(struct cls_fl_filter, ht_node),
        .automatic_shrinking = true,
};

static int fl_init_hashtable(struct cls_fl_head *head,
                             struct fl_flow_mask *mask)
{
        head->ht_params = fl_ht_params;
        head->ht_params.key_len = fl_mask_range(mask);
        head->ht_params.key_offset += mask->range.start;

        return rhashtable_init(&head->ht, &head->ht_params);
}

#define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
#define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member))
#define FL_KEY_MEMBER_END_OFFSET(member) \
        (FL_KEY_MEMBER_OFFSET(member) + FL_KEY_MEMBER_SIZE(member))

#define FL_KEY_IN_RANGE(mask, member) \
        (FL_KEY_MEMBER_OFFSET(member) <= (mask)->range.end && \
         FL_KEY_MEMBER_END_OFFSET(member) >= (mask)->range.start)

#define FL_KEY_SET(keys, cnt, id, member) \
        do { \
                keys[cnt].key_id = id; \
                keys[cnt].offset = FL_KEY_MEMBER_OFFSET(member); \
                cnt++; \
        } while(0);

#define FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, id, member) \
        do { \
                if (FL_KEY_IN_RANGE(mask, member)) \
                        FL_KEY_SET(keys, cnt, id, member); \
        } while(0);

static void fl_init_dissector(struct cls_fl_head *head,
                              struct fl_flow_mask *mask)
{
        struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX];
        size_t cnt = 0;

        FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_CONTROL, control);
        FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic);
        FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
                               FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
        FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
                               FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
        FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
                               FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
        FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
                               FLOW_DISSECTOR_KEY_PORTS, tp);

        skb_flow_dissector_init(&head->dissector, keys, cnt);
}

static int fl_check_assign_mask(struct cls_fl_head *head,
                                struct fl_flow_mask *mask)
{
        int err;

        if (head->mask_assigned) {
                if (!fl_mask_eq(&head->mask, mask))
                        return -EINVAL;
                else
                        return 0;
        }

        /* Mask is not assigned yet. So assign it and init hashtable
         * according to that.
         */
        err = fl_init_hashtable(head, mask);
        if (err)
                return err;
        memcpy(&head->mask, mask, sizeof(head->mask));
        head->mask_assigned = true;

        fl_init_dissector(head, mask);

        return 0;
}

static int fl_set_parms(struct net *net, struct tcf_proto *tp,
                        struct cls_fl_filter *f, struct fl_flow_mask *mask,
                        unsigned long base, struct nlattr **tb,
                        struct nlattr *est, bool ovr)
{
        struct tcf_exts e;
        int err;

        tcf_exts_init(&e, TCA_FLOWER_ACT, 0);
        err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
        if (err < 0)
                return err;

        if (tb[TCA_FLOWER_CLASSID]) {
                f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
                tcf_bind_filter(tp, &f->res, base);
        }

        err = fl_set_key(net, tb, &f->key, &mask->key);
        if (err)
                goto errout;

        fl_mask_update_range(mask);
        fl_set_masked_key(&f->mkey, &f->key, mask);

        tcf_exts_change(tp, &f->exts, &e);

        return 0;
errout:
        tcf_exts_destroy(&e);
        return err;
}

static u32 fl_grab_new_handle(struct tcf_proto *tp,
                              struct cls_fl_head *head)
{
        unsigned int i = 0x80000000;
        u32 handle;

        do {
                if (++head->hgen == 0x7FFFFFFF)
                        head->hgen = 1;
        } while (--i > 0 && fl_get(tp, head->hgen));

        if (unlikely(i == 0)) {
                pr_err("Insufficient number of handles\n");
                handle = 0;
        } else {
                handle = head->hgen;
        }

        return handle;
}

static int fl_change(struct net *net, struct sk_buff *in_skb,
                     struct tcf_proto *tp, unsigned long base,
                     u32 handle, struct nlattr **tca,
                     unsigned long *arg, bool ovr)
{
        struct cls_fl_head *head = rtnl_dereference(tp->root);
        struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg;
        struct cls_fl_filter *fnew;
        struct nlattr *tb[TCA_FLOWER_MAX + 1];
        struct fl_flow_mask mask = {};
        int err;

        if (!tca[TCA_OPTIONS])
                return -EINVAL;

        err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], fl_policy);
        if (err < 0)
                return err;

        if (fold && handle && fold->handle != handle)
                return -EINVAL;

        fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
        if (!fnew)
                return -ENOBUFS;

        tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);

        if (!handle) {
                handle = fl_grab_new_handle(tp, head);
                if (!handle) {
                        err = -EINVAL;
                        goto errout;
                }
        }
        fnew->handle = handle;

        if (tb[TCA_FLOWER_FLAGS]) {
                fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);

                if (!tc_flags_valid(fnew->flags)) {
                        err = -EINVAL;
                        goto errout;
                }
        }

        err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
        if (err)
                goto errout;

        err = fl_check_assign_mask(head, &mask);
        if (err)
                goto errout;

        if (!tc_skip_sw(fnew->flags)) {
                err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
                                             head->ht_params);
                if (err)
                        goto errout;
        }
        /* invoke the hardware offload path */
        err = fl_hw_replace_filter(tp,
                                   &head->dissector,
                                   &mask.key,
                                   &fnew->key,
                                   &fnew->exts,
                                   (unsigned long)fnew,
                                   fnew->flags);
        if (err)
                goto errout;

        if (fold) {
                rhashtable_remove_fast(&head->ht, &fold->ht_node,
                                       head->ht_params);
                fl_hw_destroy_filter(tp, (unsigned long)fold);
        }

        *arg = (unsigned long) fnew;

        if (fold) {
                list_replace_rcu(&fold->list, &fnew->list);
                tcf_unbind_filter(tp, &fold->res);
                call_rcu(&fold->rcu, fl_destroy_filter);
        } else {
                list_add_tail_rcu(&fnew->list, &head->filters);
        }

        return 0;

errout:
        kfree(fnew);
        return err;
}

static int fl_delete(struct tcf_proto *tp, unsigned long arg)
{
        struct cls_fl_head *head = rtnl_dereference(tp->root);
        struct cls_fl_filter *f = (struct cls_fl_filter *) arg;

        rhashtable_remove_fast(&head->ht, &f->ht_node,
                               head->ht_params);
        list_del_rcu(&f->list);
        fl_hw_destroy_filter(tp, (unsigned long)f);
        tcf_unbind_filter(tp, &f->res);
        call_rcu(&f->rcu, fl_destroy_filter);
        return 0;
}

static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
        struct cls_fl_head *head = rtnl_dereference(tp->root);
        struct cls_fl_filter *f;

        list_for_each_entry_rcu(f, &head->filters, list) {
                if (arg->count < arg->skip)
                        goto skip;
                if (arg->fn(tp, (unsigned long) f, arg) < 0) {
                        arg->stop = 1;
                        break;
                }
skip:
                arg->count++;
        }
}

static int fl_dump_key_val(struct sk_buff *skb,
                           void *val, int val_type,
                           void *mask, int mask_type, int len)
{
        int err;

        if (!memchr_inv(mask, 0, len))
                return 0;
        err = nla_put(skb, val_type, len, val);
        if (err)
                return err;
        if (mask_type != TCA_FLOWER_UNSPEC) {
                err = nla_put(skb, mask_type, len, mask);
                if (err)
                        return err;
        }
        return 0;
}

static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
                   struct sk_buff *skb, struct tcmsg *t)
{
        struct cls_fl_head *head = rtnl_dereference(tp->root);
        struct cls_fl_filter *f = (struct cls_fl_filter *) fh;
        struct nlattr *nest;
        struct fl_flow_key *key, *mask;

        if (!f)
                return skb->len;

        t->tcm_handle = f->handle;

        nest = nla_nest_start(skb, TCA_OPTIONS);
        if (!nest)
                goto nla_put_failure;

        if (f->res.classid &&
            nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid))
                goto nla_put_failure;

        key = &f->key;
        mask = &head->mask.key;

        if (mask->indev_ifindex) {
                struct net_device *dev;

                dev = __dev_get_by_index(net, key->indev_ifindex);
                if (dev && nla_put_string(skb, TCA_FLOWER_INDEV, dev->name))
                        goto nla_put_failure;
        }

        fl_hw_update_stats(tp, f);

        if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
                            mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
                            sizeof(key->eth.dst)) ||
            fl_dump_key_val(skb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
                            mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
                            sizeof(key->eth.src)) ||
            fl_dump_key_val(skb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
                            &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
                            sizeof(key->basic.n_proto)))
                goto nla_put_failure;
        if ((key->basic.n_proto == htons(ETH_P_IP) ||
             key->basic.n_proto == htons(ETH_P_IPV6)) &&
            fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
                            &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
                            sizeof(key->basic.ip_proto)))
                goto nla_put_failure;

        if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
            (fl_dump_key_val(skb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
                             &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
                             sizeof(key->ipv4.src)) ||
             fl_dump_key_val(skb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
                             &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
                             sizeof(key->ipv4.dst))))
                goto nla_put_failure;
        else if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS &&
                 (fl_dump_key_val(skb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
                                  &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
                                  sizeof(key->ipv6.src)) ||
                  fl_dump_key_val(skb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
                                  &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
                                  sizeof(key->ipv6.dst))))
                goto nla_put_failure;

        if (key->basic.ip_proto == IPPROTO_TCP &&
            (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
                             &mask->tp.src, TCA_FLOWER_UNSPEC,
                             sizeof(key->tp.src)) ||
             fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
                             &mask->tp.dst, TCA_FLOWER_UNSPEC,
                             sizeof(key->tp.dst))))
                goto nla_put_failure;
        else if (key->basic.ip_proto == IPPROTO_UDP &&
                 (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
                                  &mask->tp.src, TCA_FLOWER_UNSPEC,
                                  sizeof(key->tp.src)) ||
                  fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
                                  &mask->tp.dst, TCA_FLOWER_UNSPEC,
                                  sizeof(key->tp.dst))))
                goto nla_put_failure;

        nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags);

        if (tcf_exts_dump(skb, &f->exts))
                goto nla_put_failure;

        nla_nest_end(skb, nest);

        if (tcf_exts_dump_stats(skb, &f->exts) < 0)
                goto nla_put_failure;

        return skb->len;

nla_put_failure:
        nla_nest_cancel(skb, nest);
        return -1;
}

tcf_proto_ops   cls_fl_ops


static struct tcf_proto_ops cls_fl_ops __read_mostly = {
        .kind           = "flower",
        .classify       = fl_classify,
        .init           = fl_init,
        .destroy        = fl_destroy,
        .get            = fl_get,
        .change         = fl_change,
        .delete         = fl_delete,
        .walk           = fl_walk,
        .dump           = fl_dump,
        .owner          = THIS_MODULE,
};

static int __init cls_fl_init(void)
{
        return register_tcf_proto_ops(&cls_fl_ops);
}

static void __exit cls_fl_exit(void)
{
        unregister_tcf_proto_ops(&cls_fl_ops);
}

module_init(cls_fl_init);
module_exit(cls_fl_exit);

MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>");
MODULE_DESCRIPTION("Flower classifier");
MODULE_LICENSE("GPL v2");

cls_bpf



static struct tcf_proto_ops cls_bpf_ops __read_mostly = {
    .kind        =    "bpf",
    .owner        =    THIS_MODULE,
    .classify    =    cls_bpf_classify,
    .init        =    cls_bpf_init,
    .destroy    =    cls_bpf_destroy,
    .get        =    cls_bpf_get,
    .change        =    cls_bpf_change,
    .delete        =    cls_bpf_delete,
    .walk        =    cls_bpf_walk,
    .dump        =    cls_bpf_dump,
    .bind_class    =    cls_bpf_bind_class,
};
 
static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
              struct tcf_proto *tp, unsigned long base,
              u32 handle, struct nlattr **tca,
              void **arg, bool ovr)
{
    struct cls_bpf_head *head = rtnl_dereference(tp->root);
    struct cls_bpf_prog *oldprog = *arg;
    struct nlattr *tb[TCA_BPF_MAX + 1];
    struct cls_bpf_prog *prog;
    int ret;
    if (tca[TCA_OPTIONS] == NULL)
        return -EINVAL;
    ret = nla_parse_nested(tb, TCA_BPF_MAX, tca[TCA_OPTIONS], bpf_policy,
                   NULL);
    if (ret < 0)
        return ret;
    prog = kzalloc(sizeof(*prog), GFP_KERNEL);
    if (!prog)
        return -ENOBUFS;
    ret = tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE);
    if (ret < 0)
        goto errout;
    if (oldprog) {
        if (handle && oldprog->handle != handle) {
            ret = -EINVAL;
            goto errout;
        }
    }
    if (handle == 0)
        prog->handle = cls_bpf_grab_new_handle(tp, head);
    else
        prog->handle = handle;
    if (prog->handle == 0) {
        ret = -EINVAL;
        goto errout;
    }
    ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr);
    if (ret < 0)
        goto errout;
    ret = cls_bpf_offload(tp, prog, oldprog);
    if (ret) {
        __cls_bpf_delete_prog(prog);
        return ret;
    }
    if (!tc_in_hw(prog->gen_flags))
        prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW;
    if (oldprog) {
        list_replace_rcu(&oldprog->link, &prog->link);
        tcf_unbind_filter(tp, &oldprog->res);
        call_rcu(&oldprog->rcu, cls_bpf_delete_prog_rcu);
    } else {
        list_add_rcu(&prog->link, &head->plist);
    }
    *arg = prog;
    return 0;
errout:
    tcf_exts_destroy(&prog->exts);
    kfree(prog);
    return ret;
}

 bpf action

static struct tc_action_ops act_bpf_ops __read_mostly = {
        .kind           =       "bpf",
        .id             =       TCA_ID_BPF,
        .owner          =       THIS_MODULE,
        .act            =       tcf_bpf_act,
        .dump           =       tcf_bpf_dump,
        .cleanup        =       tcf_bpf_cleanup,
        .init           =       tcf_bpf_init,
        .walk           =       tcf_bpf_walker,
        .lookup         =       tcf_bpf_search,
        .size           =       sizeof(struct tcf_bpf),
};

static __net_init int bpf_init_net(struct net *net)
{
        struct tc_action_net *tn = net_generic(net, bpf_net_id);

        return tc_action_net_init(net, tn, &act_bpf_ops);
}
static int __init bpf_init_module(void)
{
        return tcf_register_action(&act_bpf_ops, &bpf_net_ops);
}
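The .act callback referenced above (tcf_bpf_act) is the per-packet hook that tcf_action_exec() invokes for each action in the chain. A hedged sketch of the general shape of such a handler (my_act is a hypothetical name, not the real tcf_bpf_act):

static int my_act(struct sk_buff *skb, const struct tc_action *a,
                  struct tcf_result *res)
{
        /* typically: bump counters, touch the last-use timestamp,
         * inspect or rewrite the skb, then return a TC_ACT_* verdict */
        if (skb->len > 1500)
                return TC_ACT_SHOT;     /* drop oversized packets */

        return TC_ACT_PIPE;             /* fall through to the next action */
}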
 

Linux flow offload: improving routing/forwarding efficiency

https://blog.csdn.net/dog250/article/details/103422860


Original article: https://www.cnblogs.com/dream397/p/14435841.html