tcf_block_cb_register

○ As explained above, the SmartNIC driver's callback is registered with the TCF block for hardware offload by invoking the tcf_block_cb_register() API.
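For reference, the two helpers look roughly like this in the kernel generation shown below (from include/net/pkt_cls.h; later kernels added an extack parameter and eventually replaced this API with the flow_block_cb_*() helpers). The two trailing arguments of the register call in the driver code are the callback identity and the callback private data, both set to priv here:

/* Rough prototypes for this kernel generation; check your tree. */
int tcf_block_cb_register(struct tcf_block *block, tc_setup_cb_t *cb,
                          void *cb_ident, void *cb_priv);
void tcf_block_cb_unregister(struct tcf_block *block, tc_setup_cb_t *cb,
                             void *cb_ident);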

static const struct net_device_ops mlx5e_netdev_ops_rep = {
        .ndo_open                = mlx5e_rep_open,
        .ndo_stop                = mlx5e_rep_close,
        .ndo_start_xmit          = mlx5e_xmit,
        .ndo_get_phys_port_name  = mlx5e_rep_get_phys_port_name,
        .ndo_setup_tc            = mlx5e_rep_setup_tc,
        .ndo_get_stats64         = mlx5e_rep_get_stats,
        .ndo_has_offload_stats   = mlx5e_has_offload_stats,
        .ndo_get_offload_stats   = mlx5e_get_offload_stats,
};
static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
                              void *type_data)
{
        switch (type) {
        case TC_SETUP_BLOCK:
                return mlx5e_rep_setup_tc_block(dev, type_data);
        default:
                return -EOPNOTSUPP;
        }
}

static int mlx5e_rep_setup_tc_block(struct net_device *dev,
                                    struct tc_block_offload *f)
{
        struct mlx5e_priv *priv = netdev_priv(dev);

        if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
                return -EOPNOTSUPP;

        switch (f->command) {
        case TC_BLOCK_BIND:
                /* register the per-block offload callback with the TCF block */
                return tcf_block_cb_register(f->block, mlx5e_rep_setup_tc_cb,
                                             priv, priv);
        case TC_BLOCK_UNBIND:
                tcf_block_cb_unregister(f->block, mlx5e_rep_setup_tc_cb, priv);
                return 0;
        default:
                return -EOPNOTSUPP;
        }
}
static int
mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
                              struct tc_cls_flower_offload *cls_flower)
{
        if (cls_flower->common.chain_index)
                return -EOPNOTSUPP;

        switch (cls_flower->command) {
        case TC_CLSFLOWER_REPLACE:
                return mlx5e_configure_flower(priv, cls_flower);
        case TC_CLSFLOWER_DESTROY:
                return mlx5e_delete_flower(priv, cls_flower);
        case TC_CLSFLOWER_STATS:
                return mlx5e_stats_flower(priv, cls_flower);
        default:
                return -EOPNOTSUPP;
        }
}

/* the per-block callback registered above; invoked for each filter offload request */
static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
                                 void *cb_priv)
{
        struct mlx5e_priv *priv = cb_priv;

        if (!tc_can_offload(priv->netdev))
                return -EOPNOTSUPP;

        switch (type) {
        case TC_SETUP_CLSFLOWER:
                return mlx5e_rep_setup_tc_cls_flower(priv, type_data);
        default:
                return -EOPNOTSUPP;
        }
}

mlx5e_configure_flower --> mlx5e_tc_add_fdb_flow --> mlx5_eswitch_add_offloaded_rule

mlx5e_configure_flower

  • parse_cls_flower parses the match keys of the flow rule handed down from OVS (see the sketch after this list)
  • parse_tc_fdb_actions parses the actions of the flow rule handed down from OVS
  • mlx5e_tc_add_fdb_flow adds the parsed match and actions to the FDB flow table
  • The multipath-related handling that follows is not covered here
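As a rough illustration of what the match-key parsing looks like on the driver side in this kernel generation, the sketch below pulls the basic key (EtherType/IP protocol) out of a tc_cls_flower_offload using the flow-dissector helpers. It is a simplified, hypothetical stand-in (example_parse_basic_key is not a real kernel symbol), not the actual mlx5 parse_cls_flower:

static void example_parse_basic_key(struct tc_cls_flower_offload *f)
{
        struct flow_dissector_key_basic *key, *mask;

        if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC))
                return;

        key  = skb_flow_dissector_target(f->dissector,
                                         FLOW_DISSECTOR_KEY_BASIC, f->key);
        mask = skb_flow_dissector_target(f->dissector,
                                         FLOW_DISSECTOR_KEY_BASIC, f->mask);

        /* The key/mask pair describes the match exactly as user space
         * (e.g. OVS via TC) requested it; the real parser copies it into
         * the device-specific match structure. */
        pr_debug("n_proto %04x/%04x ip_proto %u/%u\n",
                 ntohs(key->n_proto), ntohs(mask->n_proto),
                 key->ip_proto, mask->ip_proto);
}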

mlx5e_tc_add_fdb_flow

  • If the actions include an encap, mlx5e_attach_encap is called to build the header needed for VXLAN encapsulation
  • mlx5_eswitch_add_vlan_action adds the VLAN action
  • mlx5_eswitch_add_offloaded_rule adds the offloaded rule

mlx5_eswitch_add_offloaded_rule --> mlx5_add_flow_rules --> _mlx5_add_flow_rules: this path mainly sends commands to the firmware to install the offloaded rule.
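For orientation, the flow-steering entry point that this path ends up in is declared roughly as follows (include/linux/mlx5/fs.h; the exact signature varies somewhat between kernel versions). The spec carries the match criteria and values, flow_act the action flags, and dest the forwarding destination(s); all of them are translated into the firmware command that installs the rule:

struct mlx5_flow_handle *
mlx5_add_flow_rules(struct mlx5_flow_table *ft, struct mlx5_flow_spec *spec,
                    struct mlx5_flow_act *flow_act,
                    struct mlx5_flow_destination *dest, int num_dest);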

mlx5e_xmit

mlx5e_xmit --> mlx5e_sq_xmit --> mlx5e_txwqe_complete

Add a filter block to this queueing discipline (qdisc); a sketch of a typical caller follows tcf_block_get() below.

int tcf_block_get(struct tcf_block **p_block,
          //p_filter_chain is q->filter_list, i.e. the head of the qdisc's filter chain
          struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
          struct netlink_ext_ack *extack)
{
    struct tcf_block_ext_info ei = {
        .chain_head_change = tcf_chain_head_change_dflt,
        .chain_head_change_priv = p_filter_chain,
    };

    WARN_ON(!p_filter_chain);
    return tcf_block_get_ext(p_block, q, &ei, extack);
}
EXPORT_SYMBOL(tcf_block_get);
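A minimal sketch of a typical caller, loosely modeled on classful qdiscs such as sch_prio; the foo_* names and the private structure are illustrative, not real kernel symbols:

struct foo_sched_data {
        struct tcf_block *block;
        struct tcf_proto __rcu *filter_list;
};

static int foo_init(struct Qdisc *sch, struct nlattr *opt,
                    struct netlink_ext_ack *extack)
{
        struct foo_sched_data *q = qdisc_priv(sch);

        /* Hand &q->filter_list to the block; through tcf_chain_head_change_dflt
         * below it will always track the head of chain 0, so the qdisc's
         * classification path can keep reading q->filter_list. */
        return tcf_block_get(&q->block, &q->filter_list, sch, extack);
}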
//default chain-head-change handler for a qdisc: point q->filter_list at tp_head
static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv)
{
    struct tcf_proto __rcu **p_filter_chain = priv;
    //update the qdisc's filter chain head
    rcu_assign_pointer(*p_filter_chain, tp_head);
}
int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
              struct tcf_block_ext_info *ei,
              struct netlink_ext_ack *extack)
{
    struct net *net = qdisc_net(q);
    struct tcf_block *block = NULL;
    bool created = false;
    int err;

    if (ei->block_index) { //a non-zero block_index means a shared block was requested; normally only the ingress/clsact qdiscs use this (see the sketch after this function)
        /* block_index not 0 means the shared block is requested */
        block = tcf_block_lookup(net, ei->block_index);
        if (block)
            block->refcnt++;
    }

    if (!block) { //no shared block in use, or the shared block is being created for the first time: allocate a new one
        block = tcf_block_create(net, q, ei->block_index, extack);
        if (IS_ERR(block))
            return PTR_ERR(block);
        created = true;
        if (tcf_block_shared(block)) {
            err = tcf_block_insert(block, net, extack);
            if (err)
                goto err_block_insert;
        }
    }
    //add an owner for this block, normally a queueing discipline (qdisc)
    err = tcf_block_owner_add(block, q, ei->binder_type);
    if (err)
        goto err_block_owner_add;

    tcf_block_owner_netif_keep_dst(block, q, ei->binder_type);
    //hook the qdisc's filter list to chain 0 of the block via the chain_head_change callback
    err = tcf_chain_head_change_cb_add(tcf_block_chain_zero(block),
                       ei, extack);
    if (err)
        goto err_chain_head_change_cb_add;
    //if the device supports offload, bind the block to it
    err = tcf_block_offload_bind(block, q, ei);
    if (err)
        goto err_block_offload_bind;

    *p_block = block;
    return 0;

err_block_offload_bind:
    tcf_chain_head_change_cb_del(tcf_block_chain_zero(block), ei);
err_chain_head_change_cb_add:
    tcf_block_owner_del(block, q, ei->binder_type);
err_block_owner_add:
    if (created) {
        if (tcf_block_shared(block))
            tcf_block_remove(block, net);
err_block_insert:
        kfree(tcf_block_chain_zero(block));
        kfree(block);
    } else {
        block->refcnt--;
    }
    return err;
}
EXPORT_SYMBOL(tcf_block_get_ext);
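The shared-block path above (ei->block_index != 0) is normally exercised by the ingress and clsact qdiscs, which call tcf_block_get_ext() directly with their own chain_head_change callback. A condensed sketch, loosely based on sch_ingress.c (error handling and the TCA_INGRESS_BLOCK attribute parsing that fills block_info.block_index are trimmed):

static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
                        struct netlink_ext_ack *extack)
{
        struct ingress_sched_data *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);

        /* Ingress classifies via dev->miniq_ingress rather than a
         * qdisc-private filter list, so clsact_chain_head_change (defined in
         * sch_ingress.c) swaps the mini-qdisc pointer instead of
         * q->filter_list. */
        mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress);

        q->block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
        q->block_info.chain_head_change = clsact_chain_head_change;
        q->block_info.chain_head_change_priv = &q->miniqp;

        return tcf_block_get_ext(&q->block, sch, &q->block_info, extack);
}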
//bind the block to the underlying device for hardware offload (via ndo_setup_tc or the indirect-block path)
static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
                  struct tcf_block_ext_info *ei,
                  struct netlink_ext_ack *extack)
{
    struct net_device *dev = q->dev_queue->dev;
    int err;

    down_write(&block->cb_lock);

    /* If tc offload feature is disabled and the block we try to bind
     * to already has some offloaded filters, forbid to bind.
     */
    if (dev->netdev_ops->ndo_setup_tc &&
        !tc_can_offload(dev) &&
        tcf_block_offload_in_use(block)) {
        NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled");
        err = -EOPNOTSUPP;
        goto err_unlock;
    }

    err = tcf_block_offload_cmd(block, dev, q, ei, FLOW_BLOCK_BIND, extack);
    if (err == -EOPNOTSUPP)
        goto no_offload_dev_inc;
    if (err)
        goto err_unlock;

    up_write(&block->cb_lock);
    return 0;

no_offload_dev_inc:
    if (tcf_block_offload_in_use(block))
        goto err_unlock;

    err = 0;
    block->nooffloaddevcnt++;
err_unlock:
    up_write(&block->cb_lock);
    return err;
}
static int tcf_block_offload_cmd(struct tcf_block *block,
                 struct net_device *dev, struct Qdisc *sch,
                 struct tcf_block_ext_info *ei,
                 enum flow_block_command command,
                 struct netlink_ext_ack *extack)
{
    struct flow_block_offload bo = {};

    tcf_block_offload_init(&bo, dev, sch, command, ei->binder_type,
                   &block->flow_block, tcf_block_shared(block),
                   extack);

    if (dev->netdev_ops->ndo_setup_tc) {
        int err;

        err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
        if (err < 0) {
            if (err != -EOPNOTSUPP)
                NL_SET_ERR_MSG(extack, "Driver ndo_setup_tc failed");
            return err;
        }

        return tcf_block_setup(block, &bo);
    }

    flow_indr_dev_setup_offload(dev, sch, TC_SETUP_BLOCK, block, &bo,
                    tc_block_indr_cleanup);
    tcf_block_setup(block, &bo);

    return -EOPNOTSUPP;
}

flow_indr_dev_setup_offload

int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch,
                enum tc_setup_type type, void *data,
                struct flow_block_offload *bo,
                void (*cleanup)(struct flow_block_cb *block_cb))
{
    struct flow_indr_dev *this;

    mutex_lock(&flow_indr_block_lock);
    list_for_each_entry(this, &flow_block_indr_dev_list, list)
        this->cb(dev, sch, this->cb_priv, type, bo, data, cleanup);

    mutex_unlock(&flow_indr_block_lock);

    return list_empty(&bo->cb_list) ? -EOPNOTSUPP : 0;
}
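The flow_block_indr_dev_list walked above is populated by drivers that handle offload for devices they do not own (tunnel netdevs such as vxlan, for example) and therefore cannot rely on that device's ndo_setup_tc. They register an indirect callback via flow_indr_dev_register(). A hedged sketch of that driver side; the example_* names are illustrative, and the callback prototype mirrors the this->cb() invocation above:

/* Matches flow_indr_block_bind_cb_t, i.e. the way this->cb() is called above. */
static int example_indr_setup_cb(struct net_device *dev, struct Qdisc *sch,
                                 void *cb_priv, enum tc_setup_type type,
                                 void *type_data, void *data,
                                 void (*cleanup)(struct flow_block_cb *block_cb))
{
        struct flow_block_offload *bo = type_data;

        if (type != TC_SETUP_BLOCK)
                return -EOPNOTSUPP;

        /* A real driver would allocate a struct flow_block_cb here and add it
         * to bo->cb_list, which is what makes the list_empty() check in
         * flow_indr_dev_setup_offload() report success. */
        (void)bo;
        return -EOPNOTSUPP;
}

static int example_register(void *driver_priv)
{
        /* Adds the callback to flow_block_indr_dev_list. */
        return flow_indr_dev_register(example_indr_setup_cb, driver_priv);
}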

tcf_block_setup

static int tcf_block_setup(struct tcf_block *block,
               struct flow_block_offload *bo)
{
    int err;

    switch (bo->command) {
    case FLOW_BLOCK_BIND:
        err = tcf_block_bind(block, bo);
        break;
    case FLOW_BLOCK_UNBIND:
        err = 0;
        tcf_block_unbind(block, bo);
        break;
    default:
        WARN_ON_ONCE(1);
        err = -EOPNOTSUPP;
    }

    return err;
}
Original article: https://www.cnblogs.com/dream397/p/14492979.html