○ As explained above, the smartNIC driver’s callback is registered for hardware offload with the TCF block by invoking the tcf_block_cb_register() API.
static const struct net_device_ops mlx5e_netdev_ops_rep = { 809 .ndo_open = mlx5e_rep_open, 810 .ndo_stop = mlx5e_rep_close, 811 .ndo_start_xmit = mlx5e_xmit, 812 .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, 813 .ndo_setup_tc = mlx5e_rep_setup_tc, 814 .ndo_get_stats64 = mlx5e_rep_get_stats, 815 .ndo_has_offload_stats = mlx5e_has_offload_stats, 816 .ndo_get_offload_stats = mlx5e_get_offload_stats, 817};
static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, 717 void *type_data) 718{ 719 switch (type) { 720 case TC_SETUP_BLOCK: 721 return mlx5e_rep_setup_tc_block(dev, type_data); 722 default: 723 return -EOPNOTSUPP; 724 } 725} 726
static int mlx5e_rep_setup_tc_block(struct net_device *dev, 697 struct tc_block_offload *f) 698{ 699 struct mlx5e_priv *priv = netdev_priv(dev); 700 701 if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) 702 return -EOPNOTSUPP; 703 704 switch (f->command) { 705 case TC_BLOCK_BIND: 706 return tcf_block_cb_register(f->block, mlx5e_rep_setup_tc_cb, 707 priv, priv); 708 case TC_BLOCK_UNBIND: 709 tcf_block_cb_unregister(f->block, mlx5e_rep_setup_tc_cb, priv); 710 return 0; 711 default: 712 return -EOPNOTSUPP; 713 } 714}
static int 662mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv, 663 struct tc_cls_flower_offload *cls_flower) 664{ 665 if (cls_flower->common.chain_index) 666 return -EOPNOTSUPP; 667 668 switch (cls_flower->command) { 669 case TC_CLSFLOWER_REPLACE: 670 return mlx5e_configure_flower(priv, cls_flower); 671 case TC_CLSFLOWER_DESTROY: 672 return mlx5e_delete_flower(priv, cls_flower); 673 case TC_CLSFLOWER_STATS: 674 return mlx5e_stats_flower(priv, cls_flower); 675 default: 676 return -EOPNOTSUPP; 677 } 678} 679 680static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, 681 void *cb_priv) 682{ 683 struct mlx5e_priv *priv = cb_priv; 684 685 if (!tc_can_offload(priv->netdev)) 686 return -EOPNOTSUPP; 687 688 switch (type) { 689 case TC_SETUP_CLSFLOWER: 690 return mlx5e_rep_setup_tc_cls_flower(priv, type_data); 691 default: 692 return -EOPNOTSUPP; 693 } 694}
mlx5e_configure_flower-->mlx5e_tc_add_fdb_flow-->mlx5_eswitch_add_offloaded_rule
mlx5e_configure_flower
parse_cls_flower
解析ovs传递过来的流表中match的key信息
parse_tc_fdb_actions
解析ovs传递过来的流表中action信息
mlx5e_tc_add_fdb_flow
主要是将match和action添加到fdb流表 - 后面先不看了,multipath的那些操作
mlx5e_tc_add_fdb_flow
- 如果action包含encap,那么调用
mlx5e_attach_encap
生成vxlan所需要的报文头信息
mlx5_eswitch_add_vlan_action
添加vlan的action
mlx5_eswitch_add_offloaded_rule
添加offloaded规则
mlx5_eswitch_add_offloaded_rule-->mlx5_add_flow_rules-->_mlx5_add_flow_rules
主要是向firmware发送指令添加offload规则。
mlx5e_xmit
mlx5e_xmit --> mlx5e_sq_xmit --> mlx5e_txwqe_complete
为该排队规程添加一个过滤器块
int tcf_block_get(struct tcf_block **p_block, //p_filter_chain为q->filter_list,即排队规程的过滤器头 struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,// struct netlink_ext_ack *extack) { struct tcf_block_ext_info ei = { .chain_head_change = tcf_chain_head_change_dflt,// .chain_head_change_priv = p_filter_chain, }; WARN_ON(!p_filter_chain); return tcf_block_get_ext(p_block, q, &ei, extack); } EXPORT_SYMBOL(tcf_block_get); //默认修改排队规程过滤器链表头函数,即将q->filter_list设置为tp_head static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv) { struct tcf_proto __rcu **p_filter_chain = priv; //改变默认过滤器链表 rcu_assign_pointer(*p_filter_chain, tp_head); } int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, struct tcf_block_ext_info *ei, struct netlink_ext_ack *extack) { struct net *net = qdisc_net(q); struct tcf_block *block = NULL; bool created = false; int err; if (ei->block_index) {//指定了block索引,说明是共享block,一般只有在ingress sch才有 /* block_index not 0 means the shared block is requested */ block = tcf_block_lookup(net, ei->block_index); if (block) block->refcnt++; } if (!block) {//没有使用共享block,或者该共享block首次创建,则分配一个block block = tcf_block_create(net, q, ei->block_index, extack); if (IS_ERR(block)) return PTR_ERR(block); created = true; if (tcf_block_shared(block)) { err = tcf_block_insert(block, net, extack); if (err) goto err_block_insert; } } //为该block添加一个引用者,一般是一个派对规程 err = tcf_block_owner_add(block, q, ei->binder_type); if (err) goto err_block_owner_add; tcf_block_owner_netif_keep_dst(block, q, ei->binder_type); //将block中的第一个过滤规则作为排队规程的第一个过滤器 err = tcf_chain_head_change_cb_add(tcf_block_chain_zero(block), ei, extack); if (err) goto err_chain_head_change_cb_add; //如果有卸载,则进行卸载绑定 err = tcf_block_offload_bind(block, q, ei); if (err) goto err_block_offload_bind; *p_block = block; return 0; err_block_offload_bind: tcf_chain_head_change_cb_del(tcf_block_chain_zero(block), ei); err_chain_head_change_cb_add: tcf_block_owner_del(block, q, ei->binder_type); 
err_block_owner_add: if (created) { if (tcf_block_shared(block)) tcf_block_remove(block, net); err_block_insert: kfree(tcf_block_chain_zero(block)); kfree(block); } else { block->refcnt--; } return err; } EXPORT_SYMBOL(tcf_block_get_ext); //添加一个过滤器链表头修改表项
static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei, struct netlink_ext_ack *extack) { struct net_device *dev = q->dev_queue->dev; int err; down_write(&block->cb_lock); /* If tc offload feature is disabled and the block we try to bind * to already has some offloaded filters, forbid to bind. */ if (dev->netdev_ops->ndo_setup_tc && !tc_can_offload(dev) && tcf_block_offload_in_use(block)) { NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled"); err = -EOPNOTSUPP; goto err_unlock; } err = tcf_block_offload_cmd(block, dev, q, ei, FLOW_BLOCK_BIND, extack); if (err == -EOPNOTSUPP) goto no_offload_dev_inc; if (err) goto err_unlock; up_write(&block->cb_lock); return 0; no_offload_dev_inc: if (tcf_block_offload_in_use(block)) goto err_unlock; err = 0; block->nooffloaddevcnt++; err_unlock: up_write(&block->cb_lock); return err; }
static int tcf_block_offload_cmd(struct tcf_block *block, struct net_device *dev, struct Qdisc *sch, struct tcf_block_ext_info *ei, enum flow_block_command command, struct netlink_ext_ack *extack) { struct flow_block_offload bo = {}; tcf_block_offload_init(&bo, dev, sch, command, ei->binder_type, &block->flow_block, tcf_block_shared(block), extack); if (dev->netdev_ops->ndo_setup_tc) { int err; err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo); if (err < 0) { if (err != -EOPNOTSUPP) NL_SET_ERR_MSG(extack, "Driver ndo_setup_tc failed"); return err; } return tcf_block_setup(block, &bo); } flow_indr_dev_setup_offload(dev, sch, TC_SETUP_BLOCK, block, &bo, tc_block_indr_cleanup); tcf_block_setup(block, &bo); return -EOPNOTSUPP; }
flow_indr_dev_setup_offload
int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch, enum tc_setup_type type, void *data, struct flow_block_offload *bo, void (*cleanup)(struct flow_block_cb *block_cb)) { struct flow_indr_dev *this; mutex_lock(&flow_indr_block_lock); list_for_each_entry(this, &flow_block_indr_dev_list, list) this->cb(dev, sch, this->cb_priv, type, bo, data, cleanup); mutex_unlock(&flow_indr_block_lock); return list_empty(&bo->cb_list) ? -EOPNOTSUPP : 0; }
tcf_block_setup
/* Apply the result of a block offload command: register or unregister
 * the flow_block_cb entries collected on bo->cb_list. */
static int tcf_block_setup(struct tcf_block *block,
			   struct flow_block_offload *bo)
{
	int err;

	switch (bo->command) {
	case FLOW_BLOCK_BIND:
		err = tcf_block_bind(block, bo);
		break;
	case FLOW_BLOCK_UNBIND:
		err = 0;
		tcf_block_unbind(block, bo);
		break;
	default:
		WARN_ON_ONCE(1);
		err = -EOPNOTSUPP;
	}

	return err;
}