每天一个topic -- net 和linux 路由表

  1.最近学习 linux 路由表相关知识,现在总结如下:

       系统查询路由信息的时候,分为两步:先查路由缓存(route cache),然后查询路由表。route cache 信息保存在一个全局的数据结构 rt_hash_table 中

net/ipv4/af_inet.c  inet_init()     ->      net/ipv4/ip_output.c  ip_init()            ->        net/ipv4/route.c  ip_rt_init()

    rt_hash_table = (struct rt_hash_bucket *)                                                                                                  
        alloc_large_system_hash("IP route cache",
                    sizeof(struct rt_hash_bucket),
                    rhash_entries,
                    (totalram_pages >= 128 * 1024) ?
                    15 : 17, 
                    0,  
                    &rt_hash_log,
                    &rt_hash_mask,
                    rhash_entries ? 0 : 512 * 1024);

 rt_hash_table   

            chain    
            chain
 
 
               chain

            
 ------>rt_table---->rt_table------>rt_table

-------->rt_table------->rt_table-------->rt_table

这就是 route cache 的结构:用一个数组(哈希桶)加上每个桶上挂的链表来存储

策略路由表结构

   net->ipv4.fib_table_hash = kzalloc(                                                                                                        
            sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
    if (net->ipv4.fib_table_hash == NULL)
        return -ENOMEM;

    for (i = 0; i < FIB_TABLE_HASHSZ; i++)
        INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);

注意:不同的 network namespace 和不同的协议(比如 ipv4 和 ipv6)都有各自的路由表

查看一个函数fib_get_table

    head = &net->ipv4.fib_table_hash[h]; //hash clash                                                                                          
    hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
        if (tb->tb_id == id) {
            rcu_read_unlock();
            return tb; 
        }   
    }   

可以看到,查找 table 的时候先根据 hash 值定位到哈希桶,然后再在冲突链上比较 tb_id 进行查找

最后谈谈 net 这个结构体

struct net inet_net;

这个结构体表示一个 network namespace

/*
 * struct net - state kept for one network namespace.
 *
 * Groups the namespace's lifetime counters, list linkage, proc entries,
 * device lists, the list of fib_rules ops, netlink sockets and the
 * per-protocol netns_* sub-structures.  Several members exist only when
 * the matching CONFIG_* option is enabled.
 */
struct net {
    atomic_t        count;      /* To decide when the network
                         *  namespace should be freed.
                         */
#ifdef NETNS_REFCNT_DEBUG
    atomic_t        use_count;  /* To track references we
                         * destroy on demand
                         */
#endif
    struct list_head    list;       /* list of network namespaces */
    struct work_struct  work;       /* work struct for freeing */

    struct proc_dir_entry   *proc_net;
    struct proc_dir_entry   *proc_net_stat;

#ifdef CONFIG_SYSCTL
    struct ctl_table_set    sysctls;
#endif

    struct net_device       *loopback_dev;          /* The loopback */

    /* NOTE(review): presumably the device list plus by-name / by-index
     * hash heads for this namespace -- confirm against the kernel tree. */
    struct list_head    dev_base_head;
    struct hlist_head   *dev_name_head;
    struct hlist_head   *dev_index_head;

    /* core fib_rules */
    struct list_head    rules_ops;  /* list of struct fib_rules_ops (one per
                                     * protocol family, per the notes below) */
    spinlock_t      rules_mod_lock; /* presumably guards rules_ops -- TODO confirm */

    struct sock         *rtnl;          /* rtnetlink socket */
    struct sock     *genl_sock;     /* presumably the generic netlink socket -- TODO confirm */

    /* Per-subsystem namespace state. */
    struct netns_core   core;
    struct netns_mib    mib;
    struct netns_packet packet;
    struct netns_unix   unx;
    struct netns_ipv4   ipv4;       /* IPv4 state; holds fib_table_hash and rules_ops */
   #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
    struct netns_ipv6   ipv6;
#endif
#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE)
    struct netns_dccp   dccp;
#endif
#ifdef CONFIG_NETFILTER
    struct netns_xt     xt;
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
    struct netns_ct     ct;
#endif
#endif
#ifdef CONFIG_XFRM
    struct netns_xfrm   xfrm;
#endif
#ifdef CONFIG_WIRELESS_EXT
    struct sk_buff_head wext_nlevents;
#endif
        struct net_generic  *gen;
};

struct net 结构体中和ipv4路由表相关的有  

struct list_head    rules_ops;              //这个是和协议族相关的对策略路由表操作的函数rule_ops 每一个协议族对应一个ops主要用于策略路由表的添加删除匹配等,以及策略路由描述信息
struct netns_ipv4   ipv4;                  //这个表示ipv4协议族中的路由表相关信息

struct netns_ipv4 中有两个重要成员


struct fib_rules_ops *rules_ops;                       // 策略路由表的描述信息 比如 dev br0 mark 2 的查路由表2
struct hlist_head *fib_table_hash;                   //256张路由表 指定出口下一跳

我们来看下 rules_ops 这个结构体

/*
 * struct fib_rules_ops - per-protocol-family operations and state for
 * policy routing rules.
 *
 * One instance exists per protocol family.  It carries the callbacks used
 * to match, act on, configure, compare and dump rules, together with the
 * list of rules themselves.
 */
struct fib_rules_ops
{
    int         family;             /* protocol family these ops serve */
    struct list_head    list;       /* presumably linkage into net->rules_ops -- TODO confirm */
    int         rule_size;          /* NOTE(review): presumably size of the family's rule struct */
    int         addr_size;          /* NOTE(review): presumably size of an address for this family */
    int         unresolved_rules;
    int         nr_goto_rules;

    /* Called by the rule walk for a matched rule whose action is neither
     * FR_ACT_GOTO nor FR_ACT_NOP; any result other than -EAGAIN ends the
     * walk with that rule. */
    int         (*action)(struct fib_rule *,
                      struct flowi *, int,
                      struct fib_lookup_arg *);
    /* Family-specific match hook (presumably used by fib_rule_match). */
    int         (*match)(struct fib_rule *,
                     struct flowi *, int);
    /* NOTE(review): configure/compare/fill take a fib_rule_hdr and netlink
     * attributes, so they presumably serve rule add/delete/dump requests --
     * confirm against the callers. */
    int         (*configure)(struct fib_rule *,
                         struct sk_buff *,
                         struct fib_rule_hdr *,
                         struct nlattr **);
    int         (*compare)(struct fib_rule *,
                       struct fib_rule_hdr *,
                       struct nlattr **);
    int         (*fill)(struct fib_rule *, struct sk_buff *,
                    struct fib_rule_hdr *);
    u32         (*default_pref)(struct fib_rules_ops *ops);
    size_t          (*nlmsg_payload)(struct fib_rule *);

    /* Called after modifications to the rules set, must flush
     * the route cache if one exists. */
    void            (*flush_cache)(struct fib_rules_ops *ops);

    int         nlgroup;
    const struct nla_policy *policy;
    struct list_head    rules_list; /* the rules themselves; walked in order during lookup */
    struct module       *owner;
    struct net      *fro_net;       /* network namespace these ops belong to */
};

其中主要成员说明

int family 表示协议族

struct list_head rules_list;  策略路由表的描述  mark 2查找table 2 ; mark 3 查找table3 策略路由表的描述信息

struct net *fro_net;  指向net 通过net 和table Id 找到所需要的table

现在我们来看看查表的过程

 list_for_each_entry_rcu(rule, &ops->rules_list, list) {                                                                                    
jumped:
        if (!fib_rule_match(rule, ops, fl, flags))
            continue;

        if (rule->action == FR_ACT_GOTO) {
            struct fib_rule *target;

            target = rcu_dereference(rule->ctarget);
            if (target == NULL) {
                continue;
            } else {
                rule = target;
                goto jumped;
            }   
        } else if (rule->action == FR_ACT_NOP)
            continue;
        else
            err = ops->action(rule, fl, flags, arg);

        if (err != -EAGAIN) {
            fib_rule_get(rule);
            arg->rule = rule;
            goto out;
        }   
    }   

fib_rule_match 就是根据策略路由表的信息,找到需要查询哪一张策略路由表

ops->action 就是根据策略路由表决定下一跳信息。这个函数主要分两步:第一步根据 rule 的 table id 找到特定的 table,由 rule->table 和 rule->fr_net->ipv4.fib_table_hash 这两个来确定

第二步就是决定下一跳

    if ((tbl = fib_get_table(rule->fr_net, rule->table)) == NULL)                                                                              
    {   
        if ( rule->table < 254 )
        {   
            err = 1;
        }   
        goto errout;
    }   

    err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result);
    if (err > 0)
    {   
        if ( rule->table < 254 )
        {   
            err = 1;
        }   
        else
        {   
            err = -EAGAIN;
        }   
    }   
 


原文地址:https://www.cnblogs.com/chonghui1001/p/3276653.html