Linux eth0, eth1, ..., eth%d 的生成【转】

转自:https://blog.csdn.net/xiruanliuwei/article/details/78765255

一直很好奇,Linux下的eth0, eth1,eth2等是如何生成的~

特别函数:

__dev_get_by_name

通过 eth1 这样的名字,找 struct net_device

分三个步骤:

1. 创建 struct net_device 类型的变量 XA ;

2. 将创建的变量 XA 通过 register_netdevice 函数进行注册;

3. ifconfig ethx up,有了这最后一步,才能在 ifconfig 命令的输出中看到增加的 ethx;

1. 从 alloc_etherdev 开始分析,其实 alloc_etherdev 是一个宏:

alloc_etherdev   -->   alloc_etherdev_mq   -->   alloc_etherdev_mqs

最后调用的 alloc_etherdev_mqs 才是一个函数,而这个函数又调用了 alloc_netdev_mqs 函数,具体的操作都是在这个函数中完成的,因此:

alloc_etherdev   -->   alloc_etherdev_mq   -->   alloc_etherdev_mqs    -->   alloc_netdev_mqs

/**
 * alloc_etherdev_mqs - allocate and set up an Ethernet net_device
 * @sizeof_priv: size of the driver-private area to allocate along with
 *	the device
 * @txqs: number of TX queues the device has
 * @rxqs: number of RX queues the device has
 *
 * Thin wrapper around alloc_netdev_mqs() that supplies the Ethernet
 * defaults: the name template "eth%d", NET_NAME_UNKNOWN as the name
 * origin and ether_setup() as the setup callback.  The device is only
 * allocated and initialized here; it is not registered, and the "%d" in
 * the name template is not expanded by this function.
 *
 * Return: whatever alloc_netdev_mqs() returns (NULL on failure).
 */
struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
				      unsigned int rxqs)
{
	struct net_device *ndev;

	ndev = alloc_netdev_mqs(sizeof_priv, "eth%d", NET_NAME_UNKNOWN,
				ether_setup, txqs, rxqs);

	return ndev;
}
 

/**
 * alloc_netdev_mqs - allocate network device
 * @sizeof_priv: size of private data to allocate space for
 * @name: device name format string
 * @name_assign_type: origin of device name
 * @setup: callback to initialize device
 * @txqs: the number of TX subqueues to allocate
 * @rxqs: the number of RX subqueues to allocate
 *
 * Allocates a struct net_device with private data area for driver use
 * and performs basic initialization. Also allocates subqueue structs
 * for each queue on the device.
 *
 * @name is copied into dev->name verbatim, so a template such as
 * "eth%d" is still unexpanded when this function returns.
 *
 * Return: the new device, or NULL if a queue count is zero or any
 * allocation fails.
 */
struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
				    unsigned char name_assign_type,
				    void (*setup)(struct net_device *),
				    unsigned int txqs, unsigned int rxqs)
{
	struct net_device *dev;
	size_t alloc_size;
	struct net_device *p;

	/* The strcpy() into dev->name below relies on this bound. */
	BUG_ON(strlen(name) >= sizeof(dev->name));

	if (txqs < 1) {
		pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
		return NULL;
	}

#ifdef CONFIG_SYSFS
	if (rxqs < 1) {
		pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
		return NULL;
	}
#endif

	alloc_size = sizeof(struct net_device);
	if (sizeof_priv) {
		/* ensure 32-byte alignment of private area */
		alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
		alloc_size += sizeof_priv;
	}
	/* ensure 32-byte alignment of whole construct */
	alloc_size += NETDEV_ALIGN - 1;

	/*
	 * Allocate zeroed storage for the net_device (plus private area and
	 * alignment slack); the pointer is aligned with PTR_ALIGN() below.
	 */
	p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
	if (!p)
		return NULL;

	/*
	 * dev is the aligned pointer that is eventually returned to the
	 * caller (e.g. through alloc_etherdev).  dev->padded records how far
	 * it was moved from the raw allocation p.
	 */
	dev = PTR_ALIGN(p, NETDEV_ALIGN);
	dev->padded = (char *)dev - (char *)p;

	dev->pcpu_refcnt = alloc_percpu(int);
	if (!dev->pcpu_refcnt)
		goto free_dev;

	if (dev_addr_init(dev))
		goto free_pcpu;

	dev_mc_init(dev);
	dev_uc_init(dev);

	dev_net_set(dev, &init_net);

	dev->gso_max_size = GSO_MAX_SIZE;
	dev->gso_max_segs = GSO_MAX_SEGS;

	INIT_LIST_HEAD(&dev->napi_list);
	INIT_LIST_HEAD(&dev->unreg_list);
	INIT_LIST_HEAD(&dev->close_list);
	INIT_LIST_HEAD(&dev->link_watch_list);
	INIT_LIST_HEAD(&dev->adj_list.upper);
	INIT_LIST_HEAD(&dev->adj_list.lower);
	INIT_LIST_HEAD(&dev->ptype_all);
	INIT_LIST_HEAD(&dev->ptype_specific);
#ifdef CONFIG_NET_SCHED
	hash_init(dev->qdisc_hash);
#endif
	dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;

	/*
	 * Caller-supplied initializer.  When reached through
	 * alloc_etherdev_mqs() this is ether_setup().
	 */
	setup(dev);

	/* A zero tx_queue_len after setup() marks the device as queue-less. */
	if (!dev->tx_queue_len) {
		dev->priv_flags |= IFF_NO_QUEUE;
		dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
	}

	dev->num_tx_queues = txqs;
	dev->real_num_tx_queues = txqs;
	if (netif_alloc_netdev_queues(dev))
		goto free_all;

#ifdef CONFIG_SYSFS
	dev->num_rx_queues = rxqs;
	dev->real_num_rx_queues = rxqs;
	if (netif_alloc_rx_queues(dev))
		goto free_all;
#endif

	/* Plain copy: a "%d" in the template is NOT expanded here. */
	strcpy(dev->name, name);
	dev->name_assign_type = name_assign_type;
	dev->group = INIT_NETDEV_GROUP;
	if (!dev->ethtool_ops)
		dev->ethtool_ops = &default_ethtool_ops;

	nf_hook_ingress_init(dev);

	return dev;

free_all:
	free_netdev(dev);
	return NULL;

free_pcpu:
	free_percpu(dev->pcpu_refcnt);
free_dev:
	netdev_freemem(dev);
	return NULL;
}
 

对于这两个函数,除了代码中的注释,最重要的就是 alloc_netdev_mqs 的第二个参数 "eth%d" :

在 alloc_netdev_mqs 中,通过 strcpy(dev->name, name); 将这个格式字符串原样拷贝到 dev->name 中(此时并不会展开其中的 %d),

因此,此时 alloc_etherdev 返回的 net_device 中的数据成员 name 的值为 "eth%d" (不包含双引号)

2.  从 register_netdevice 开始,步骤一中返回的 struct net_device* 值,刚好是 register_netdevice 函数的参数:

/**
 * register_netdevice - register a network device
 * @dev: device to register
 *
 * Take a completed network device structure and add it to the kernel
 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 * chain. 0 is returned on success. A negative errno code is returned
 * on a failure to set up the device, or if the name is a duplicate.
 *
 * Callers must hold the rtnl semaphore. You may want
 * register_netdev() instead of this.
 *
 * On entry dev->name may still be a template such as "eth%d"; it is
 * resolved to a concrete name by dev_get_valid_name() below.
 *
 * BUGS:
 * The locking appears insufficient to guarantee two parallel registers
 * will not get the same name.
 */
int register_netdevice(struct net_device *dev)
{
	int ret;
	struct net *net = dev_net(dev);

	BUG_ON(dev_boot_phase);
	ASSERT_RTNL();

	might_sleep();

	/* When net_device's are persistent, this will be fatal. */
	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
	BUG_ON(!net);

	spin_lock_init(&dev->addr_list_lock);
	netdev_set_addr_lockdep_class(dev);

	/* Validate dev->name and expand a "%d" template in place. */
	ret = dev_get_valid_name(net, dev, dev->name);
	if (ret < 0)
		goto out;

	/* Init, if this function is available */
	if (dev->netdev_ops->ndo_init) {
		ret = dev->netdev_ops->ndo_init(dev);
		if (ret) {
			/* Normalize a positive driver return to an errno. */
			if (ret > 0)
				ret = -EIO;
			goto out;
		}
	}

	/* VLAN CTAG filtering needs both add_vid and kill_vid callbacks. */
	if (((dev->hw_features | dev->features) &
	     NETIF_F_HW_VLAN_CTAG_FILTER) &&
	    (!dev->netdev_ops->ndo_vlan_rx_add_vid ||
	     !dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
		netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
		ret = -EINVAL;
		goto err_uninit;
	}

	/* Assign a fresh ifindex, or reject a preset one already in use. */
	ret = -EBUSY;
	if (!dev->ifindex)
		dev->ifindex = dev_new_index(net);
	else if (__dev_get_by_index(net, dev->ifindex))
		goto err_uninit;

	/* Transfer changeable features to wanted_features and enable
	 * software offloads (GSO and GRO).
	 */
	dev->hw_features |= NETIF_F_SOFT_FEATURES;
	dev->features |= NETIF_F_SOFT_FEATURES;

	if (dev->netdev_ops->ndo_udp_tunnel_add) {
		dev->features |= NETIF_F_RX_UDP_TUNNEL_PORT;
		dev->hw_features |= NETIF_F_RX_UDP_TUNNEL_PORT;
	}

	dev->wanted_features = dev->features & dev->hw_features;

	if (!(dev->flags & IFF_LOOPBACK))
		dev->hw_features |= NETIF_F_NOCACHE_COPY;

	/* If IPv4 TCP segmentation offload is supported we should also
	 * allow the device to enable segmenting the frame with the option
	 * of ignoring a static IP ID value. This doesn't enable the
	 * feature itself but allows the user to enable it later.
	 */
	if (dev->hw_features & NETIF_F_TSO)
		dev->hw_features |= NETIF_F_TSO_MANGLEID;
	if (dev->vlan_features & NETIF_F_TSO)
		dev->vlan_features |= NETIF_F_TSO_MANGLEID;
	if (dev->mpls_features & NETIF_F_TSO)
		dev->mpls_features |= NETIF_F_TSO_MANGLEID;
	if (dev->hw_enc_features & NETIF_F_TSO)
		dev->hw_enc_features |= NETIF_F_TSO_MANGLEID;

	/* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
	 */
	dev->vlan_features |= NETIF_F_HIGHDMA;

	/* Make NETIF_F_SG inheritable to tunnel devices.
	 */
	dev->hw_enc_features |= NETIF_F_SG | NETIF_F_GSO_PARTIAL;

	/* Make NETIF_F_SG inheritable to MPLS.
	 */
	dev->mpls_features |= NETIF_F_SG;

	ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
	ret = notifier_to_errno(ret);
	if (ret)
		goto err_uninit;

	ret = netdev_register_kobject(dev);
	if (ret)
		goto err_uninit;
	dev->reg_state = NETREG_REGISTERED;

	__netdev_update_features(dev);

	/*
	 * Default initial state at registry is that the
	 * device is present.
	 */

	set_bit(__LINK_STATE_PRESENT, &dev->state);

	linkwatch_init_dev(dev);

	dev_init_scheduler(dev);
	dev_hold(dev);
	list_netdevice(dev);
	add_device_randomness(dev->dev_addr, dev->addr_len);

	/* If the device has permanent device address, driver should
	 * set dev_addr and also addr_assign_type should be set to
	 * NET_ADDR_PERM (default value).
	 */
	if (dev->addr_assign_type == NET_ADDR_PERM)
		memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);

	/* Notify protocols, that a new device appeared. */
	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
	ret = notifier_to_errno(ret);
	if (ret) {
		/*
		 * NOTE(review): after this rollback, control still falls
		 * through to the RTM_NEWLINK notification below before the
		 * error is returned - confirm that this is intended.
		 */
		rollback_registered(dev);
		dev->reg_state = NETREG_UNREGISTERED;
	}
	/*
	 * Prevent userspace races by waiting until the network
	 * device is fully setup before sending notifications.
	 */
	if (!dev->rtnl_link_ops ||
	    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);

out:
	return ret;

err_uninit:
	/* Undo ndo_init() and run the driver's private destructor, if any. */
	if (dev->netdev_ops->ndo_uninit)
		dev->netdev_ops->ndo_uninit(dev);
	if (dev->priv_destructor)
		dev->priv_destructor(dev);
	goto out;
}

register_netdevice   -->   dev_get_valid_name   -->   dev_alloc_name_ns   -->   __dev_alloc_name

register_netdevice 函数中调用:
// 此时,dev->name 的值是 “eth%d”
ret = dev_get_valid_name(net, dev, dev->name);

/*
 * dev_get_valid_name - validate @name and store the final name in @dev
 * @net:  network namespace the name must be unique in (must not be NULL)
 * @dev:  device whose ->name field receives the result
 * @name: requested name; may contain a '%' template such as "eth%d"
 *
 * Returns -EINVAL if dev_valid_name() rejects @name.  If @name contains
 * a '%', the work is delegated to dev_alloc_name_ns() and its result is
 * returned.  Otherwise returns -EEXIST when __dev_get_by_name() finds
 * the name already in @net, or 0 after making sure dev->name holds @name.
 */
int dev_get_valid_name(struct net *net, struct net_device *dev,
		       const char *name)
{
	BUG_ON(!net);

	if (!dev_valid_name(name))
		return -EINVAL;

	/*
	 * strchr() yields a non-NULL pointer as soon as '%' occurs anywhere
	 * in the string, so a template like "eth%d" takes this branch.
	 */
	if (strchr(name, '%') != NULL)
		return dev_alloc_name_ns(net, dev, name);

	/* Literal name: it must not be taken already in this namespace. */
	if (__dev_get_by_name(net, name) != NULL)
		return -EEXIST;

	/* Skip the copy when the caller passed dev->name itself. */
	if (dev->name != name)
		strlcpy(dev->name, name, IFNAMSIZ);

	return 0;
}

/*
 * dev_alloc_name_ns - expand a name template and store it in @dev
 * @net:  network namespace passed through to __dev_alloc_name()
 * @dev:  device whose ->name is overwritten on success
 * @name: name template, e.g. "eth%d"
 *
 * Returns the value of __dev_alloc_name(): a negative errno on failure
 * (dev->name is left untouched), otherwise a non-negative value after
 * the generated name has been copied into dev->name.
 */
static int dev_alloc_name_ns(struct net *net,
			     struct net_device *dev,
			     const char *name)
{
	/*
	 * Scratch buffer for the generated name.  It is not initialized
	 * here; __dev_alloc_name() is relied on to fill it.
	 */
	char generated[IFNAMSIZ];
	int unit = __dev_alloc_name(net, name, generated);

	if (unit < 0)
		return unit;

	strlcpy(dev->name, generated, IFNAMSIZ);
	return unit;
}

/*
 * __dev_alloc_name - pick a free name for a template such as "eth%d"
 * @net:  network namespace whose existing devices are scanned
 * @name: name or name template; at most one "%d" is accepted
 * @buf:  output buffer of at least IFNAMSIZ bytes receiving the name
 *
 * This is where the real work of name generation happens.  One zeroed
 * page is used as a bitmap in which bit N stands for unit number N of
 * the template, so 8 * PAGE_SIZE candidate numbers can be tracked
 * (32K when PAGE_SIZE is 4K, which is the usual case).
 *
 * Returns the chosen unit number (0 when @name has no '%') if the
 * resulting name is unused in @net, -EINVAL for a malformed template,
 * -ENOMEM if the bitmap page cannot be allocated, and -ENFILE if the
 * resulting name is already taken.
 */
static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
    int i = 0;
    const char *p;
    /* With the usual 4K PAGE_SIZE this is 32K. */
    const int max_netdevices = 8*PAGE_SIZE;
    unsigned long *inuse;
    struct net_device *d;

    /*
     * Look for '%' within the first IFNAMSIZ - 1 characters only.
     * The article notes that IFNAMSIZ is 16 and that net_device declares
     * its name as char name[IFNAMSIZ] (neither definition is shown here),
     * which makes this the first 15 characters.
     */
    p = strnchr(name, IFNAMSIZ-1, '%');
    if (p) {
        /*
         * Verify the string as this thing may have come from
         * the user.  There must be either one "%d" and no other "%"
         * characters.
         */
        /* The '%' must be followed by 'd', with no further '%' after it. */
        if (p[1] != 'd' || strchr(p + 2, '%'))
            return -EINVAL;

        /* Use one page as a bit array of possible slots */
        /* The page comes back zero-filled, i.e. every slot starts free. */
        inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
        if (!inuse)
            return -ENOMEM;

        /*
         * For every device already present in this namespace, work out
         * which bit of the page (if any) its name occupies.
         */
        for_each_netdev(net, d) {
            /*
             * Note the use of sscanf(): the existing name d->name
             * (e.g. "eth1", "eth2") is parsed with the template
             * ("eth%d") as the format, which stores the unit number
             * (1, 2, ...) in i.  A result of 0 means nothing was
             * converted, so the device is skipped.
             */
            if (!sscanf(d->name, name, &i))
                continue;

            if (i < 0 || i >= max_netdevices)
                continue;

            /*  avoid cases where sscanf is not exact inverse of printf */
            /*
             * Rebuild the name from the template and the parsed number
             * into buf, then compare it with d->name.  Only if the two
             * agree over IFNAMSIZ bytes (strncmp() returns 0) is the
             * parse trusted and the matching bit set in the page.
             */
            snprintf(buf, IFNAMSIZ, name, i);
            if (!strncmp(buf, d->name, IFNAMSIZ))
                set_bit(i, inuse);
        }

        /* The first bit still clear is the lowest unused unit number. */
        i = find_first_zero_bit(inuse, max_netdevices);
        free_page((unsigned long) inuse);
    }

    /*
     * Unless buf and name are the very same buffer, format the new name
     * into buf.  For the template "eth%d", i is the unit number chosen
     * above; without a '%' in name, i is still 0 and name is emitted
     * as-is.
     */
    if (buf != name)
        snprintf(buf, IFNAMSIZ, name, i);

    /* Look the new name up to make sure no existing device has it. */
    if (!__dev_get_by_name(net, buf))
        return i;

    /* It is possible to run out of possible slots
     * when the name is long and there isn't enough space left
     * for the digits, or if all bits are used.
     */
    return -ENFILE;
}

在  static int __dev_alloc_name(struct net *net, const char *name, char *buf) 中组成的名字 ethx,会通过 buf 返回,然后在

dev_alloc_name_ns 函数中保存到步骤一生成的 struct net_device 变量的成员 name 中。

至此, 一个 新的 ethx 这样的名字就生成了。

3. ifconfig ethx up,则在 ifconfig 输出中就能够看到它的相关信息了。
————————————————
版权声明:本文为CSDN博主「xiruanliuwei」的原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接及本声明。
原文链接:https://blog.csdn.net/xiruanliuwei/article/details/78765255

原文地址:https://www.cnblogs.com/sky-heaven/p/12092731.html