dpdk rte_memzone_reserve

[root@localhost dpdk-19.11]# cat   /sys/devices/system/node/node*/hugepages/hugepages-2048kB/nr_hugepages 
0
0
0
0
[root@localhost dpdk-19.11]# cat   /sys/devices/system/node/node*/hugepages/hugepages-524288kB/nr_hugepages 
64
64
64
64
[root@localhost dpdk-19.11]# 
Breakpoint 1, main (argc=4, argv=0xfffffffff518) at /data1/dpdk-19.11/demo/memzone/main.c:45
45                   mz = rte_memzone_reserve("memzone", sizeof(int)*2,
(gdb) s
rte_memzone_reserve (name=0xba6358 "memzone", len=8, socket_id=-1, flags=0)
    at /data1/dpdk-19.11/lib/librte_eal/common/eal_common_memzone.c:240
240             return rte_memzone_reserve_thread_safe(name, len, socket_id,
(gdb) s
rte_memzone_reserve_thread_safe (name=0xba6358 "memzone", len=8, socket_id=-1, flags=0, align=128, bound=0)
    at /data1/dpdk-19.11/lib/librte_eal/common/eal_common_memzone.c:192
192             const struct rte_memzone *mz = NULL;
(gdb) list
187     static const struct rte_memzone *
188     rte_memzone_reserve_thread_safe(const char *name, size_t len, int socket_id,
189                     unsigned int flags, unsigned int align, unsigned int bound)
190     {
191             struct rte_mem_config *mcfg;
192             const struct rte_memzone *mz = NULL;
193
194             /* get pointer to global configuration */
195             mcfg = rte_eal_get_configuration()->mem_config;
196
(gdb) n
195             mcfg = rte_eal_get_configuration()->mem_config;
(gdb) p *mcfg
Cannot access memory at address 0xfffffffff380
(gdb) n
197             rte_rwlock_write_lock(&mcfg->mlock);
(gdb) p *mcfg
$1 = {magic = 19820526, version = 319488099, nchannel = 0, nrank = 0, mlock = {cnt = 0}, qlock = {cnt = 0}, 
  mplock = {cnt = 0}, tlock = {locked = 0}, memory_hotplug_lock = {cnt = 0}, memzones = {
    name = "memzone", '00' <repeats 56 times>, count = 143, len = 2560, elt_sz = 72, data = 0x100010000, 
    rwlock = {cnt = 0}}, memsegs = {{{base_va = 0x120000000, addr_64 = 4831838208}, page_sz = 536870912, 
      socket_id = 0, version = 1, len = 34359738368, external = 0, heap = 1, memseg_arr = {
        name = "memseg-524288k-0-0", '00' <repeats 45 times>, count = 1, len = 64, elt_sz = 48, 
        data = 0x100040000, rwlock = {cnt = 0}}}, {{base_va = 0x940000000, addr_64 = 39728447488}, 
      page_sz = 536870912, socket_id = 0, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
        name = "memseg-524288k-0-1", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
        data = 0x920000000, rwlock = {cnt = 0}}}, {{base_va = 0x1160000000, addr_64 = 74625056768}, 
      page_sz = 536870912, socket_id = 0, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
        name = "memseg-524288k-0-2", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
        data = 0x1140000000, rwlock = {cnt = 0}}}, {{base_va = 0x1980000000, addr_64 = 109521666048}, 
      page_sz = 536870912, socket_id = 0, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
        name = "memseg-524288k-0-3", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
        data = 0x1960000000, rwlock = {cnt = 0}}}, {{base_va = 0x21a0000000, addr_64 = 144418275328}, 
      page_sz = 536870912, socket_id = 1, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
        name = "memseg-524288k-1-0", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
        data = 0x2180000000, rwlock = {cnt = 0}}}, {{base_va = 0x29c0000000, addr_64 = 179314884608}, 
      page_sz = 536870912, socket_id = 1, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
        name = "memseg-524288k-1-1", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
        data = 0x29a0000000, rwlock = {cnt = 0}}}, {{base_va = 0x31e0000000, addr_64 = 214211493888}, 
      page_sz = 536870912, socket_id = 1, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
        name = "memseg-524288k-1-2", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
        data = 0x31c0000000, rwlock = {cnt = 0}}}, {{base_va = 0x3a00000000, addr_64 = 249108103168}, 
      page_sz = 536870912, socket_id = 1, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
        name = "memseg-524288k-1-3", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
        data = 0x39e0000000, rwlock = {cnt = 0}}}, {{base_va = 0x4220000000, addr_64 = 284004712448}, 
      page_sz = 536870912, socket_id = 2, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
        name = "memseg-524288k-2-0", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
        data = 0x4200000000, rwlock = {cnt = 0}}}, {{base_va = 0x4a40000000, addr_64 = 318901321728}, 
---Type <return> to continue, or q <return> to quit---
      page_sz = 536870912, socket_id = 2, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
        name = "memseg-524288k-2-1", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
        data = 0x4a20000000, rwlock = {cnt = 0}}}, {{base_va = 0x5260000000, addr_64 = 353797931008}, 
      page_sz = 536870912, socket_id = 2, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
        name = "memseg-524288k-2-2", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
        data = 0x5240000000, rwlock = {cnt = 0}}}, {{base_va = 0x5a80000000, addr_64 = 388694540288}, 
      page_sz = 536870912, socket_id = 2, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
        name = "memseg-524288k-2-3", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
        data = 0x5a60000000, rwlock = {cnt = 0}}}, {{base_va = 0x62a0000000, addr_64 = 423591149568}, 
      page_sz = 536870912, socket_id = 3, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
        name = "memseg-524288k-3-0", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
        data = 0x6280000000, rwlock = {cnt = 0}}}, {{base_va = 0x6ac0000000, addr_64 = 458487758848}, 
      page_sz = 536870912, socket_id = 3, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
        name = "memseg-524288k-3-1", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
        data = 0x6aa0000000, rwlock = {cnt = 0}}}, {{base_va = 0x72e0000000, addr_64 = 493384368128}, 
      page_sz = 536870912, socket_id = 3, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
        name = "memseg-524288k-3-2", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
        data = 0x72c0000000, rwlock = {cnt = 0}}}, {{base_va = 0x7b00000000, addr_64 = 528280977408}, 
      page_sz = 536870912, socket_id = 3, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
        name = "memseg-524288k-3-3", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
        data = 0x7ae0000000, rwlock = {cnt = 0}}}, {{base_va = 0x0, addr_64 = 0}, page_sz = 0, socket_id = 0, 
      version = 0, len = 0, external = 0, heap = 0, memseg_arr = {name = '00' <repeats 63 times>, count = 0, 
        len = 0, elt_sz = 0, data = 0x0, rwlock = {cnt = 0}}} <repeats 48 times>}, tailq_head = {{tailq_head = {
        tqh_first = 0x0, tqh_last = 0x100002288}, name = "RTE_LPM", '00' <repeats 24 times>}, {tailq_head = {
        tqh_first = 0x0, tqh_last = 0x1000022b8}, name = "RTE_LPM6", '00' <repeats 23 times>}, {tailq_head = {
        tqh_first = 0x0, tqh_last = 0x1000022e8}, name = "RTE_ACL", '00' <repeats 24 times>}, {tailq_head = {
        tqh_first = 0x13ff79c00, tqh_last = 0x13ff79c00}, name = "RTE_HASH", '00' <repeats 23 times>}, {
      tailq_head = {tqh_first = 0x0, tqh_last = 0x100002348}, name = "RTE_FBK_HASH", '00' <repeats 19 times>}, 
    {tailq_head = {tqh_first = 0x0, tqh_last = 0x100002378}, name = "RTE_MEMBER", '00' <repeats 21 times>}, {
      tailq_head = {tqh_first = 0x0, tqh_last = 0x1000023a8}, 
      name = "RTE_MBUF_DYNFIELD", '00' <repeats 14 times>}, {tailq_head = {tqh_first = 0x0, 
---Type <return> to continue, or q <return> to quit---
        tqh_last = 0x1000023d8}, name = "RTE_MBUF_DYNFLAG", '00' <repeats 15 times>}, {tailq_head = {
        tqh_first = 0x0, tqh_last = 0x100002408}, name = "RTE_EVENT_RING", '00' <repeats 17 times>}, {
      tailq_head = {tqh_first = 0x13febd800, tqh_last = 0x13febd800}, 
      name = "RTE_MEMPOOL", '00' <repeats 20 times>}, {tailq_head = {tqh_first = 0x0, tqh_last = 0x100002468}, 
      name = "RTE_STACK", '00' <repeats 22 times>}, {tailq_head = {tqh_first = 0x13ff9a080, 
        tqh_last = 0x13febd500}, name = "RTE_RING", '00' <repeats 23 times>}, {tailq_head = {tqh_first = 0x0, 
        tqh_last = 0x1000024c8}, name = "RTE_REORDER", '00' <repeats 20 times>}, {tailq_head = {
        tqh_first = 0x0, tqh_last = 0x1000024f8}, name = "RTE_KNI", '00' <repeats 24 times>}, {tailq_head = {
        tqh_first = 0x13ffd4d80, tqh_last = 0x13ffd4d80}, 
      name = "VFIO_RESOURCE_LIST", '00' <repeats 13 times>}, {tailq_head = {tqh_first = 0x0, 
        tqh_last = 0x100002558}, name = "UIO_RESOURCE_LIST", '00' <repeats 14 times>}, {tailq_head = {
        tqh_first = 0x0, tqh_last = 0x100002588}, name = "VMBUS_RESOURCE_LIST", '00' <repeats 12 times>}, {
      tailq_head = {tqh_first = 0x0, tqh_last = 0x0}, name = '00' <repeats 31 times>} <repeats 15 times>}, 
  malloc_heaps = {{lock = {locked = 0}, free_head = {{lh_first = 0x0}, {lh_first = 0x0}, {lh_first = 0x0}, {
          lh_first = 0x0}, {lh_first = 0x0}, {lh_first = 0x13fe81000}, {lh_first = 0x0}, {lh_first = 0x0}, {
          lh_first = 0x0}, {lh_first = 0x0}, {lh_first = 0x0}, {lh_first = 0x120000000}, {lh_first = 0x0}}, 
      first = 0x120000000, last = 0x13fffdf80, alloc_count = 259, socket_id = 0, total_size = 536870912, 
      name = "socket_0", '00' <repeats 23 times>}, {lock = {locked = 0}, free_head = {{
          lh_first = 0x0} <repeats 13 times>}, first = 0x0, last = 0x0, alloc_count = 0, socket_id = 1, 
      total_size = 0, name = "socket_1", '00' <repeats 23 times>}, {lock = {locked = 0}, free_head = {{
          lh_first = 0x0} <repeats 13 times>}, first = 0x0, last = 0x0, alloc_count = 0, socket_id = 2, 
      total_size = 0, name = "socket_2", '00' <repeats 23 times>}, {lock = {locked = 0}, free_head = {{
          lh_first = 0x0} <repeats 13 times>}, first = 0x0, last = 0x0, alloc_count = 0, socket_id = 3, 
      total_size = 0, name = "socket_3", '00' <repeats 23 times>}, {lock = {locked = 0}, free_head = {{
          lh_first = 0x0} <repeats 13 times>}, first = 0x0, last = 0x0, alloc_count = 0, socket_id = 0, 
      total_size = 0, name = '00' <repeats 31 times>} <repeats 28 times>}, next_socket_id = 256, 
  mem_cfg_addr = 4294967296, legacy_mem = 0, single_file_segments = 0, tsc_hz = 100000000, 
  dma_maskbits = 0 '00'}
(gdb) 
(gdb) n
199             mz = memzone_reserve_aligned_thread_unsafe(
(gdb) s
memzone_reserve_aligned_thread_unsafe (name=0xba6358 "memzone", len=8, socket_id=-1, flags=0, align=128, bound=0)
    at /data1/dpdk-19.11/lib/librte_eal/common/eal_common_memzone.c:69
69              mcfg = rte_eal_get_configuration()->mem_config;
(gdb) n
70              arr = &mcfg->memzones;
(gdb) n
73              if (arr->count >= arr->len) {
(gdb) p *arr
$2 = {name = "memzone", '00' <repeats 56 times>, count = 143, len = 2560, elt_sz = 72, data = 0x100010000, 
  rwlock = {cnt = 0}}
(gdb) n
79              if (strlen(name) > sizeof(mz->name) - 1) {
(gdb) n
87              if ((memzone_lookup_thread_unsafe(name)) != NULL) {
(gdb) n
95              if (align && !rte_is_power_of_2(align)) {
(gdb) n
103             if (align < RTE_CACHE_LINE_SIZE)
(gdb) n
107             if (len > SIZE_MAX - RTE_CACHE_LINE_MASK) {
(gdb) n
112             len = RTE_ALIGN_CEIL(len, RTE_CACHE_LINE_SIZE);
(gdb) n
115             requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE,  len);
(gdb) n
118             if (bound != 0 && (requested_len > bound || !rte_is_power_of_2(bound))) {
(gdb) n
123             if ((socket_id != SOCKET_ID_ANY) && socket_id < 0) {
(gdb) n
131             if (!rte_eal_has_hugepages() && socket_id < RTE_MAX_NUMA_NODES)
(gdb) n
134             contig = (flags & RTE_MEMZONE_IOVA_CONTIG) != 0;
(gdb) n
136             flags &= ~RTE_MEMZONE_IOVA_CONTIG;
(gdb) n
138             if (len == 0 && bound == 0) {
(gdb) n
144                     if (len == 0)
(gdb) n
147                     mz_addr = malloc_heap_alloc(NULL, requested_len, socket_id,            ---------------------mz_addr从heap分配
(gdb) n
150             if (mz_addr == NULL) {
(gdb) n
155             struct malloc_elem *elem = malloc_elem_from_data(mz_addr);
(gdb) n
158             mz_idx = rte_fbarray_find_next_free(arr, 0);
(gdb) n
160             if (mz_idx < 0) {
(gdb) n
163                     rte_fbarray_set_used(arr, mz_idx);
(gdb) n
164                     mz = rte_fbarray_get(arr, mz_idx);
(gdb) p *mz
$3 = {name = "356o.01c00v230000000000000000377377377377", '00' <repeats 11 times>, 
  {phys_addr = 0, iova = 0}, {addr = 0x656e6f7a6d656d, addr_64 = 28550397722191213}, len = 0, hugepage_sz = 0, 
  socket_id = 0, flags = 0}
(gdb) n
167             if (mz == NULL) {
(gdb) n
174             strlcpy(mz->name, name, sizeof(mz->name));
(gdb) n
175             mz->iova = rte_malloc_virt2iova(mz_addr);
(gdb) n
176             mz->addr = mz_addr;
(gdb) n
178                             elem->size - elem->pad - MALLOC_ELEM_OVERHEAD :
(gdb) n
177             mz->len = requested_len == 0 ?
(gdb) n
180             mz->hugepage_sz = elem->msl->page_sz;
(gdb) n
181             mz->socket_id = elem->msl->socket_id;
(gdb) p *elem
$4 = {heap = 0x100002900, prev = 0x13fe81000, next = 0x13febc800, free_list = {le_next = 0x0, le_prev = 0x0}, 
  msl = 0x100000088, state = ELEM_BUSY, pad = 0, size = 256, orig_elem = 0x120000000, orig_size = 536870912}
(gdb) n
182             mz->flags = 0;
(gdb) n
184             return mz;
(gdb) p *mz
$5 = {name = "memzone", '00' <repeats 24 times>, {phys_addr = 261454808960, iova = 261454808960}, {
    addr = 0x13febc780, addr_64 = 5367383936}, len = 128, hugepage_sz = 536870912, socket_id = 0, flags = 0}
(gdb) c
static const struct rte_memzone *
memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
        int socket_id, unsigned flags, unsigned align, unsigned bound)
{
    struct rte_memzone *mz;
    struct rte_mem_config *mcfg;
    size_t requested_len;
    int socket, i;

    /* 获取全局变量rte_mem_config结构的指针 */
    mcfg = rte_eal_get_configuration()->mem_config;

    /* no more room in config */
    /*如果分配的memzone数量已经超过了最大值,则返错(数组大小是有限的)*/
    if (mcfg->memzone_cnt >= RTE_MAX_MEMZONE) {
        RTE_LOG(ERR, EAL, "%s(): No more room in config
", __func__);
        rte_errno = ENOSPC;
        return NULL;
    }
    /*检查memzone的名字长度是否超过了限制*/
    if (strlen(name) > sizeof(mz->name) - 1) {
        RTE_LOG(DEBUG, EAL, "%s(): memzone <%s>: name too long
",
            __func__, name);
        rte_errno = ENAMETOOLONG;
        return NULL;
    }

    /* 在mcfg->memzone[]中查找是否已有同名的memzone,如果有表示已存在,返回创建出错*/
    if ((memzone_lookup_thread_unsafe(name)) != NULL) {
        RTE_LOG(DEBUG, EAL, "%s(): memzone <%s> already exists
",
            __func__, name);
        rte_errno = EEXIST;
        return NULL;
    }

    /* 检查对齐内存大小是否是2的幂大小 */
    if (align && !rte_is_power_of_2(align)) {
        RTE_LOG(ERR, EAL, "%s(): Invalid alignment: %u
", __func__,
                align);
        rte_errno = EINVAL;
        return NULL;
    }

    /* alignment less than cache size is not allowed */
    if (align < RTE_CACHE_LINE_SIZE)/*对齐大小不能小于cache_line大小*/
        align = RTE_CACHE_LINE_SIZE;

    /* align length on cache boundary. Check for overflow before doing so */
    if (len > SIZE_MAX - RTE_CACHE_LINE_MASK) {
        rte_errno = EINVAL; /* requested size too big */
        return NULL;
    }

    len += RTE_CACHE_LINE_MASK;
    len &= ~((size_t) RTE_CACHE_LINE_MASK); /*申请内存大小进行内存对齐计算*/

    /* save minimal requested length */
    /*当申请的内存大小小于RTE_CACHE_LINE_SIZE时,则至少要分配RTE_CACHE_LINE_SIZE大小的内存*/
    requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, len);

    /* check that boundary condition is valid */
    if (bound != 0 && (requested_len > bound || !rte_is_power_of_2(bound))) {
        rte_errno = EINVAL;
        return NULL;
    }
    /*检查socket_id的合法性*/
    if ((socket_id != SOCKET_ID_ANY) && (socket_id >= RTE_MAX_NUMA_NODES)) {
        rte_errno = EINVAL;
        return NULL;
    }
    /*如果不使用hugepage,memzone的内存分配就不会考虑socke_id,而直接设置为SOCKET_ID_ANY*/
    if (!rte_eal_has_hugepages())
        socket_id = SOCKET_ID_ANY;

    if (len == 0) { /*申请内存大小等于0的情况,则申请申请最大的连续内存空间*/
        if (bound != 0)
            requested_len = bound;
        else {
            requested_len = find_heap_max_free_elem(&socket_id, align);
            if (requested_len == 0) {
                rte_errno = ENOMEM;
                return NULL;
            }
        }
    }
    /*如果socket_id为SOCKET_ID_ANY,则先在当前cpu所在的socket上分配内存*/
    if (socket_id == SOCKET_ID_ANY)
        socket = malloc_get_numa_socket();
    else
        socket = socket_id;

    /* 尝试在当前socket对应的malloc_heap上分配内存 */
    void *mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[socket], NULL,
            requested_len, flags, align, bound);
    /*如果socket_id为SOCKET_ID_ANY,且在当前socket上分配失败,就尝试在其他cpu分配*/
    if ((mz_addr == NULL) && (socket_id == SOCKET_ID_ANY)) {
        /* try other heaps */
        for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
            if (socket == i)
                continue;

            mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[i],
                    NULL, requested_len, flags, align, bound);
            if (mz_addr != NULL)
                break;
        }
    }

    if (mz_addr == NULL) {
        rte_errno = ENOMEM;
        return NULL;
    }
    /*获取对应内存的malloc_elem结构*/
    const struct malloc_elem *elem = malloc_elem_from_data(mz_addr);

    /* 从mcfg->memzone[]中找到一个还为使用的memzone结构 */
    mz = get_next_free_memzone();

    if (mz == NULL) {
        RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone but there is room "
                "in config!
", __func__);
        rte_errno = ENOSPC;
        return NULL;
    }
    /*增加mcfg的memzone计数*/
    mcfg->memzone_cnt++;
    snprintf(mz->name, sizeof(mz->name), "%s", name);
    mz->phys_addr = rte_malloc_virt2phy(mz_addr);
    mz->addr = mz_addr;
    mz->len = (requested_len == 0 ? elem->size : requested_len);
    mz->hugepage_sz = elem->ms->hugepage_sz;/*memzone对应的socketid和hupagesize即为对应malloc_elem的值*/
    mz->socket_id = elem->ms->socket_id;
    mz->flags = 0;
    mz->memseg_id = elem->ms - rte_eal_get_configuration()->mem_config->memseg;

    return mz;
}
    mz = get_next_free_memzone();

19.11版本

mz = rte_fbarray_get(arr, mz_idx);
if (len == 0 && bound == 0) {
                /* no size constraints were placed, so use malloc elem len */
                requested_len = 0;
                mz_addr = malloc_heap_alloc_biggest(NULL, socket_id, flags,
                                align, contig);
        } else {
                if (len == 0)
                        requested_len = bound;
                /* allocate memory on heap */
                mz_addr = malloc_heap_alloc(NULL, requested_len, socket_id,
                                flags, align, bound, contig);
        }
        if (mz_addr == NULL) {
                rte_errno = ENOMEM;
                return NULL;
        }
struct malloc_elem *elem = malloc_elem_from_data(mz_addr);

        /* fill the zone in config */
        mz_idx = rte_fbarray_find_next_free(arr, 0);

        if (mz_idx < 0) {
                mz = NULL;
        } else {
                rte_fbarray_set_used(arr, mz_idx);
                mz = rte_fbarray_get(arr, mz_idx);
        }

        if (mz == NULL) {
                RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone
", __func__);
                malloc_heap_free(elem);
                rte_errno = ENOSPC;
                return NULL;
        }

        strlcpy(mz->name, name, sizeof(mz->name));
        mz->iova = rte_malloc_virt2iova(mz_addr);
        mz->addr = mz_addr;
        mz->len = requested_len == 0 ?
                        elem->size - elem->pad - MALLOC_ELEM_OVERHEAD :
                        requested_len;
        mz->hugepage_sz = elem->msl->page_sz;
        mz->socket_id = elem->msl->socket_id;
        mz->flags = 0;

        return mz;

 

看一下memzone的结构体, 包含了zone的name、起始IO addr、virt addr、长度、对应的大页大小等。

/**
 * A structure describing a memzone, which is a contiguous portion of
 * physical memory identified by a name.
 *
 * The structure is declared packed, so members are laid out without
 * padding between them.
 */
struct rte_memzone {

#define RTE_MEMZONE_NAMESIZE 32       /**< Maximum length of memory zone name.*/
	char name[RTE_MEMZONE_NAMESIZE];  /**< Name of the memory zone. */

	/* Anonymous union: phys_addr and iova are two names for the same storage. */
	RTE_STD_C11
	union {
		phys_addr_t phys_addr;        /**< deprecated - Start physical address. */
		rte_iova_t iova;              /**< Start IO address. */
	};
	/* Anonymous union: addr_64 overlays addr so the member is 64 bits wide. */
	RTE_STD_C11
	union {
		void *addr;                   /**< Start virtual address. */
		uint64_t addr_64;             /**< Makes sure addr is always 64-bits */
	};
	size_t len;                       /**< Length of the memzone. */

	uint64_t hugepage_sz;             /**< The page size of underlying memory */

	int32_t socket_id;                /**< NUMA socket ID. */

	uint32_t flags;                   /**< Characteristics of this memzone. */
	uint32_t memseg_id;               /**< Memseg it belongs. */
} __attribute__((__packed__));

接下来,我们从rte_memzone_reserve()开始看起,用户程序会调用该函数申请memzone,此时不会指定align和bound,DPDK为提高内存读写效率,到处运用了内存对齐技术,但是暴露给客户的时候不会像他底层的实现那样需要到处留意,从这段就可以大概看到DPDK的封装确实很好,只暴露有必要暴露的。

/*
 * Public entry point for reserving a memzone. The caller supplies no
 * alignment or boundary: RTE_CACHE_LINE_SIZE is passed as the alignment
 * and 0 as the bound to the locking helper.
 * Returns whatever rte_memzone_reserve_thread_safe() returns.
 */
const struct rte_memzone *
rte_memzone_reserve(const char *name, size_t len, int socket_id,
		    unsigned flags)
{
	const struct rte_memzone *zone;

	zone = rte_memzone_reserve_thread_safe(name, len, socket_id, flags,
					       RTE_CACHE_LINE_SIZE, 0);
	return zone;
}

这里继续封装一层,上了一把锁,因此 memzone_reserve_aligned_thread_unsafe这个函数的实现将不会再考虑线程安全的问题了。

static const struct rte_memzone *
rte_memzone_reserve_thread_safe(const char *name, size_t len,
				int socket_id, unsigned flags, unsigned align,
				unsigned bound)
{
	rte_rwlock_write_lock(&mcfg->mlock);
	mz = memzone_reserve_aligned_thread_unsafe(
		name, len, socket_id, flags, align, bound);
	rte_rwlock_write_unlock(&mcfg->mlock);
	return mz;
}

继续分析 memzone_reserve_aligned_thread_unsafe()。首先检查memzone数量,这个最大值是用户编译DPDK前通过配置文件指定的,因此这里也可以看到,并不是DPDK绑定的所有大页内存都拿来做memzone了,还有其他的内存模块会使用到。

	/* no more room in config */
	if (mcfg->memzone_cnt >= RTE_MAX_MEMZONE) {
		RTE_LOG(ERR, EAL, "%s(): No more room in config
", __func__);
		rte_errno = ENOSPC;
		return NULL;
	}

检查用户申请的name是否已经存在。这个函数里面的实现很简单,在memzone数组中一个一个memzone地找过去,一个一个比较这个name是否已经存在。这里就可以看到memzone的申请确实效率很低,不适合大数量多次数地申请,只适合对申请效率要求不高的程序,或者预先规划好在程序初始化过程中一次性把需要的memzone全部申请完。

	/* zone already exist */
	if ((memzone_lookup_thread_unsafe(name)) != NULL) {
		RTE_LOG(DEBUG, EAL, "%s(): memzone <%s> already exists
",
			__func__, name);
		rte_errno = EEXIST;
		return NULL;
	}

如果用户不指定要求alloc的memzone的内存长度,DPDK会在所有heap中找一个最大的空闲elem(malloc_elem)给用户。find_heap_max_free_elem()这个函数效率更低,要每一个heap的每一个queue的每一个elem地遍历过去,全部遍历完了之后才能知道哪个空闲elem的长度最大。

			requested_len = find_heap_max_free_elem(&socket_id, align);
			if (requested_len == 0) {
				rte_errno = ENOMEM;
				return NULL;
			}

如果用户指定了len,就以用户指定为准,如果没指定(即len=0),就以找到的最大长度来申请elem。

	/* allocate memory on heap */
	void *mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[socket], NULL,
			requested_len, flags, align, bound);

如果用户没有指定socket id的话,就到其他的heap中去申请一下内存,但这样存在一个问题,会出现跨socket访问内存的问题,这个对效率影响非常大,程序性能甚至会降到30%左右,直接打了3折。

	if ((mz_addr == NULL) && (socket_id == SOCKET_ID_ANY)) {
		/* try other heaps */
		for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
			if (socket == i)
				continue;
			mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[i],
					NULL, requested_len, flags, align, bound);
			if (mz_addr != NULL)
				break;
		}
	}

最后根据alloc到的elem和相关信息填写一下新的memzone,返回给用户

	struct malloc_elem *elem = malloc_elem_from_data(mz_addr);

	/* fill the zone in config */
	mz = get_next_free_memzone();
	mcfg->memzone_cnt++;
	snprintf(mz->name, sizeof(mz->name), "%s", name);
	mz->iova = rte_malloc_virt2iova(mz_addr);
	mz->addr = mz_addr;
	mz->len = (requested_len == 0 ? elem->size : requested_len);
	mz->hugepage_sz = elem->ms->hugepage_sz;
	mz->socket_id = elem->ms->socket_id;
	mz->flags = 0;
	mz->memseg_id = elem->ms - rte_eal_get_configuration()->mem_config->memseg;

接下来看看memzone的释放流程。先用memset清空memzone数组中对应的描述符并递减memzone_cnt,解锁之后,最后调用rte_free释放实际的内存。我们在下一篇文章再来分析这个rte_free的实现。

int
rte_memzone_free(const struct rte_memzone *mz)
{
	rte_rwlock_write_lock(&mcfg->mlock);

	idx = ((uintptr_t)mz - (uintptr_t)mcfg->memzone);
	idx = idx / sizeof(struct rte_memzone);

	addr = mcfg->memzone[idx].addr;
	if (addr == NULL)
		ret = -EINVAL;
	else if (mcfg->memzone_cnt == 0) {
		rte_panic("%s(): memzone address not NULL but memzone_cnt is 0!
",
				__func__);
	} else {
		memset(&mcfg->memzone[idx], 0, sizeof(mcfg->memzone[idx]));
		mcfg->memzone_cnt--;
	}

	rte_rwlock_write_unlock(&mcfg->mlock);

	rte_free(addr);

	return ret;
}
原文地址:https://www.cnblogs.com/dream397/p/13601025.html