CUDA实例练习(九):页锁定主机内存

      malloc()分配的内存与cudaHostAlloc()分配的内存之间存在着一个重要差异。C库函数malloc()将分配标准的,可分页的(Pageable)主机内存,而cudaHostAlloc()将分配页锁定的主机内存。页锁定内存也称为固定内存(Pinned Memory)或者不可分页内存,它有一个重要的属性:操作系统将不会对这块内存分页并交换到磁盘上,从而确保了该内存始终驻留在物理内存中。因此,操作系统能够安全地使某个应用程序访问该内存的物理地址,因为这块内存将不会被破坏或者重新定位。

 #include <stdio.h>
 #include <stdlib.h>
 #include <cuda_runtime.h>
 #include <device_launch_parameters.h>
 #include "book.h"
 5 
 /* Number of int elements in each test buffer (10 * 1024 * 1024). */
 #define SIZE (10*1024*1024)
 7 
 /*
  * Times 100 blocking cudaMemcpy() transfers between a host buffer obtained
  * from malloc() and a device buffer obtained from cudaMalloc().
  *
  * size: number of int elements in each buffer.
  * up:   true copies host -> device, false copies device -> host.
  *
  * Returns the time, in milliseconds, measured between the two CUDA events
  * that bracket the copy loop. CUDA errors and a failed host allocation are
  * handed to the HANDLE_ERROR / HANDLE_NULL macros from book.h.
  */
 float cuda_malloc_test(int size, bool up){
     cudaEvent_t start, stop;
     int *a, *dev_a;
     float elapsedTime;
     /* Buffer size in bytes, computed once so every call below agrees on it. */
     const size_t bytes = (size_t)size * sizeof(*a);

     HANDLE_ERROR(cudaEventCreate(&start));
     HANDLE_ERROR(cudaEventCreate(&stop));

     /* Host buffer from malloc(); it is never written, only its transfer is timed. */
     a = (int *)malloc(bytes);
     HANDLE_NULL(a);
     HANDLE_ERROR(cudaMalloc((void**)&dev_a, bytes));

     HANDLE_ERROR(cudaEventRecord(start, 0));
     for (int i = 0; i < 100; i++){
         if (up)
             HANDLE_ERROR(cudaMemcpy(dev_a, a, bytes, cudaMemcpyHostToDevice));
         else
             HANDLE_ERROR(cudaMemcpy(a, dev_a, bytes, cudaMemcpyDeviceToHost));
     }
     HANDLE_ERROR(cudaEventRecord(stop, 0));
     /* Wait for the stop event before reading the elapsed time. */
     HANDLE_ERROR(cudaEventSynchronize(stop));
     HANDLE_ERROR(cudaEventElapsedTime(&elapsedTime, start, stop));

     free(a);
     HANDLE_ERROR(cudaFree(dev_a));
     HANDLE_ERROR(cudaEventDestroy(start));
     HANDLE_ERROR(cudaEventDestroy(stop));

     return elapsedTime;
 }
36 
/*
 * Times 100 blocking cudaMemcpy() transfers between a host buffer obtained
 * from cudaHostAlloc() (flag cudaHostAllocDefault) and a device buffer
 * obtained from cudaMalloc().
 *
 * size: number of int elements in each buffer.
 * up:   true copies host -> device, false copies device -> host.
 *
 * Returns the time, in milliseconds, measured between the two CUDA events
 * that bracket the copy loop. CUDA errors are handed to the HANDLE_ERROR
 * macro from book.h.
 */
float cuda_host_alloc_test(int size, bool up){
    cudaEvent_t start, stop;
    int *a, *dev_a;
    float elapsedTime;
    /* Buffer size in bytes, computed once so every call below agrees on it. */
    const size_t bytes = (size_t)size * sizeof(*a);

    HANDLE_ERROR(cudaEventCreate(&start));
    HANDLE_ERROR(cudaEventCreate(&stop));

    /* Host buffer from cudaHostAlloc(); it must be released with cudaFreeHost(). */
    HANDLE_ERROR(cudaHostAlloc((void **)&a, bytes, cudaHostAllocDefault));
    HANDLE_ERROR(cudaMalloc((void**)&dev_a, bytes));

    HANDLE_ERROR(cudaEventRecord(start, 0));
    for (int i = 0; i < 100; i++){
        if (up)
            HANDLE_ERROR(cudaMemcpy(dev_a, a, bytes, cudaMemcpyHostToDevice));
        else
            HANDLE_ERROR(cudaMemcpy(a, dev_a, bytes, cudaMemcpyDeviceToHost));
    }
    HANDLE_ERROR(cudaEventRecord(stop, 0));
    /* Wait for the stop event before reading the elapsed time. */
    HANDLE_ERROR(cudaEventSynchronize(stop));
    HANDLE_ERROR(cudaEventElapsedTime(&elapsedTime, start, stop));

    HANDLE_ERROR(cudaFreeHost(a));
    HANDLE_ERROR(cudaFree(dev_a));
    HANDLE_ERROR(cudaEventDestroy(start));
    HANDLE_ERROR(cudaEventDestroy(stop));

    return elapsedTime;
}
66 
/*
 * Runs both timing tests in both copy directions and prints, for each run,
 * the elapsed time in milliseconds and the resulting throughput in MB/s.
 */
int main(void){
    float elapsedTime;
    /*
     * Total data moved per run, in MB: SIZE ints copied 100 times. The 100
     * must match the loop count inside the two timing functions.
     */
    float MB = (float)100 * SIZE*sizeof(int) / 1024 / 1024;

    /* The first two runs use the malloc()-backed host buffer; the label text is kept as published. */
    elapsedTime = cuda_malloc_test(SIZE, true);
    printf("Time using cudaMalloc: %3.1f ms\n", elapsedTime);
    /* elapsedTime is in ms, so divide by 1000 to get MB per second. */
    printf("\tMB/s during copy up: %3.1f\n", MB / (elapsedTime / 1000));

    elapsedTime = cuda_malloc_test(SIZE, false);
    printf("Time using cudaMalloc: %3.1f ms\n", elapsedTime);
    printf("\tMB/s during copy down: %3.1f\n", MB / (elapsedTime / 1000));

    /* The last two runs use the cudaHostAlloc()-backed host buffer. */
    elapsedTime = cuda_host_alloc_test(SIZE, true);
    printf("Time using cudaHostAlloc: %3.1f ms\n", elapsedTime);
    printf("\tMB/s during copy up: %3.1f\n", MB / (elapsedTime / 1000));

    elapsedTime = cuda_host_alloc_test(SIZE, false);
    printf("Time using cudaHostAlloc: %3.1f ms\n", elapsedTime);
    printf("\tMB/s during copy down: %3.1f\n", MB / (elapsedTime / 1000));

    return 0;
}

原文地址:https://www.cnblogs.com/zhangshuwen/p/7347106.html