使用常量内存来处理光线跟踪

项目打包下载
  1 /*
  2 * Copyright 1993-2010 NVIDIA Corporation.  All rights reserved.
  3 *
  4 * NVIDIA Corporation and its licensors retain all intellectual property and
  5 * proprietary rights in and to this software and related documentation.
  6 * Any use, reproduction, disclosure, or distribution of this software
  7 * and related documentation without an express license agreement from
  8 * NVIDIA Corporation is strictly prohibited.
  9 *
 10 * Please refer to the applicable NVIDIA end user license agreement (EULA)
 11 * associated with this source code for terms and conditions that govern
 12 * your use of this NVIDIA software.
 13 *
 14 */
 15 
 16 #include <GLglut.h>
 17 #include "cuda.h"
 18 #include "../common/book.h"
 19 #include "../common/cpu_bitmap.h"
 20 #include "cuda_runtime.h"
 21 #include "device_launch_parameters.h"
 22 #include <math.h>
 23 #define DIM 1024
 24 
 25 #define rnd( x ) (x * rand() / RAND_MAX)
 26 #define INF     2e10f
 27 
 28 struct Sphere {
 29     float   r, b, g;
 30     float   radius;
 31     float   x, y, z;
 32     __device__ float hit(float ox, float oy, float *n) {
 33         float dx = ox - x;
 34         float dy = oy - y;
 35         if (dx*dx + dy*dy < radius*radius) {
 36             float dz = sqrtf(radius*radius - dx*dx - dy*dy);
 37             *n = dz / sqrtf(radius * radius);
 38             return dz + z;
 39         }
 40         return -INF;
 41     }
 42 };
 43 #define SPHERES 20
 44 
 45 __constant__ Sphere s[SPHERES];
 46 
 47 __global__ void kernel(unsigned char *ptr) {
 48     // map from threadIdx/BlockIdx to pixel position
 49     int x = threadIdx.x + blockIdx.x * blockDim.x;
 50     int y = threadIdx.y + blockIdx.y * blockDim.y;
 51     int offset = x + y * blockDim.x * gridDim.x;
 52     float   ox = (x - DIM / 2);
 53     float   oy = (y - DIM / 2);
 54 
 55     float   r = 0, g = 0, b = 0;
 56     float   maxz = -INF;
 57     for (int i = 0; i<SPHERES; i++) {
 58         float   n;
 59         float   t = s[i].hit(ox, oy, &n);
 60         if (t > maxz) {
 61             float fscale = n;
 62             r = s[i].r * fscale;
 63             g = s[i].g * fscale;
 64             b = s[i].b * fscale;
 65             maxz = t;
 66         }
 67     }
 68 
 69     ptr[offset * 4 + 0] = (int)(r * 255);
 70     ptr[offset * 4 + 1] = (int)(g * 255);
 71     ptr[offset * 4 + 2] = (int)(b * 255);
 72     ptr[offset * 4 + 3] = 255;
 73 }
 74 
 75 // globals needed by the update routine
 76 struct DataBlock {
 77     unsigned char   *dev_bitmap;
 78 };
 79 
 80 int main(void) {
 81     DataBlock   data;
 82     // capture the start time
 83     cudaEvent_t     start, stop;
 84     HANDLE_ERROR(cudaEventCreate(&start));
 85     HANDLE_ERROR(cudaEventCreate(&stop));
 86     HANDLE_ERROR(cudaEventRecord(start, 0));
 87 
 88     CPUBitmap bitmap(DIM, DIM, &data);
 89     unsigned char   *dev_bitmap;
 90 
 91     // allocate memory on the GPU for the output bitmap
 92     HANDLE_ERROR(cudaMalloc((void**)&dev_bitmap,
 93         bitmap.image_size()));
 94 
 95     // allocate temp memory, initialize it, copy to constant
 96     // memory on the GPU, then free our temp memory
 97     Sphere *temp_s = (Sphere*)malloc(sizeof(Sphere)* SPHERES);
 98     for (int i = 0; i<SPHERES; i++) {
 99         temp_s[i].r = rnd(1.0f);
100         temp_s[i].g = rnd(1.0f);
101         temp_s[i].b = rnd(1.0f);
102         temp_s[i].x = rnd(1000.0f) - 500;
103         temp_s[i].y = rnd(1000.0f) - 500;
104         temp_s[i].z = rnd(1000.0f) - 500;
105         temp_s[i].radius = rnd(100.0f) + 20;
106     }
107     /*
108     将SPHERES个球面对象存放在常量内存中
109     通过cudaMemcpyToSymbol来操作
110     */
111     HANDLE_ERROR(cudaMemcpyToSymbol(s, temp_s,
112         sizeof(Sphere)* SPHERES));
113     free(temp_s);
114 
115     // generate a bitmap from our sphere data
116     dim3    grids(DIM / 16, DIM / 16);
117     dim3    threads(16, 16);
118     kernel <<<grids, threads >>>(dev_bitmap);
119 
120     // copy our bitmap back from the GPU for display
121     HANDLE_ERROR(cudaMemcpy(bitmap.get_ptr(), dev_bitmap,
122         bitmap.image_size(),
123         cudaMemcpyDeviceToHost));
124 
125     // get stop time, and display the timing results
126     HANDLE_ERROR(cudaEventRecord(stop, 0));
127     HANDLE_ERROR(cudaEventSynchronize(stop));
128     float   elapsedTime;
129     HANDLE_ERROR(cudaEventElapsedTime(&elapsedTime,
130         start, stop));
131     printf("Time to generate:  %3.1f ms
", elapsedTime);
132 
133     HANDLE_ERROR(cudaEventDestroy(start));
134     HANDLE_ERROR(cudaEventDestroy(stop));
135 
136     HANDLE_ERROR(cudaFree(dev_bitmap));
137 
138     // display
139     bitmap.display_and_exit();
140 }
结果如下所示：