traytracer.cuh - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
HTML git clone git://src.adamsgaard.dk/sphere
DIR Log
DIR Files
DIR Refs
DIR LICENSE
---
traytracer.cuh (18395B)
---
1 #ifndef RAYTRACER_CUH_
2 #define RAYTRACER_CUH_
3
4 #include "colorbar.h"
5
6 //#include "cuPrintf.cu"
7
8 // Template for discarding the last term in four-component vector structs
9 __device__ __inline__ float3 f4_to_f3(const float4 in) {
10 return make_float3(in.x, in.y, in.z);
11 }
12
13 // Kernel for initializing image data
14 __global__ void imageInit(
15 unsigned char* __restrict__ dev_img,
16 const unsigned int pixels)
17 {
18 // Compute pixel position from threadIdx/blockIdx
19 unsigned int mempos = threadIdx.x + blockIdx.x * blockDim.x;
20 if (mempos > pixels)
21 return;
22
23 dev_img[mempos*4] = 255; // Red channel
24 dev_img[mempos*4 + 1] = 255; // Green channel
25 dev_img[mempos*4 + 2] = 255; // Blue channel
26 }
27
28 // Calculate ray origins and directions
29 __global__ void rayInitPerspective(
30 float4* __restrict__ dev_ray_origo,
31 float4* __restrict__ dev_ray_direction,
32 const float4 eye,
33 const unsigned int width,
34 const unsigned int height)
35 {
36 // Compute pixel position from threadIdx/blockIdx
37 unsigned int mempos = threadIdx.x + blockIdx.x * blockDim.x;
38 if (mempos > width*height)
39 return;
40
41 // Calculate 2D position from linear index
42 unsigned int i = mempos % width;
43 unsigned int j = (int)floor((float)mempos/width) % width;
44
45 // Calculate pixel coordinates in image plane
46 float p_u = devC_imgplane.x + (devC_imgplane.y - devC_imgplane.x)
47 * (i + 0.5f) / width;
48 float p_v = devC_imgplane.z + (devC_imgplane.w - devC_imgplane.z)
49 * (j + 0.5f) / height;
50
51 // Write ray origo and direction to global memory
52 dev_ray_origo[mempos] = make_float4(devC_eye, 0.0f);
53 dev_ray_direction[mempos] =
54 make_float4(-devC_d*devC_w + p_u*devC_u + p_v*devC_v, 0.0f);
55 }
56
57 // Check wether the pixel's viewing ray intersects with the spheres,
58 // and shade the pixel correspondingly
59 __global__ void rayIntersectSpheres(
60 const float4* __restrict__ dev_ray_origo,
61 const float4* __restrict__ dev_ray_direction,
62 const Float4* __restrict__ dev_x,
63 unsigned char* __restrict__ dev_img)
64 {
65 // Compute pixel position from threadIdx/blockIdx
66 unsigned int mempos = threadIdx.x + blockIdx.x * blockDim.x;
67 if (mempos > devC_pixels)
68 return;
69
70 // Read ray data from global memory
71 float3 e = f4_to_f3(dev_ray_origo[mempos]);
72 float3 d = f4_to_f3(dev_ray_direction[mempos]);
73 //float step = length(d);
74
75 // Distance, in ray steps, between object and eye initialized with
76 // a large value
77 float tdist = 1e10f;
78
79 // Surface normal at closest sphere intersection
80 float3 n;
81
82 // Intersection point coordinates
83 float3 p;
84
85 //cuPrintf("mepos %d\n", mempos);
86
87 // Iterate through all particles
88 for (unsigned int i=0; i<devC_np; ++i) {
89
90 // Read sphere coordinate and radius
91 Float4 x = dev_x[i];
92 float3 c = make_float3(x.x, x.y, x.z);
93 float R = x.w;
94
95 // Calculate the discriminant: d = B^2 - 4AC
96 float Delta = (2.0f*dot(d,(e-c)))*(2.0f*dot(d,(e-c))) // B^2
97 - 4.0f*dot(d,d) // -4*A
98 * (dot((e-c),(e-c)) - R*R); // C
99
100 // If the determinant is positive, there are two solutions
101 // One where the line enters the sphere, and one where it exits
102 if (Delta > 0.0f) {
103
104 // Calculate roots, Shirley 2009 p. 77
105 float t_minus = ((dot(-d,(e-c))
106 - sqrt( dot(d,(e-c))*dot(d,(e-c))
107 - dot(d,d)*(dot((e-c),(e-c)) - R*R)))
108 / dot(d,d));
109
110 // Check wether intersection is closer than previous values
111 if (fabs(t_minus) < tdist) {
112 p = e + t_minus*d;
113 tdist = fabs(t_minus);
114 n = normalize(2.0f * (p - c)); // Surface normal
115 }
116
117 } // End of solution branch
118
119 } // End of particle loop
120
121 // Write pixel color
122 if (tdist < 1e10f) {
123
124 // Lambertian shading parameters
125 float dotprod = fmax(0.0f,dot(n, devC_light));
126 float I_d = 40.0f; // Light intensity
127 float k_d = 5.0f; // Diffuse coefficient
128
129 // Ambient shading
130 float k_a = 10.0f;
131 float I_a = 5.0f; // 5.0 for black background
132
133 // Write shading model values to pixel color channels
134 dev_img[mempos*4] =
135 (unsigned char) ((k_d * I_d * dotprod + k_a * I_a)*0.48f);
136 dev_img[mempos*4 + 1] =
137 (unsigned char) ((k_d * I_d * dotprod + k_a * I_a)*0.41f);
138 dev_img[mempos*4 + 2] =
139 (unsigned char) ((k_d * I_d * dotprod + k_a * I_a)*0.27f);
140
141 }
142 }
143
144 // Check wether the pixel's viewing ray intersects with the spheres,
145 // and shade the pixel correspondingly using a colormap
146 __global__ void rayIntersectSpheresColormap(
147 const float4* __restrict__ dev_ray_origo,
148 const float4* __restrict__ dev_ray_direction,
149 const Float4* __restrict__ dev_x,
150 const Float4* __restrict__ dev_vel,
151 const Float* __restrict__ dev_linarr,
152 const float max_val,
153 const float lower_cutoff,
154 unsigned char* __restrict__ dev_img)
155 {
156 // Compute pixel position from threadIdx/blockIdx
157 unsigned int mempos = threadIdx.x + blockIdx.x * blockDim.x;
158 if (mempos > devC_pixels)
159 return;
160
161 // Read ray data from global memory
162 float3 e = f4_to_f3(dev_ray_origo[mempos]);
163 float3 d = f4_to_f3(dev_ray_direction[mempos]);
164
165 // Distance, in ray steps, between object and eye initialized with
166 // a large value
167 float tdist = 1e10f;
168
169 // Surface normal at closest sphere intersection
170 float3 n;
171
172 // Intersection point coordinates
173 float3 p;
174
175 unsigned int hitidx;
176
177 // Iterate through all particles
178 for (unsigned int i=0; i<devC_np; ++i) {
179
180 __syncthreads();
181
182 // Read sphere coordinate and radius
183 Float4 x = dev_x[i];
184 float3 c = make_float3(x.x, x.y, x.z);
185 float R = x.w;
186
187 // Calculate the discriminant: d = B^2 - 4AC
188 float Delta = (2.0f*dot(d,(e-c)))*(2.0f*dot(d,(e-c))) // B^2
189 - 4.0f*dot(d,d) // -4*A
190 * (dot((e-c),(e-c)) - R*R); // C
191
192 // If the determinant is positive, there are two solutions
193 // One where the line enters the sphere, and one where it exits
194 if (lower_cutoff > 0.0) {
195
196 // value on colorbar
197 float val = dev_linarr[i];
198
199 // particle is fixed if value > 0
200 float fixvel = dev_vel[i].w;
201
202 // only render particles which are above the lower cutoff
203 // and which are not fixed at a velocity
204 if (Delta > 0.0f && val > lower_cutoff && fixvel == 0.f) {
205
206 // Calculate roots, Shirley 2009 p. 77
207 float t_minus =
208 ((dot(-d,(e-c)) - sqrt(dot(d,(e-c))*dot(d,(e-c))
209 - dot(d,d)*(dot((e-c),(e-c)) - R*R)))
210 / dot(d,d));
211
212 // Check wether intersection is closer than previous values
213 if (fabs(t_minus) < tdist) {
214 p = e + t_minus*d;
215 tdist = fabs(t_minus);
216 n = normalize(2.0f * (p - c)); // Surface normal
217 hitidx = i;
218 }
219
220 } // End of solution branch
221
222 } else {
223
224 // render particle if it intersects with the ray
225 if (Delta > 0.0f) {
226
227 // Calculate roots, Shirley 2009 p. 77
228 float t_minus =
229 ((dot(-d,(e-c)) - sqrt(dot(d,(e-c))*dot(d,(e-c))
230 - dot(d,d)*(dot((e-c),(e-c)) - R*R)))
231 / dot(d,d));
232
233 // Check wether intersection is closer than previous values
234 if (fabs(t_minus) < tdist) {
235 p = e + t_minus*d;
236 tdist = fabs(t_minus);
237 n = normalize(2.0f * (p - c)); // Surface normal
238 hitidx = i;
239 }
240
241 } // End of solution branch
242 }
243
244 } // End of particle loop
245
246 // Write pixel color
247 if (tdist < 1e10) {
248
249 __syncthreads();
250 // Fetch particle data used for color
251 float fixvel = dev_vel[hitidx].w;
252 float ratio = dev_linarr[hitidx] / max_val;
253
254 // Make sure the ratio doesn't exceed the 0.0-1.0 interval
255 if (ratio < 0.01f)
256 ratio = 0.01f;
257 if (ratio > 0.99f)
258 ratio = 0.99f;
259
260 // Lambertian shading parameters
261 float dotprod = fmax(0.0f,dot(n, devC_light));
262 float I_d = 40.0f; // Light intensity
263 float k_d = 5.0f; // Diffuse coefficient
264
265 // Ambient shading
266 float k_a = 10.0f;
267 float I_a = 5.0f;
268
269 float redv = red(ratio);
270 float greenv = green(ratio);
271 float bluev = blue(ratio);
272
273 // Make particle dark grey if the horizontal velocity is fixed
274 if (fixvel > 0.f) {
275 redv = 0.5;
276 greenv = 0.5;
277 bluev = 0.5;
278 }
279
280 // Write shading model values to pixel color channels
281 dev_img[mempos*4] = (unsigned char) ((k_d * I_d * dotprod
282 + k_a * I_a)*redv);
283 dev_img[mempos*4 + 1] = (unsigned char) ((k_d * I_d * dotprod
284 + k_a * I_a)*greenv);
285 dev_img[mempos*4 + 2] = (unsigned char) ((k_d * I_d * dotprod
286 + k_a * I_a)*bluev);
287 }
288 }
289
290
291 __host__ void DEM::cameraInit(
292 const float3 eye,
293 const float3 lookat,
294 const float imgw,
295 const float focalLength)
296 {
297 float hw_ratio = height/width;
298
299 // Image dimensions in world space (l, r, b, t)
300 float4 imgplane = make_float4(
301 -0.5f*imgw, 0.5f*imgw,
302 -0.5f*imgw*hw_ratio,
303 0.5f*imgw*hw_ratio);
304
305 // The view vector
306 float3 view = eye - lookat;
307
308 // Construct the camera view orthonormal base
309 float3 up = make_float3(0.0f, 0.0f, 1.0f); // Pointing upward along +z
310 float3 w = -view/length(view); // w: Pointing backwards
311 float3 u = cross(up, w) / length(cross(up, w));
312 float3 v = cross(w, u);
313
314 unsigned int pixels = width*height;
315
316 // Focal length 20% of eye vector length
317 float d = length(view)*0.8f;
318
319 // Light direction (points towards light source)
320 float3 light = normalize(-1.0f*eye*make_float3(1.0f, 0.2f, 0.6f));
321
322 if (verbose == 1)
323 std::cout << " Transfering camera values to constant memory: ";
324
325 cudaMemcpyToSymbol(devC_u, &u, sizeof(u));
326 cudaMemcpyToSymbol(devC_v, &v, sizeof(v));
327 cudaMemcpyToSymbol(devC_w, &w, sizeof(w));
328 cudaMemcpyToSymbol(devC_eye, &eye, sizeof(eye));
329 cudaMemcpyToSymbol(devC_imgplane, &imgplane, sizeof(imgplane));
330 cudaMemcpyToSymbol(devC_d, &d, sizeof(d));
331 cudaMemcpyToSymbol(devC_light, &light, sizeof(light));
332 cudaMemcpyToSymbol(devC_pixels, &pixels, sizeof(pixels));
333
334 if (verbose == 1)
335 std::cout << "Done" << std::endl;
336 checkForCudaErrors("During cameraInit");
337 }
338
339 // Allocate global device memory
340 __host__ void DEM::rt_allocateGlobalDeviceMemory()
341 {
342 if (verbose == 1)
343 std::cout << " Allocating device memory: ";
344 cudaMalloc((void**)&dev_img, width*height*4*sizeof(unsigned char));
345 cudaMalloc((void**)&dev_ray_origo, width*height*sizeof(float4));
346 cudaMalloc((void**)&dev_ray_direction, width*height*sizeof(float4));
347 if (verbose == 1)
348 std::cout << "Done" << std::endl;
349 checkForCudaErrors("During rt_allocateGlobalDeviceMemory()");
350 }
351
352
353 // Free dynamically allocated device memory
354 __host__ void DEM::rt_freeGlobalDeviceMemory()
355 {
356 if (verbose == 1)
357 std::cout << " Freeing device memory: ";
358 cudaFree(dev_img);
359 cudaFree(dev_ray_origo);
360 cudaFree(dev_ray_direction);
361 if (verbose == 1)
362 std::cout << "Done" << std::endl;
363 checkForCudaErrors("During rt_freeGlobalDeviceMemory()");
364 }
365
366 // Transfer image data from device to host
367 __host__ void DEM::rt_transferFromGlobalDeviceMemory()
368 {
369 if (verbose == 1)
370 std::cout << " Transfering image data: device -> host: ";
371 cudaMemcpy(img, dev_img, width*height*4*sizeof(unsigned char),
372 cudaMemcpyDeviceToHost);
373 if (verbose == 1)
374 std::cout << "Done" << std::endl;
375 checkForCudaErrors("During rt_transferFromGlobalDeviceMemory()");
376 }
377
378 // Wrapper for the rt kernel
379 __host__ void DEM::render(
380 const int method,
381 const float maxval,
382 const float lower_cutoff,
383 const float focalLength,
384 const unsigned int img_width,
385 const unsigned int img_height)
386 {
387 using std::cout;
388 using std::cerr;
389 using std::endl;
390
391 //cudaPrintfInit();
392
393 // Save image dimensions in class object
394 width = img_width;
395 height = img_height;
396
397 // Allocate memory for output image
398 img = new rgba[height*width];
399 rt_allocateGlobalDeviceMemory();
400
401 // Arrange thread/block structure
402 unsigned int pixels = width*height;
403 const unsigned int threadsPerBlock = 256;
404 const unsigned int blocksPerGrid = iDivUp(pixels, threadsPerBlock);
405
406 // Initialize image to background color
407 imageInit<<< blocksPerGrid, threadsPerBlock >>>(dev_img, pixels);
408
409 // Initialize camera values and transfer to constant memory
410 float imgw = grid.L[0]*1.35f; // Screen width in world coordinates
411 Float3 maxpos = maxPos();
412 // Look at the centre of the mean positions
413 float3 lookat = make_float3(maxpos.x, maxpos.y, maxpos.z) / 2.0f;
414 float3 eye = make_float3(
415 grid.L[0] * 2.3f,
416 grid.L[1] * -5.0f,
417 grid.L[2] * 1.3f);
418 cameraInit(eye, lookat, imgw, focalLength);
419
420 // Construct rays for perspective projection
421 rayInitPerspective<<< blocksPerGrid, threadsPerBlock >>>(
422 dev_ray_origo, dev_ray_direction,
423 make_float4(eye.x, eye.y, eye.z, 0.0f), width, height);
424 cudaThreadSynchronize();
425
426 Float* linarr; // Linear array to use for color visualization
427 Float* dev_linarr; // Device linear array to use for color visualization
428 checkForCudaErrors("Error during cudaMalloc of linear array");
429 std::string desc; // Description of parameter visualized
430 std::string unit; // Unit of parameter values visualized
431 unsigned int i;
432 int transfer = 0; // If changed to 1, linarr will be copied to dev_linarr
433
434 // Visualize spheres without color scale overlay
435 if (method == 0) {
436 rayIntersectSpheres<<< blocksPerGrid, threadsPerBlock >>>(
437 dev_ray_origo, dev_ray_direction,
438 dev_x, dev_img);
439 } else {
440
441 if (method == 1) { // Visualize pressure
442 dev_linarr = dev_p;
443 desc = "Pressure";
444 unit = "Pa";
445
446 } else if (method == 2) { // Visualize velocity
447 // Find the magnitude of all linear velocities
448 linarr = new Float[np];
449 #pragma omp parallel for if(np>100)
450 for (i = 0; i<np; ++i) {
451 linarr[i] = sqrt(k.vel[i].x*k.vel[i].x
452 + k.vel[i].y*k.vel[i].y
453 + k.vel[i].z*k.vel[i].z);
454 }
455 transfer = 1;
456 desc = "Linear velocity";
457 unit = "m/s";
458
459 } else if (method == 3) { // Visualize angular velocity
460 // Find the magnitude of all rotational velocities
461 linarr = new Float[np];
462 #pragma omp parallel for if(np>100)
463 for (i = 0; i<np; ++i) {
464 linarr[i] = sqrt(k.angvel[i].x*k.angvel[i].x
465 + k.angvel[i].y*k.angvel[i].y
466 + k.angvel[i].z*k.angvel[i].z);
467 }
468 transfer = 1;
469 desc = "Angular velocity";
470 unit = "rad/s";
471
472 } else if (method == 4) { // Visualize xdisp
473 // Convert xyzsum to xsum
474 linarr = new Float[np];
475 #pragma omp parallel for if(np>100)
476 for (i = 0; i<np; ++i) {
477 linarr[i] = k.xyzsum[i].x;
478 }
479 transfer = 1;
480 desc = "X-axis displacement";
481 unit = "m";
482
483 } else if (method == 5) { // Visualize total rotation
484 // Find the magnitude of all rotations
485 linarr = new Float[np];
486 #pragma omp parallel for if(np>100)
487 for (i = 0; i<np; ++i) {
488 linarr[i] = sqrt(k.angpos[i].x*k.angpos[i].x
489 + k.angpos[i].y*k.angpos[i].y
490 + k.angpos[i].z*k.angpos[i].z);
491 }
492 transfer = 1;
493 desc = "Angular positions";
494 unit = "rad";
495 }
496
497
498 // Report color visualization method and color map range
499 if (verbose == 1) {
500 cout << " " << desc << " color map range: [0, "
501 << maxval << "] " << unit << endl;
502 }
503
504 // Copy linarr to dev_linarr if required
505 if (transfer == 1) {
506 cudaMalloc((void**)&dev_linarr, np*sizeof(Float));
507 checkForCudaErrors("Error during cudaMalloc of linear array");
508 cudaMemcpy(dev_linarr, linarr, np*sizeof(Float),
509 cudaMemcpyHostToDevice);
510 checkForCudaErrors("Error during cudaMemcpy of linear array");
511 }
512
513 // Start raytracing kernel
514 rayIntersectSpheresColormap<<< blocksPerGrid, threadsPerBlock >>>(
515 dev_ray_origo, dev_ray_direction,
516 dev_x, dev_vel,
517 dev_linarr, maxval, lower_cutoff,
518 dev_img);
519
520 }
521
522 // Make sure all threads are done before continuing CPU control sequence
523 cudaThreadSynchronize();
524
525 // Check for errors
526 checkForCudaErrors("CUDA error after kernel execution");
527
528 // Copy image data from global device memory to host memory
529 rt_transferFromGlobalDeviceMemory();
530
531 // Free dynamically allocated global device memory
532 rt_freeGlobalDeviceMemory();
533 checkForCudaErrors("after rt_freeGlobalDeviceMemory");
534 if (transfer == 1) {
535 delete[] linarr;
536 cudaFree(dev_linarr);
537 checkForCudaErrors("When calling cudaFree(dev_linarr)");
538 }
539
540 //cudaPrintfDisplay(stdout, true);
541
542 // Write image to PPM file
543 writePPM(("img_out/" + sid + ".ppm").c_str());
544
545 }
546
547 #endif
548 // vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4