URI:
       traytracer.cuh - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
  HTML git clone git://src.adamsgaard.dk/sphere
   DIR Log
   DIR Files
   DIR Refs
   DIR LICENSE
       ---
       traytracer.cuh (18395B)
       ---
            1 #ifndef RAYTRACER_CUH_
            2 #define RAYTRACER_CUH_
            3 
            4 #include "colorbar.h"
            5 
            6 //#include "cuPrintf.cu"
            7 
            8 // Template for discarding the last term in four-component vector structs
            9 __device__ __inline__ float3 f4_to_f3(const float4 in) {
           10     return make_float3(in.x, in.y, in.z);
           11 }
           12 
           13 // Kernel for initializing image data
           14 __global__ void imageInit(
           15     unsigned char* __restrict__ dev_img,
           16     const unsigned int pixels)
           17 {
           18     // Compute pixel position from threadIdx/blockIdx
           19     unsigned int mempos = threadIdx.x + blockIdx.x * blockDim.x;
           20     if (mempos > pixels)
           21         return;
           22 
           23     dev_img[mempos*4]     = 255;        // Red channel
           24     dev_img[mempos*4 + 1] = 255;        // Green channel
           25     dev_img[mempos*4 + 2] = 255;        // Blue channel
           26 }
           27 
           28 // Calculate ray origins and directions
           29 __global__ void rayInitPerspective(
           30     float4* __restrict__ dev_ray_origo, 
           31     float4* __restrict__ dev_ray_direction, 
           32     const float4 eye, 
           33     const unsigned int width,
           34     const unsigned int height)
           35 {
           36     // Compute pixel position from threadIdx/blockIdx
           37     unsigned int mempos = threadIdx.x + blockIdx.x * blockDim.x;
           38     if (mempos > width*height) 
           39         return;
           40 
           41     // Calculate 2D position from linear index
           42     unsigned int i = mempos % width;
           43     unsigned int j = (int)floor((float)mempos/width) % width;
           44 
           45     // Calculate pixel coordinates in image plane
           46     float p_u = devC_imgplane.x + (devC_imgplane.y - devC_imgplane.x)
           47         * (i + 0.5f) / width;
           48     float p_v = devC_imgplane.z + (devC_imgplane.w - devC_imgplane.z)
           49         * (j + 0.5f) / height;
           50 
           51     // Write ray origo and direction to global memory
           52     dev_ray_origo[mempos]     = make_float4(devC_eye, 0.0f);
           53     dev_ray_direction[mempos] =
           54         make_float4(-devC_d*devC_w + p_u*devC_u + p_v*devC_v, 0.0f);
           55 }
           56 
           57 // Check wether the pixel's viewing ray intersects with the spheres,
           58 // and shade the pixel correspondingly
           59 __global__ void rayIntersectSpheres(
           60     const float4* __restrict__ dev_ray_origo, 
           61     const float4* __restrict__ dev_ray_direction,
           62     const Float4* __restrict__ dev_x, 
           63     unsigned char* __restrict__ dev_img)
           64 {
           65     // Compute pixel position from threadIdx/blockIdx
           66     unsigned int mempos = threadIdx.x + blockIdx.x * blockDim.x;
           67     if (mempos > devC_pixels)
           68         return;
           69 
           70     // Read ray data from global memory
           71     float3 e = f4_to_f3(dev_ray_origo[mempos]);
           72     float3 d = f4_to_f3(dev_ray_direction[mempos]);
           73     //float  step = length(d);
           74 
           75     // Distance, in ray steps, between object and eye initialized with
           76     // a large value
           77     float tdist = 1e10f;
           78 
           79     // Surface normal at closest sphere intersection
           80     float3 n;
           81 
           82     // Intersection point coordinates
           83     float3 p;
           84 
           85     //cuPrintf("mepos %d\n", mempos);
           86 
           87     // Iterate through all particles
           88     for (unsigned int i=0; i<devC_np; ++i) {
           89 
           90         // Read sphere coordinate and radius
           91         Float4 x = dev_x[i];
           92         float3 c = make_float3(x.x, x.y, x.z);
           93         float  R = x.w;
           94 
           95         // Calculate the discriminant: d = B^2 - 4AC
           96         float Delta = (2.0f*dot(d,(e-c)))*(2.0f*dot(d,(e-c)))  // B^2
           97             - 4.0f*dot(d,d)        // -4*A
           98             * (dot((e-c),(e-c)) - R*R);  // C
           99 
          100         // If the determinant is positive, there are two solutions
          101         // One where the line enters the sphere, and one where it exits
          102         if (Delta > 0.0f) { 
          103 
          104             // Calculate roots, Shirley 2009 p. 77
          105             float t_minus = ((dot(-d,(e-c))
          106                               - sqrt( dot(d,(e-c))*dot(d,(e-c))
          107                                       - dot(d,d)*(dot((e-c),(e-c)) - R*R)))
          108                              / dot(d,d));
          109 
          110             // Check wether intersection is closer than previous values
          111             if (fabs(t_minus) < tdist) {
          112                 p = e + t_minus*d;
          113                 tdist = fabs(t_minus);
          114                 n = normalize(2.0f * (p - c));   // Surface normal
          115             }
          116 
          117         } // End of solution branch
          118 
          119     } // End of particle loop
          120 
          121     // Write pixel color
          122     if (tdist < 1e10f) {
          123 
          124         // Lambertian shading parameters
          125         float dotprod = fmax(0.0f,dot(n, devC_light));
          126         float I_d = 40.0f;  // Light intensity
          127         float k_d = 5.0f;  // Diffuse coefficient
          128 
          129         // Ambient shading
          130         float k_a = 10.0f;
          131         float I_a = 5.0f; // 5.0 for black background
          132 
          133         // Write shading model values to pixel color channels
          134         dev_img[mempos*4]     = 
          135             (unsigned char) ((k_d * I_d * dotprod + k_a * I_a)*0.48f);
          136         dev_img[mempos*4 + 1] = 
          137             (unsigned char) ((k_d * I_d * dotprod + k_a * I_a)*0.41f);
          138         dev_img[mempos*4 + 2] = 
          139             (unsigned char) ((k_d * I_d * dotprod + k_a * I_a)*0.27f);
          140 
          141     }
          142 }
          143 
          144 // Check wether the pixel's viewing ray intersects with the spheres,
          145 // and shade the pixel correspondingly using a colormap
          146 __global__ void rayIntersectSpheresColormap(
          147     const float4* __restrict__ dev_ray_origo, 
          148     const float4* __restrict__ dev_ray_direction,
          149     const Float4* __restrict__ dev_x, 
          150     const Float4* __restrict__ dev_vel,
          151     const Float*  __restrict__ dev_linarr,
          152     const float max_val,
          153     const float lower_cutoff,
          154     unsigned char* __restrict__ dev_img)
          155 {
          156     // Compute pixel position from threadIdx/blockIdx
          157     unsigned int mempos = threadIdx.x + blockIdx.x * blockDim.x;
          158     if (mempos > devC_pixels)
          159         return;
          160 
          161     // Read ray data from global memory
          162     float3 e = f4_to_f3(dev_ray_origo[mempos]);
          163     float3 d = f4_to_f3(dev_ray_direction[mempos]);
          164 
          165     // Distance, in ray steps, between object and eye initialized with
          166     // a large value
          167     float tdist = 1e10f;
          168 
          169     // Surface normal at closest sphere intersection
          170     float3 n;
          171 
          172     // Intersection point coordinates
          173     float3 p;
          174 
          175     unsigned int hitidx;
          176 
          177     // Iterate through all particles
          178     for (unsigned int i=0; i<devC_np; ++i) {
          179 
          180         __syncthreads();
          181 
          182         // Read sphere coordinate and radius
          183         Float4 x = dev_x[i];
          184         float3 c = make_float3(x.x, x.y, x.z);
          185         float  R = x.w;
          186 
          187         // Calculate the discriminant: d = B^2 - 4AC
          188         float Delta = (2.0f*dot(d,(e-c)))*(2.0f*dot(d,(e-c)))  // B^2
          189             - 4.0f*dot(d,d)        // -4*A
          190             * (dot((e-c),(e-c)) - R*R);  // C
          191 
          192         // If the determinant is positive, there are two solutions
          193         // One where the line enters the sphere, and one where it exits
          194         if (lower_cutoff > 0.0) {
          195 
          196             // value on colorbar
          197             float val = dev_linarr[i];
          198 
          199             // particle is fixed if value > 0
          200             float fixvel = dev_vel[i].w;
          201 
          202             // only render particles which are above the lower cutoff
          203             // and which are not fixed at a velocity
          204             if (Delta > 0.0f && val > lower_cutoff && fixvel == 0.f) {
          205 
          206                 // Calculate roots, Shirley 2009 p. 77
          207                 float t_minus =
          208                     ((dot(-d,(e-c)) - sqrt(dot(d,(e-c))*dot(d,(e-c))
          209                                            - dot(d,d)*(dot((e-c),(e-c)) - R*R)))
          210                      / dot(d,d));
          211 
          212                 // Check wether intersection is closer than previous values
          213                 if (fabs(t_minus) < tdist) {
          214                     p = e + t_minus*d;
          215                     tdist = fabs(t_minus);
          216                     n = normalize(2.0f * (p - c));   // Surface normal
          217                     hitidx = i;
          218                 }
          219 
          220             } // End of solution branch
          221 
          222         } else {
          223 
          224             // render particle if it intersects with the ray
          225             if (Delta > 0.0f) {
          226 
          227                 // Calculate roots, Shirley 2009 p. 77
          228                 float t_minus =
          229                     ((dot(-d,(e-c)) - sqrt(dot(d,(e-c))*dot(d,(e-c))
          230                                            - dot(d,d)*(dot((e-c),(e-c)) - R*R)))
          231                      / dot(d,d));
          232 
          233                 // Check wether intersection is closer than previous values
          234                 if (fabs(t_minus) < tdist) {
          235                     p = e + t_minus*d;
          236                     tdist = fabs(t_minus);
          237                     n = normalize(2.0f * (p - c));   // Surface normal
          238                     hitidx = i;
          239                 }
          240 
          241             } // End of solution branch
          242         }
          243 
          244     } // End of particle loop
          245 
          246     // Write pixel color
          247     if (tdist < 1e10) {
          248 
          249         __syncthreads();
          250         // Fetch particle data used for color
          251         float fixvel = dev_vel[hitidx].w;
          252         float ratio = dev_linarr[hitidx] / max_val;
          253 
          254         // Make sure the ratio doesn't exceed the 0.0-1.0 interval
          255         if (ratio < 0.01f)
          256             ratio = 0.01f;
          257         if (ratio > 0.99f)
          258             ratio = 0.99f;
          259 
          260         // Lambertian shading parameters
          261         float dotprod = fmax(0.0f,dot(n, devC_light));
          262         float I_d = 40.0f;  // Light intensity
          263         float k_d = 5.0f;  // Diffuse coefficient
          264 
          265         // Ambient shading
          266         float k_a = 10.0f;
          267         float I_a = 5.0f;
          268 
          269         float redv   = red(ratio);
          270         float greenv = green(ratio);
          271         float bluev  = blue(ratio);
          272 
          273         // Make particle dark grey if the horizontal velocity is fixed
          274         if (fixvel > 0.f) {
          275             redv = 0.5;
          276             greenv = 0.5;
          277             bluev = 0.5;
          278         }
          279 
          280         // Write shading model values to pixel color channels
          281         dev_img[mempos*4]     = (unsigned char) ((k_d * I_d * dotprod 
          282                                                   + k_a * I_a)*redv);
          283         dev_img[mempos*4 + 1] = (unsigned char) ((k_d * I_d * dotprod
          284                                                   + k_a * I_a)*greenv);
          285         dev_img[mempos*4 + 2] = (unsigned char) ((k_d * I_d * dotprod
          286                                                   + k_a * I_a)*bluev);
          287     }
          288 }
          289 
          290 
          291 __host__ void DEM::cameraInit(
          292     const float3 eye,
          293     const float3 lookat, 
          294     const float imgw,
          295     const float focalLength)
          296 {
          297     float hw_ratio = height/width;
          298 
          299     // Image dimensions in world space (l, r, b, t)
          300     float4 imgplane = make_float4(
          301         -0.5f*imgw, 0.5f*imgw,
          302         -0.5f*imgw*hw_ratio,
          303         0.5f*imgw*hw_ratio);
          304 
          305     // The view vector
          306     float3 view = eye - lookat;
          307 
          308     // Construct the camera view orthonormal base
          309     float3 up = make_float3(0.0f, 0.0f, 1.0f);  // Pointing upward along +z
          310     float3 w = -view/length(view);                   // w: Pointing backwards
          311     float3 u = cross(up, w) / length(cross(up, w));
          312     float3 v = cross(w, u);
          313 
          314     unsigned int pixels = width*height;
          315 
          316     // Focal length 20% of eye vector length
          317     float d = length(view)*0.8f;
          318 
          319     // Light direction (points towards light source)
          320     float3 light = normalize(-1.0f*eye*make_float3(1.0f, 0.2f, 0.6f));
          321 
          322     if (verbose == 1)
          323         std::cout << "  Transfering camera values to constant memory: ";
          324 
          325     cudaMemcpyToSymbol(devC_u, &u, sizeof(u));
          326     cudaMemcpyToSymbol(devC_v, &v, sizeof(v));
          327     cudaMemcpyToSymbol(devC_w, &w, sizeof(w));
          328     cudaMemcpyToSymbol(devC_eye, &eye, sizeof(eye));
          329     cudaMemcpyToSymbol(devC_imgplane, &imgplane, sizeof(imgplane));
          330     cudaMemcpyToSymbol(devC_d, &d, sizeof(d));
          331     cudaMemcpyToSymbol(devC_light, &light, sizeof(light));
          332     cudaMemcpyToSymbol(devC_pixels, &pixels, sizeof(pixels));
          333 
          334     if (verbose == 1)
          335         std::cout << "Done" << std::endl;
          336     checkForCudaErrors("During cameraInit");
          337 }
          338 
          339 // Allocate global device memory
          340 __host__ void DEM::rt_allocateGlobalDeviceMemory()
          341 {
          342     if (verbose == 1)
          343         std::cout << "  Allocating device memory: ";
          344     cudaMalloc((void**)&dev_img, width*height*4*sizeof(unsigned char));
          345     cudaMalloc((void**)&dev_ray_origo, width*height*sizeof(float4));
          346     cudaMalloc((void**)&dev_ray_direction, width*height*sizeof(float4));
          347     if (verbose == 1)
          348         std::cout << "Done" << std::endl;
          349     checkForCudaErrors("During rt_allocateGlobalDeviceMemory()");
          350 }
          351 
          352 
          353 // Free dynamically allocated device memory
          354 __host__ void DEM::rt_freeGlobalDeviceMemory()
          355 {
          356     if (verbose == 1)
          357         std::cout << "  Freeing device memory: ";
          358     cudaFree(dev_img);
          359     cudaFree(dev_ray_origo);
          360     cudaFree(dev_ray_direction);
          361     if (verbose == 1)
          362         std::cout << "Done" << std::endl;
          363     checkForCudaErrors("During rt_freeGlobalDeviceMemory()");
          364 }
          365 
          366 // Transfer image data from device to host
          367 __host__ void DEM::rt_transferFromGlobalDeviceMemory()
          368 {
          369     if (verbose == 1)
          370         std::cout << "  Transfering image data: device -> host: ";
          371     cudaMemcpy(img, dev_img, width*height*4*sizeof(unsigned char),
          372                cudaMemcpyDeviceToHost);
          373     if (verbose == 1)
          374         std::cout << "Done" << std::endl;
          375     checkForCudaErrors("During rt_transferFromGlobalDeviceMemory()");
          376 }
          377 
          378 // Wrapper for the rt kernel
          379 __host__ void DEM::render(
          380     const int method,
          381     const float maxval,
          382     const float lower_cutoff,
          383     const float focalLength,
          384     const unsigned int img_width,
          385     const unsigned int img_height)
          386 {
          387     using std::cout;
          388     using std::cerr;
          389     using std::endl;
          390 
          391     //cudaPrintfInit();
          392 
          393     // Save image dimensions in class object
          394     width = img_width;
          395     height = img_height;
          396 
          397     // Allocate memory for output image
          398     img = new rgba[height*width];
          399     rt_allocateGlobalDeviceMemory();
          400 
          401     // Arrange thread/block structure
          402     unsigned int pixels = width*height;
          403     const unsigned int threadsPerBlock = 256;
          404     const unsigned int blocksPerGrid = iDivUp(pixels, threadsPerBlock);
          405 
          406     // Initialize image to background color
          407     imageInit<<< blocksPerGrid, threadsPerBlock >>>(dev_img, pixels);
          408 
          409     // Initialize camera values and transfer to constant memory
          410     float imgw = grid.L[0]*1.35f; // Screen width in world coordinates
          411     Float3 maxpos = maxPos();
          412     // Look at the centre of the mean positions
          413     float3 lookat = make_float3(maxpos.x, maxpos.y, maxpos.z) / 2.0f; 
          414     float3 eye = make_float3(
          415         grid.L[0] * 2.3f,
          416         grid.L[1] * -5.0f,
          417         grid.L[2] * 1.3f);
          418     cameraInit(eye, lookat, imgw, focalLength);
          419 
          420     // Construct rays for perspective projection
          421     rayInitPerspective<<< blocksPerGrid, threadsPerBlock >>>(
          422         dev_ray_origo, dev_ray_direction, 
          423         make_float4(eye.x, eye.y, eye.z, 0.0f), width, height);
          424     cudaThreadSynchronize();
          425 
          426     Float* linarr;     // Linear array to use for color visualization
          427     Float* dev_linarr; // Device linear array to use for color visualization
          428     checkForCudaErrors("Error during cudaMalloc of linear array");
          429     std::string desc;  // Description of parameter visualized
          430     std::string unit;  // Unit of parameter values visualized
          431     unsigned int i;
          432     int transfer = 0;  // If changed to 1, linarr will be copied to dev_linarr
          433 
          434     // Visualize spheres without color scale overlay
          435     if (method == 0) {
          436         rayIntersectSpheres<<< blocksPerGrid, threadsPerBlock >>>(
          437             dev_ray_origo, dev_ray_direction,
          438             dev_x, dev_img);
          439     } else {
          440 
          441         if (method == 1) { // Visualize pressure
          442             dev_linarr = dev_p;
          443             desc = "Pressure";
          444             unit = "Pa";
          445 
          446         } else if (method == 2) { // Visualize velocity
          447             // Find the magnitude of all linear velocities
          448             linarr = new Float[np];
          449 #pragma omp parallel for if(np>100)
          450             for (i = 0; i<np; ++i) {
          451                 linarr[i] = sqrt(k.vel[i].x*k.vel[i].x 
          452                                  + k.vel[i].y*k.vel[i].y 
          453                                  + k.vel[i].z*k.vel[i].z);
          454             }
          455             transfer = 1;
          456             desc = "Linear velocity";
          457             unit = "m/s";
          458 
          459         } else if (method == 3) { // Visualize angular velocity
          460             // Find the magnitude of all rotational velocities
          461             linarr = new Float[np];
          462 #pragma omp parallel for if(np>100)
          463             for (i = 0; i<np; ++i) {
          464                 linarr[i] = sqrt(k.angvel[i].x*k.angvel[i].x
          465                                  + k.angvel[i].y*k.angvel[i].y 
          466                                  + k.angvel[i].z*k.angvel[i].z);
          467             }
          468             transfer = 1;
          469             desc = "Angular velocity";
          470             unit = "rad/s";
          471 
          472         } else if (method == 4) { // Visualize xdisp
          473             // Convert xyzsum to xsum
          474             linarr = new Float[np];
          475 #pragma omp parallel for if(np>100)
          476             for (i = 0; i<np; ++i) {
          477                 linarr[i] = k.xyzsum[i].x;
          478             }
          479             transfer = 1;
          480             desc = "X-axis displacement";
          481             unit = "m";
          482 
          483         } else if (method == 5) { // Visualize total rotation
          484             // Find the magnitude of all rotations
          485             linarr = new Float[np];
          486 #pragma omp parallel for if(np>100)
          487             for (i = 0; i<np; ++i) {
          488                 linarr[i] = sqrt(k.angpos[i].x*k.angpos[i].x
          489                                  + k.angpos[i].y*k.angpos[i].y 
          490                                  + k.angpos[i].z*k.angpos[i].z);
          491             }
          492             transfer = 1;
          493             desc = "Angular positions";
          494             unit = "rad";
          495         }
          496 
          497 
          498         // Report color visualization method and color map range
          499         if (verbose == 1) {
          500             cout << "  " << desc << " color map range: [0, " 
          501                  << maxval << "] " << unit << endl;
          502         }
          503 
          504         // Copy linarr to dev_linarr if required
          505         if (transfer == 1) {
          506             cudaMalloc((void**)&dev_linarr, np*sizeof(Float));
          507             checkForCudaErrors("Error during cudaMalloc of linear array");
          508             cudaMemcpy(dev_linarr, linarr, np*sizeof(Float),
          509                        cudaMemcpyHostToDevice);
          510             checkForCudaErrors("Error during cudaMemcpy of linear array");
          511         }
          512 
          513         // Start raytracing kernel
          514         rayIntersectSpheresColormap<<< blocksPerGrid, threadsPerBlock >>>(
          515             dev_ray_origo, dev_ray_direction,
          516             dev_x, dev_vel,
          517             dev_linarr, maxval, lower_cutoff,
          518             dev_img);
          519 
          520     }
          521 
          522     // Make sure all threads are done before continuing CPU control sequence
          523     cudaThreadSynchronize();
          524 
          525     // Check for errors
          526     checkForCudaErrors("CUDA error after kernel execution");
          527 
          528     // Copy image data from global device memory to host memory
          529     rt_transferFromGlobalDeviceMemory();
          530 
          531     // Free dynamically allocated global device memory
          532     rt_freeGlobalDeviceMemory();
          533     checkForCudaErrors("after rt_freeGlobalDeviceMemory");
          534     if (transfer == 1) {
          535         delete[] linarr;
          536         cudaFree(dev_linarr);
          537         checkForCudaErrors("When calling cudaFree(dev_linarr)");
          538     }
          539 
          540     //cudaPrintfDisplay(stdout, true);
          541 
          542     // Write image to PPM file
          543     writePPM(("img_out/" + sid + ".ppm").c_str());
          544 
          545 }
          546 
          547 #endif
          548 // vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4