URI:
       tRemoved redundant zeroing of delta_t - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
  HTML git clone git://src.adamsgaard.dk/sphere
   DIR Log
   DIR Files
   DIR Refs
   DIR LICENSE
       ---
   DIR commit 005e46d1edbfa0e3313f7c5f538a2bf265200f51
   DIR parent 223197848f6e1f56d5003dc84ac7d89623de97d0
  HTML Author: Anders Damsgaard <adc@geo.au.dk>
       Date:   Thu, 11 Oct 2012 11:05:35 +0200
       
       Removed redundant zeroing of delta_t
       
       Diffstat:
         M raytracer/README                    |      29 ++++++++++++++---------------
         M raytracer/doc/raytracer.pdf         |       0 
         M raytracer/doc/raytracer.tex         |      49 +++----------------------------
         D raytracer/io.cpp                    |      31 -------------------------------
         D raytracer/render_all_outputs_GPU_c… |      34 -------------------------------
         D raytracer/rt_kernel.h               |      31 -------------------------------
         D raytracer/rt_kernel_cpu.cpp         |     259 -------------------------------
         D raytracer/rt_kernel_cpu.h           |      14 --------------
         M src/contactsearch.cuh               |       8 +++++---
       
       9 files changed, 23 insertions(+), 432 deletions(-)
       ---
   DIR diff --git a/raytracer/README b/raytracer/README
       t@@ -1,37 +1,36 @@
        ------------------------------------------------------------------
       -This is the README file for the SPHERE ray tracing module. 
       -  Last revision: Oct. 17., 2011
       + This is the README file for the ESys-particle ray tracing module
       +   Last revision: May. 18., 2012
        ------------------------------------------------------------------
        
       - BACKGROUND INFORMATION
       +BACKGROUND INFORMATION
       +======================
        
       -The ray tracing module was developed as an exam project in the 
       -course "Data Parallel Computing", held Q1 at the Department of 
       -Computer Science, Aarhus University, Denmark by Thomas Sangild 
       -Sørensen. 
       +The ray tracing module was initially developed for the Sphere DEM
       +software: http://github.com/anders-dc/sphere
        
       -It is used for visualizing simulation output from the SPHERE
       -discrete element method software.
       +This is a port for visualizing simulation output from the 
       +ESys-Particle discrete element method software.
        
       -It is developed by Anders Damsgaard Christensen, adc@cs.au.dk, and
       +It is developed by Anders Damsgaard Christensen, adc@geo.au.dk, and
        is licenced under the GNU General Public Licence (GPL).
        
        Documentation can be found in the file doc/raytracer.pdf
        
       -------------------------------------------------------------------
        
       -  REQUIREMENTS
       +REQUIREMENTS
       +============
        
        For compiling the raytracer, the following software is required:
         - GNU compiler collection
         - CUDA developer drivers
         - CUDA toolkit
        When executing the compiled code, a CUDA compatible GPU is
       -necessary.
       +necessary, along with appropriate developer device drivers.
        
       -------------------------------------------------------------------
        
       -  USAGE INSTRUCTIONS
       +USAGE INSTRUCTIONS
       +==================
         
        The source code is built using GNU make with the command:
          ./make
   DIR diff --git a/raytracer/doc/raytracer.pdf b/raytracer/doc/raytracer.pdf
       Binary files differ.
   DIR diff --git a/raytracer/doc/raytracer.tex b/raytracer/doc/raytracer.tex
       t@@ -273,17 +273,17 @@ This appendix contains the following source code files:
        
        \subsection{CUDA raytracing source code}
        %\subsubsection{rt\_kernel.h}
       -\lstinputlisting[caption={rt\_kernel.h},label={rt-kernel.h},numbers=left,breaklines=true,xleftmargin=15pt,basicstyle=\scriptsize]{../rt-kernel.h}
       +\lstinputlisting[caption={rt-kernel.h},label={rt-kernel.h},numbers=left,breaklines=true,xleftmargin=15pt,basicstyle=\scriptsize]{../rt-kernel.h}
        
        %\subsubsection{rt\_kernel.cu}
       -\lstinputlisting[caption={rt\_kernel.cu},label={rt-kernel.cu},numbers=left,breaklines=true,xleftmargin=15pt,basicstyle=\scriptsize]{../rt-kernel.cu}
       +\lstinputlisting[caption={rt-kernel.cu},label={rt-kernel.cu},numbers=left,breaklines=true,xleftmargin=15pt,basicstyle=\scriptsize]{../rt-kernel.cu}
        
        \subsection{CPU raytracing source code}
        %\subsubsection{rt\_kernel\_cpu.h}
       -\lstinputlisting[caption={rt\_kernel\_cpu.h},label={rt-kernel-cpu.h},numbers=left,breaklines=true,xleftmargin=15pt,basicstyle=\scriptsize]{../rt-kernel-cpu.h}
       +\lstinputlisting[caption={rt-kernel-cpu.h},label={rt-kernel-cpu.h},numbers=left,breaklines=true,xleftmargin=15pt,basicstyle=\scriptsize]{../rt-kernel-cpu.h}
        
        %\subsubsection{rt\_kernel\_cpu.cpp}
       -\lstinputlisting[caption={rt\_kernel\_cpu.cpp},label={rt-kernel-cpu.cpp},numbers=left,breaklines=true,xleftmargin=15pt,basicstyle=\scriptsize]{../rt-kernel-cpu.cpp}
       +\lstinputlisting[caption={rt-kernel-cpu.cpp},label={rt-kernel-cpu.cpp},numbers=left,breaklines=true,xleftmargin=15pt,basicstyle=\scriptsize]{../rt-kernel-cpu.cpp}
        
        
        
       t@@ -295,47 +295,6 @@ This appendix contains the following source code files:
        
        
        
       -\section{Bloopers}
       -This section contains pictures of the unfinished raytracer in a malfunctioning state, which can provide interesting, however unusable, results.
       -
       -\begin{figure}[!h]
       -\centering
       -\includegraphics[width=0.30\textwidth]{bloopers/black.png}
       -\caption{An end result encountered way too many times, e.g. after inadvertently dividing the focal length with zero.}
       -\label{fig:black}
       -\end{figure}
       -
       -\begin{figure}[!h]
       -\centering
       -\includegraphics[width=0.30\textwidth]{bloopers/twisted.png}
       -\caption{Twisted appearance caused by wrong thread/block layout.}
       -\label{fig:twisted}
       -\end{figure}
       -
       -\begin{figure}[!h]
       -\centering
       -\includegraphics[width=0.30\textwidth]{bloopers/wrong_order.png}
       -\caption{A problem with the distance determination caused the particles to be displayed in a wrong order.}
       -\label{fig:wrongorder}
       -\end{figure}
       -
       -\begin{figure}[!h]
       -\centering
       -\includegraphics[width=0.30\textwidth]{bloopers/colorful.png}
       -\caption{Colorful result caused by forgetting to normalize the normal vector calculation.}
       -\label{fig:colorful}
       -\end{figure}
       -
       -\begin{figure}[!h]
       -\centering
       -\includegraphics[width=0.30\textwidth]{bloopers/cpu_wrong_colorful.png}
       -\caption{Another colorful result with wrong order of particles, cause by using \texttt{abs()} (meant for integers) instead of \texttt{fabs()} (meant for floats).}
       -\label{fig:colorful_cpu}
       -\end{figure}
       -
       -
       -
       -
        
        
        % You can push biographies down or up by placing
   DIR diff --git a/raytracer/io.cpp b/raytracer/io.cpp
       t@@ -1,31 +0,0 @@
       -// io.cpp
       -//         Contains functions to read SPHERE binary files,
       -//         and write images in the PPM format
       -
       -#include <iostream>
       -#include <cstdio>
       -#include "header.h"
       -
       -
       -// Write image structure to PPM file
       -int image_to_ppm(rgb* rgb_img, const char* filename, const unsigned int width, const unsigned int height)
       -{
       -  FILE *fout = fopen(filename, "wb"); 
       -  if (fout == NULL) {
       -    std::cout << "File error in img_write() while trying to writing " << filename;
       -    return 1;
       -  }
       -
       -  fprintf(fout, "P6 %d %d 255\n", width, height);
       -
       -  // Write pixel array to ppm image file
       -  for (unsigned int i=0; i<height*width; i++) {
       -      putc(rgb_img[i].r, fout);
       -      putc(rgb_img[i].g, fout);
       -      putc(rgb_img[i].b, fout);
       -  }
       -
       -  fclose(fout);
       -  return 0;
       -}
       -
   DIR diff --git a/raytracer/render_all_outputs_GPU_clever.sh b/raytracer/render_all_outputs_GPU_clever.sh
       t@@ -1,34 +0,0 @@
       -#!/bin/bash
       -
       -XRES=800
       -YRES=600
       -
       -WORKHORSE=GPU
       -
       -echo "# Rendering PPM images"
       -for F in ../output/*.bin
       -do
       -  BASE=`basename $F`
       -  OUTFILE=$BASE.ppm.jpg
       -  if [ -e ../img_out/$OUTFILE ]
       -  then
       -    echo $OUTFILE exists.
       -  else
       -    ./rt $WORKHORSE $F $XRES $YRES ../img_out/$BASE.ppm pressure 4000 > /dev/null
       -    if [ $? -ne 0 ] ; then
       -      echo "Error rendering $F, trying again..."
       -      ./rt $WORKHORSE $F $XRES $YRES ../img_out/$BASE.ppm pressure 4000 > /dev/null
       -    fi
       -  fi
       -done
       -
       -echo "# Converting PPM files to JPEG using ImageMagick in parallel"
       -for F in ../img_out/*.ppm
       -do
       -  (BASE=`basename $F`; convert $F $F.jpg &)
       -done
       -
       -sleep 5
       -echo "# Removing temporary PPM files"
       -rm ../img_out/*.ppm
       -
   DIR diff --git a/raytracer/rt_kernel.h b/raytracer/rt_kernel.h
       t@@ -1,31 +0,0 @@
       -#ifndef RT_KERNEL_H_
       -#define RT_KERNEL_H_
       -
       -#include <vector_functions.h>
       -
       -// Constants
       -__constant__ float4 const_u;
       -__constant__ float4 const_v;
       -__constant__ float4 const_w;
       -__constant__ float4 const_eye;
       -__constant__ float4 const_imgplane;
       -__constant__ float  const_d;
       -__constant__ float4 const_light;
       -
       -
       -// Host prototype functions
       -
       -extern "C"
       -void cameraInit(float4 eye, float4 lookat, float imgw, float hw_ratio);
       -
       -extern "C"
       -void checkForCudaErrors(const char* checkpoint_description);
       -
       -extern "C"
       -int rt(float4* p, const unsigned int np, 
       -       rgb* img, const unsigned int width, const unsigned int height,
       -       f3 origo, f3 L, f3 eye, f3 lookat, float imgw,
       -       const int visualize, const float max_val, 
       -       float* fixvel, float* pres, float* es_dot, float* es);
       -
       -#endif
   DIR diff --git a/raytracer/rt_kernel_cpu.cpp b/raytracer/rt_kernel_cpu.cpp
       t@@ -1,259 +0,0 @@
       -#include <iostream>
       -#include <cstdio>
       -#include <cmath>
       -#include <time.h>
       -#include <cuda.h>
       -#include <cutil_math.h>
       -#include <string.h>
       -#include "header.h"
       -#include "rt_kernel_cpu.h"
       -
       -// Constants
       -float3 constc_u;
       -float3 constc_v;
       -float3 constc_w;
       -float3 constc_eye;
       -float4 constc_imgplane;
       -float  constc_d;
       -float3 constc_light;
       -
       -__inline__ float3 f4_to_f3(float4 in)
       -{
       -  return make_float3(in.x, in.y, in.z);
       -}
       -
       -__inline__ float4 f3_to_f4(float3 in)
       -{
       -  return make_float4(in.x, in.y, in.z, 0.0f);
       -}
       -
       -__inline__ float lengthf3(float3 in)
       -{
       -  return sqrt(in.x*in.x + in.y*in.y + in.z*in.z);
       -}
       -
       -// Kernel for initializing image data
       -void imageInit_cpu(unsigned char* _img, unsigned int pixels)
       -{
       -  for (unsigned int mempos=0; mempos<pixels; mempos++) {
       -    _img[mempos*4]     = 0;        // Red channel
       -    _img[mempos*4 + 1] = 0;        // Green channel
       -    _img[mempos*4 + 2] = 0;        // Blue channel
       -  }
       -}
       -
       -// Calculate ray origins and directions
       -void rayInitPerspective_cpu(float3* _ray_origo, 
       -                            float3* _ray_direction, 
       -                        float3 eye, 
       -                        unsigned int width,
       -                        unsigned int height)
       -{
       -  unsigned int i;
       -  #pragma omp parallel for
       -  for (i=0; i<width; i++) {
       -    for (unsigned int j=0; j<height; j++) {
       -
       -      unsigned int mempos = i + j*height;
       -
       -      // Calculate pixel coordinates in image plane
       -      float p_u = constc_imgplane.x + (constc_imgplane.y - constc_imgplane.x)
       -        * (i + 0.5f) / width;
       -      float p_v = constc_imgplane.z + (constc_imgplane.w - constc_imgplane.z)
       -        * (j + 0.5f) / height;
       -
       -      // Write ray origo and direction to global memory
       -      _ray_origo[mempos]     = constc_eye;
       -      _ray_direction[mempos] = -constc_d*constc_w + p_u*constc_u + p_v*constc_v;
       -    }
       -  }
       -}
       -
       -// Check wether the pixel's viewing ray intersects with the spheres,
       -// and shade the pixel correspondingly
       -void rayIntersectSpheres_cpu(float3* _ray_origo, 
       -                         float3* _ray_direction,
       -                         float4* _p, 
       -                         unsigned char* _img, 
       -                         unsigned int pixels,
       -                         unsigned int np)
       -{
       -  unsigned int mempos;
       -  #pragma omp parallel for
       -  for (mempos=0; mempos<pixels; mempos++) {
       -    
       -    // Read ray data from global memory
       -    float3 e = _ray_origo[mempos];
       -    float3 d = _ray_direction[mempos];
       -    //float  step = lengthf3(d);
       -
       -    // Distance, in ray steps, between object and eye initialized with a large value
       -    float tdist = 1e10f;
       -
       -    // Surface normal at closest sphere intersection
       -    float3 n;
       -
       -    // Intersection point coordinates
       -    float3 p;
       -
       -    // Iterate through all particles
       -    for (unsigned int i=0; i<np; i++) {
       -
       -      // Read sphere coordinate and radius
       -      float3 c = f4_to_f3(_p[i]);
       -      float  R = _p[i].w;
       -
       -      // Calculate the discriminant: d = B^2 - 4AC
       -      float Delta = (2.0f*dot(d,(e-c)))*(2.0f*dot(d,(e-c)))  // B^2
       -        - 4.0f*dot(d,d)        // -4*A
       -        * (dot((e-c),(e-c)) - R*R);  // C
       -
       -      // If the determinant is positive, there are two solutions
       -      // One where the line enters the sphere, and one where it exits
       -      if (Delta > 0.0f) { 
       -
       -        // Calculate roots, Shirley 2009 p. 77
       -        float t_minus = ((dot(-d,(e-c)) - sqrt( dot(d,(e-c))*dot(d,(e-c)) - dot(d,d)
       -                * (dot((e-c),(e-c)) - R*R) ) ) / dot(d,d));
       -
       -        // Check wether intersection is closer than previous values
       -        if (fabs(t_minus) < tdist) {
       -          p = e + t_minus*d;
       -          tdist = fabs(t_minus);
       -          n = normalize(2.0f * (p - c));   // Surface normal
       -        }
       -
       -      } // End of solution branch
       -
       -    } // End of particle loop
       -
       -    // Write pixel color
       -    if (tdist < 1e10) {
       -
       -      // Lambertian shading parameters
       -      //float dotprod = fabs(dot(n, constc_light));
       -      float dotprod = fmax(0.0f,dot(n, constc_light));
       -      float I_d = 40.0f;  // Light intensity
       -      float k_d = 5.0f;  // Diffuse coefficient
       -
       -      // Ambient shading
       -      float k_a = 10.0f;
       -      float I_a = 5.0f;
       -
       -      // Write shading model values to pixel color channels
       -      _img[mempos*4]     = (unsigned char) ((k_d * I_d * dotprod 
       -            + k_a * I_a)*0.48f);
       -      _img[mempos*4 + 1] = (unsigned char) ((k_d * I_d * dotprod
       -            + k_a * I_a)*0.41f);
       -      _img[mempos*4 + 2] = (unsigned char) ((k_d * I_d * dotprod
       -            + k_a * I_a)*0.27f);
       -    }
       -  }
       -}
       -
       -
       -void cameraInit_cpu(float3 eye, float3 lookat, float imgw, float hw_ratio)
       -{
       -  // Image dimensions in world space (l, r, b, t)
       -  float4 imgplane = make_float4(-0.5f*imgw, 0.5f*imgw, -0.5f*imgw*hw_ratio, 0.5f*imgw*hw_ratio);
       -
       -  // The view vector
       -  float3 view = eye - lookat;
       -
       -  // Construct the camera view orthonormal base
       -  float3 v = make_float3(0.0f, 0.0f, 1.0f);  // v: Pointing upward
       -  float3 w = -view/lengthf3(view);                   // w: Pointing backwards
       -  float3 u = cross(make_float3(v.x, v.y, v.z), make_float3(w.x, w.y, w.z)); // u: Pointing right
       -
       -  // Focal length 20% of eye vector length
       -  float d = lengthf3(view)*0.8f;
       -
       -  // Light direction (points towards light source)
       -  float3 light = normalize(-1.0f*eye*make_float3(1.0f, 0.2f, 0.6f));
       -
       -  std::cout << "  Transfering camera values to constant memory\n";
       -
       -  constc_u = u;
       -  constc_v = v;
       -  constc_w = w;
       -  constc_eye = eye;
       -  constc_imgplane = imgplane;
       -  constc_d = d;
       -  constc_light = light;
       -
       -  std::cout << "Rendering image...";
       -}
       -
       -
       -// Wrapper for the rt algorithm
       -int rt_cpu(float4* p, unsigned int np,
       -       rgb* img, unsigned int width, unsigned int height,
       -       f3 origo, f3 L, f3 eye, f3 lookat, float imgw) {
       -
       -  using std::cout;
       -
       -  cout << "Initializing CPU raytracer:\n";
       -
       -  // Initialize GPU timestamp recorders
       -  float t1_go, t2_go, t1_stop, t2_stop;
       -
       -  // Start timer 1
       -  t1_go = clock();
       -
       -  // Allocate memory
       -  cout << "  Allocating device memory\n";
       -  static unsigned char *_img;                 // RGBw values in image
       -  static float3* _ray_origo;                // Ray origo (x,y,z)
       -  static float3* _ray_direction;        // Ray direction (x,y,z)
       -  _img                  = new unsigned char[width*height*4];
       -  _ray_origo          = new float3[width*height];
       -  _ray_direction = new float3[width*height];
       -
       -  // Arrange thread/block structure
       -  unsigned int pixels = width*height;
       -  float hw_ratio = (float)height/(float)width;
       -
       -  // Start timer 2
       -  t2_go = clock();
       -  
       -  // Initialize image to background color
       -  imageInit_cpu(_img, pixels);
       -
       -  // Initialize camera
       -  cameraInit_cpu(make_float3(eye.x, eye.y, eye.z), 
       -                       make_float3(lookat.x, lookat.y, lookat.z),
       -                 imgw, hw_ratio);
       -
       -  // Construct rays for perspective projection
       -  rayInitPerspective_cpu(
       -      _ray_origo, _ray_direction, 
       -      make_float3(eye.x, eye.y, eye.z), 
       -      width, height);
       -  
       -  // Find closest intersection between rays and spheres
       -  rayIntersectSpheres_cpu(
       -      _ray_origo, _ray_direction,
       -      p, _img, pixels, np);
       -
       -  // Stop timer 2
       -  t2_stop = clock();
       -
       -  memcpy(img, _img, sizeof(unsigned char)*pixels*4);
       -
       -  // Free dynamically allocated device memory
       -  delete [] _img;
       -  delete [] _ray_origo;
       -  delete [] _ray_direction;
       -
       -  // Stop timer 1
       -  t1_stop = clock();
       -  
       -  // Report time spent 
       -  cout << " done.\n"
       -       << "  Time spent on entire CPU raytracing routine: "
       -       << (t1_stop-t1_go)/CLOCKS_PER_SEC*1000.0 << " ms\n";
       -  cout << "  - Functions: " << (t2_stop-t2_go)/CLOCKS_PER_SEC*1000.0 << " ms\n";
       -
       -  // Return successfully
       -  return 0;
       -}
   DIR diff --git a/raytracer/rt_kernel_cpu.h b/raytracer/rt_kernel_cpu.h
       t@@ -1,14 +0,0 @@
       -#ifndef RT_KERNEL_CPU_H_
       -#define RT_KERNEL_CPU_H_
       -
       -#include <vector_functions.h>
       -
       -// Host prototype functions
       -
       -void cameraInit(float3 eye, float3 lookat, float imgw, float hw_ratio);
       -
       -int rt_cpu(float4* p, const unsigned int np, 
       -       rgb* img, const unsigned int width, const unsigned int height,
       -       f3 origo, f3 L, f3 eye, f3 lookat, float imgw);
       -
       -#endif
   DIR diff --git a/src/contactsearch.cuh b/src/contactsearch.cuh
       t@@ -479,14 +479,16 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
                  } else {
                    __syncthreads();
                    // Remove this contact (there is no particle with index=np)
       -            dev_contacts[mempos] = devC_np; 
       +            dev_contacts[mempos] = devC_np;
                    // Zero sum of shear displacement in this position
                    dev_delta_t[mempos] = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f);
                  }
       -        } else {
       +
       +        }/* else { // if dev_contacts[mempos] == devC_np
                  __syncthreads();
       +          // Zero sum of shear displacement in this position
                  dev_delta_t[mempos] = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f);
       -        }
       +        } */
              } // Contact loop end