tRemoved redundant zeroing of delta_t - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
HTML git clone git://src.adamsgaard.dk/sphere
DIR Log
DIR Files
DIR Refs
DIR LICENSE
---
DIR commit 005e46d1edbfa0e3313f7c5f538a2bf265200f51
DIR parent 223197848f6e1f56d5003dc84ac7d89623de97d0
HTML Author: Anders Damsgaard <adc@geo.au.dk>
Date: Thu, 11 Oct 2012 11:05:35 +0200
Removed redundant zeroing of delta_t
Diffstat:
M raytracer/README | 29 ++++++++++++++---------------
M raytracer/doc/raytracer.pdf | 0
M raytracer/doc/raytracer.tex | 49 +++----------------------------
D raytracer/io.cpp | 31 -------------------------------
D raytracer/render_all_outputs_GPU_c… | 34 -------------------------------
D raytracer/rt_kernel.h | 31 -------------------------------
D raytracer/rt_kernel_cpu.cpp | 259 -------------------------------
D raytracer/rt_kernel_cpu.h | 14 --------------
M src/contactsearch.cuh | 8 +++++---
9 files changed, 23 insertions(+), 432 deletions(-)
---
DIR diff --git a/raytracer/README b/raytracer/README
t@@ -1,37 +1,36 @@
------------------------------------------------------------------
-This is the README file for the SPHERE ray tracing module.
- Last revision: Oct. 17., 2011
+ This is the README file for the ESys-particle ray tracing module
+ Last revision: May. 18., 2012
------------------------------------------------------------------
- BACKGROUND INFORMATION
+BACKGROUND INFORMATION
+======================
-The ray tracing module was developed as an exam project in the
-course "Data Parallel Computing", held Q1 at the Department of
-Computer Science, Aarhus University, Denmark by Thomas Sangild
-Sørensen.
+The ray tracing module was initially developed for the Sphere DEM
+software: http://github.com/anders-dc/sphere
-It is used for visualizing simulation output from the SPHERE
-discrete element method software.
+This is a port for visualizing simulation output from the
+ESys-Particle discrete element method software.
-It is developed by Anders Damsgaard Christensen, adc@cs.au.dk, and
+It is developed by Anders Damsgaard Christensen, adc@geo.au.dk, and
is licenced under the GNU General Public Licence (GPL).
Documentation can be found in the file doc/raytracer.pdf
-------------------------------------------------------------------
- REQUIREMENTS
+REQUIREMENTS
+============
For compiling the raytracer, the following software is required:
- GNU compiler collection
- CUDA developer drivers
- CUDA toolkit
When executing the compiled code, a CUDA compatible GPU is
-necessary.
+necessary, along with appropriate developer device drivers.
-------------------------------------------------------------------
- USAGE INSTRUCTIONS
+USAGE INSTRUCTIONS
+==================
The source code is built using GNU make with the command:
./make
DIR diff --git a/raytracer/doc/raytracer.pdf b/raytracer/doc/raytracer.pdf
Binary files differ.
DIR diff --git a/raytracer/doc/raytracer.tex b/raytracer/doc/raytracer.tex
t@@ -273,17 +273,17 @@ This appendix contains the following source code files:
\subsection{CUDA raytracing source code}
%\subsubsection{rt\_kernel.h}
-\lstinputlisting[caption={rt\_kernel.h},label={rt-kernel.h},numbers=left,breaklines=true,xleftmargin=15pt,basicstyle=\scriptsize]{../rt-kernel.h}
+\lstinputlisting[caption={rt-kernel.h},label={rt-kernel.h},numbers=left,breaklines=true,xleftmargin=15pt,basicstyle=\scriptsize]{../rt-kernel.h}
%\subsubsection{rt\_kernel.cu}
-\lstinputlisting[caption={rt\_kernel.cu},label={rt-kernel.cu},numbers=left,breaklines=true,xleftmargin=15pt,basicstyle=\scriptsize]{../rt-kernel.cu}
+\lstinputlisting[caption={rt-kernel.cu},label={rt-kernel.cu},numbers=left,breaklines=true,xleftmargin=15pt,basicstyle=\scriptsize]{../rt-kernel.cu}
\subsection{CPU raytracing source code}
%\subsubsection{rt\_kernel\_cpu.h}
-\lstinputlisting[caption={rt\_kernel\_cpu.h},label={rt-kernel-cpu.h},numbers=left,breaklines=true,xleftmargin=15pt,basicstyle=\scriptsize]{../rt-kernel-cpu.h}
+\lstinputlisting[caption={rt-kernel-cpu.h},label={rt-kernel-cpu.h},numbers=left,breaklines=true,xleftmargin=15pt,basicstyle=\scriptsize]{../rt-kernel-cpu.h}
%\subsubsection{rt\_kernel\_cpu.cpp}
-\lstinputlisting[caption={rt\_kernel\_cpu.cpp},label={rt-kernel-cpu.cpp},numbers=left,breaklines=true,xleftmargin=15pt,basicstyle=\scriptsize]{../rt-kernel-cpu.cpp}
+\lstinputlisting[caption={rt-kernel-cpu.cpp},label={rt-kernel-cpu.cpp},numbers=left,breaklines=true,xleftmargin=15pt,basicstyle=\scriptsize]{../rt-kernel-cpu.cpp}
t@@ -295,47 +295,6 @@ This appendix contains the following source code files:
-\section{Bloopers}
-This section contains pictures of the unfinished raytracer in a malfunctioning state, which can provide interesting, however unusable, results.
-
-\begin{figure}[!h]
-\centering
-\includegraphics[width=0.30\textwidth]{bloopers/black.png}
-\caption{An end result encountered way too many times, e.g. after inadvertently dividing the focal length with zero.}
-\label{fig:black}
-\end{figure}
-
-\begin{figure}[!h]
-\centering
-\includegraphics[width=0.30\textwidth]{bloopers/twisted.png}
-\caption{Twisted appearance caused by wrong thread/block layout.}
-\label{fig:twisted}
-\end{figure}
-
-\begin{figure}[!h]
-\centering
-\includegraphics[width=0.30\textwidth]{bloopers/wrong_order.png}
-\caption{A problem with the distance determination caused the particles to be displayed in a wrong order.}
-\label{fig:wrongorder}
-\end{figure}
-
-\begin{figure}[!h]
-\centering
-\includegraphics[width=0.30\textwidth]{bloopers/colorful.png}
-\caption{Colorful result caused by forgetting to normalize the normal vector calculation.}
-\label{fig:colorful}
-\end{figure}
-
-\begin{figure}[!h]
-\centering
-\includegraphics[width=0.30\textwidth]{bloopers/cpu_wrong_colorful.png}
-\caption{Another colorful result with wrong order of particles, cause by using \texttt{abs()} (meant for integers) instead of \texttt{fabs()} (meant for floats).}
-\label{fig:colorful_cpu}
-\end{figure}
-
-
-
-
% You can push biographies down or up by placing
DIR diff --git a/raytracer/io.cpp b/raytracer/io.cpp
t@@ -1,31 +0,0 @@
-// io.cpp
-// Contains functions to read SPHERE binary files,
-// and write images in the PPM format
-
-#include <iostream>
-#include <cstdio>
-#include "header.h"
-
-
-// Write image structure to PPM file
-int image_to_ppm(rgb* rgb_img, const char* filename, const unsigned int width, const unsigned int height)
-{
- FILE *fout = fopen(filename, "wb");
- if (fout == NULL) {
- std::cout << "File error in img_write() while trying to writing " << filename;
- return 1;
- }
-
- fprintf(fout, "P6 %d %d 255\n", width, height);
-
- // Write pixel array to ppm image file
- for (unsigned int i=0; i<height*width; i++) {
- putc(rgb_img[i].r, fout);
- putc(rgb_img[i].g, fout);
- putc(rgb_img[i].b, fout);
- }
-
- fclose(fout);
- return 0;
-}
-
DIR diff --git a/raytracer/render_all_outputs_GPU_clever.sh b/raytracer/render_all_outputs_GPU_clever.sh
t@@ -1,34 +0,0 @@
-#!/bin/bash
-
-XRES=800
-YRES=600
-
-WORKHORSE=GPU
-
-echo "# Rendering PPM images"
-for F in ../output/*.bin
-do
- BASE=`basename $F`
- OUTFILE=$BASE.ppm.jpg
- if [ -e ../img_out/$OUTFILE ]
- then
- echo $OUTFILE exists.
- else
- ./rt $WORKHORSE $F $XRES $YRES ../img_out/$BASE.ppm pressure 4000 > /dev/null
- if [ $? -ne 0 ] ; then
- echo "Error rendering $F, trying again..."
- ./rt $WORKHORSE $F $XRES $YRES ../img_out/$BASE.ppm pressure 4000 > /dev/null
- fi
- fi
-done
-
-echo "# Converting PPM files to JPEG using ImageMagick in parallel"
-for F in ../img_out/*.ppm
-do
- (BASE=`basename $F`; convert $F $F.jpg &)
-done
-
-sleep 5
-echo "# Removing temporary PPM files"
-rm ../img_out/*.ppm
-
DIR diff --git a/raytracer/rt_kernel.h b/raytracer/rt_kernel.h
t@@ -1,31 +0,0 @@
-#ifndef RT_KERNEL_H_
-#define RT_KERNEL_H_
-
-#include <vector_functions.h>
-
-// Constants
-__constant__ float4 const_u;
-__constant__ float4 const_v;
-__constant__ float4 const_w;
-__constant__ float4 const_eye;
-__constant__ float4 const_imgplane;
-__constant__ float const_d;
-__constant__ float4 const_light;
-
-
-// Host prototype functions
-
-extern "C"
-void cameraInit(float4 eye, float4 lookat, float imgw, float hw_ratio);
-
-extern "C"
-void checkForCudaErrors(const char* checkpoint_description);
-
-extern "C"
-int rt(float4* p, const unsigned int np,
- rgb* img, const unsigned int width, const unsigned int height,
- f3 origo, f3 L, f3 eye, f3 lookat, float imgw,
- const int visualize, const float max_val,
- float* fixvel, float* pres, float* es_dot, float* es);
-
-#endif
DIR diff --git a/raytracer/rt_kernel_cpu.cpp b/raytracer/rt_kernel_cpu.cpp
t@@ -1,259 +0,0 @@
-#include <iostream>
-#include <cstdio>
-#include <cmath>
-#include <time.h>
-#include <cuda.h>
-#include <cutil_math.h>
-#include <string.h>
-#include "header.h"
-#include "rt_kernel_cpu.h"
-
-// Constants
-float3 constc_u;
-float3 constc_v;
-float3 constc_w;
-float3 constc_eye;
-float4 constc_imgplane;
-float constc_d;
-float3 constc_light;
-
-__inline__ float3 f4_to_f3(float4 in)
-{
- return make_float3(in.x, in.y, in.z);
-}
-
-__inline__ float4 f3_to_f4(float3 in)
-{
- return make_float4(in.x, in.y, in.z, 0.0f);
-}
-
-__inline__ float lengthf3(float3 in)
-{
- return sqrt(in.x*in.x + in.y*in.y + in.z*in.z);
-}
-
-// Kernel for initializing image data
-void imageInit_cpu(unsigned char* _img, unsigned int pixels)
-{
- for (unsigned int mempos=0; mempos<pixels; mempos++) {
- _img[mempos*4] = 0; // Red channel
- _img[mempos*4 + 1] = 0; // Green channel
- _img[mempos*4 + 2] = 0; // Blue channel
- }
-}
-
-// Calculate ray origins and directions
-void rayInitPerspective_cpu(float3* _ray_origo,
- float3* _ray_direction,
- float3 eye,
- unsigned int width,
- unsigned int height)
-{
- unsigned int i;
- #pragma omp parallel for
- for (i=0; i<width; i++) {
- for (unsigned int j=0; j<height; j++) {
-
- unsigned int mempos = i + j*height;
-
- // Calculate pixel coordinates in image plane
- float p_u = constc_imgplane.x + (constc_imgplane.y - constc_imgplane.x)
- * (i + 0.5f) / width;
- float p_v = constc_imgplane.z + (constc_imgplane.w - constc_imgplane.z)
- * (j + 0.5f) / height;
-
- // Write ray origo and direction to global memory
- _ray_origo[mempos] = constc_eye;
- _ray_direction[mempos] = -constc_d*constc_w + p_u*constc_u + p_v*constc_v;
- }
- }
-}
-
-// Check wether the pixel's viewing ray intersects with the spheres,
-// and shade the pixel correspondingly
-void rayIntersectSpheres_cpu(float3* _ray_origo,
- float3* _ray_direction,
- float4* _p,
- unsigned char* _img,
- unsigned int pixels,
- unsigned int np)
-{
- unsigned int mempos;
- #pragma omp parallel for
- for (mempos=0; mempos<pixels; mempos++) {
-
- // Read ray data from global memory
- float3 e = _ray_origo[mempos];
- float3 d = _ray_direction[mempos];
- //float step = lengthf3(d);
-
- // Distance, in ray steps, between object and eye initialized with a large value
- float tdist = 1e10f;
-
- // Surface normal at closest sphere intersection
- float3 n;
-
- // Intersection point coordinates
- float3 p;
-
- // Iterate through all particles
- for (unsigned int i=0; i<np; i++) {
-
- // Read sphere coordinate and radius
- float3 c = f4_to_f3(_p[i]);
- float R = _p[i].w;
-
- // Calculate the discriminant: d = B^2 - 4AC
- float Delta = (2.0f*dot(d,(e-c)))*(2.0f*dot(d,(e-c))) // B^2
- - 4.0f*dot(d,d) // -4*A
- * (dot((e-c),(e-c)) - R*R); // C
-
- // If the determinant is positive, there are two solutions
- // One where the line enters the sphere, and one where it exits
- if (Delta > 0.0f) {
-
- // Calculate roots, Shirley 2009 p. 77
- float t_minus = ((dot(-d,(e-c)) - sqrt( dot(d,(e-c))*dot(d,(e-c)) - dot(d,d)
- * (dot((e-c),(e-c)) - R*R) ) ) / dot(d,d));
-
- // Check wether intersection is closer than previous values
- if (fabs(t_minus) < tdist) {
- p = e + t_minus*d;
- tdist = fabs(t_minus);
- n = normalize(2.0f * (p - c)); // Surface normal
- }
-
- } // End of solution branch
-
- } // End of particle loop
-
- // Write pixel color
- if (tdist < 1e10) {
-
- // Lambertian shading parameters
- //float dotprod = fabs(dot(n, constc_light));
- float dotprod = fmax(0.0f,dot(n, constc_light));
- float I_d = 40.0f; // Light intensity
- float k_d = 5.0f; // Diffuse coefficient
-
- // Ambient shading
- float k_a = 10.0f;
- float I_a = 5.0f;
-
- // Write shading model values to pixel color channels
- _img[mempos*4] = (unsigned char) ((k_d * I_d * dotprod
- + k_a * I_a)*0.48f);
- _img[mempos*4 + 1] = (unsigned char) ((k_d * I_d * dotprod
- + k_a * I_a)*0.41f);
- _img[mempos*4 + 2] = (unsigned char) ((k_d * I_d * dotprod
- + k_a * I_a)*0.27f);
- }
- }
-}
-
-
-void cameraInit_cpu(float3 eye, float3 lookat, float imgw, float hw_ratio)
-{
- // Image dimensions in world space (l, r, b, t)
- float4 imgplane = make_float4(-0.5f*imgw, 0.5f*imgw, -0.5f*imgw*hw_ratio, 0.5f*imgw*hw_ratio);
-
- // The view vector
- float3 view = eye - lookat;
-
- // Construct the camera view orthonormal base
- float3 v = make_float3(0.0f, 0.0f, 1.0f); // v: Pointing upward
- float3 w = -view/lengthf3(view); // w: Pointing backwards
- float3 u = cross(make_float3(v.x, v.y, v.z), make_float3(w.x, w.y, w.z)); // u: Pointing right
-
- // Focal length 20% of eye vector length
- float d = lengthf3(view)*0.8f;
-
- // Light direction (points towards light source)
- float3 light = normalize(-1.0f*eye*make_float3(1.0f, 0.2f, 0.6f));
-
- std::cout << " Transfering camera values to constant memory\n";
-
- constc_u = u;
- constc_v = v;
- constc_w = w;
- constc_eye = eye;
- constc_imgplane = imgplane;
- constc_d = d;
- constc_light = light;
-
- std::cout << "Rendering image...";
-}
-
-
-// Wrapper for the rt algorithm
-int rt_cpu(float4* p, unsigned int np,
- rgb* img, unsigned int width, unsigned int height,
- f3 origo, f3 L, f3 eye, f3 lookat, float imgw) {
-
- using std::cout;
-
- cout << "Initializing CPU raytracer:\n";
-
- // Initialize GPU timestamp recorders
- float t1_go, t2_go, t1_stop, t2_stop;
-
- // Start timer 1
- t1_go = clock();
-
- // Allocate memory
- cout << " Allocating device memory\n";
- static unsigned char *_img; // RGBw values in image
- static float3* _ray_origo; // Ray origo (x,y,z)
- static float3* _ray_direction; // Ray direction (x,y,z)
- _img = new unsigned char[width*height*4];
- _ray_origo = new float3[width*height];
- _ray_direction = new float3[width*height];
-
- // Arrange thread/block structure
- unsigned int pixels = width*height;
- float hw_ratio = (float)height/(float)width;
-
- // Start timer 2
- t2_go = clock();
-
- // Initialize image to background color
- imageInit_cpu(_img, pixels);
-
- // Initialize camera
- cameraInit_cpu(make_float3(eye.x, eye.y, eye.z),
- make_float3(lookat.x, lookat.y, lookat.z),
- imgw, hw_ratio);
-
- // Construct rays for perspective projection
- rayInitPerspective_cpu(
- _ray_origo, _ray_direction,
- make_float3(eye.x, eye.y, eye.z),
- width, height);
-
- // Find closest intersection between rays and spheres
- rayIntersectSpheres_cpu(
- _ray_origo, _ray_direction,
- p, _img, pixels, np);
-
- // Stop timer 2
- t2_stop = clock();
-
- memcpy(img, _img, sizeof(unsigned char)*pixels*4);
-
- // Free dynamically allocated device memory
- delete [] _img;
- delete [] _ray_origo;
- delete [] _ray_direction;
-
- // Stop timer 1
- t1_stop = clock();
-
- // Report time spent
- cout << " done.\n"
- << " Time spent on entire CPU raytracing routine: "
- << (t1_stop-t1_go)/CLOCKS_PER_SEC*1000.0 << " ms\n";
- cout << " - Functions: " << (t2_stop-t2_go)/CLOCKS_PER_SEC*1000.0 << " ms\n";
-
- // Return successfully
- return 0;
-}
DIR diff --git a/raytracer/rt_kernel_cpu.h b/raytracer/rt_kernel_cpu.h
t@@ -1,14 +0,0 @@
-#ifndef RT_KERNEL_CPU_H_
-#define RT_KERNEL_CPU_H_
-
-#include <vector_functions.h>
-
-// Host prototype functions
-
-void cameraInit(float3 eye, float3 lookat, float imgw, float hw_ratio);
-
-int rt_cpu(float4* p, const unsigned int np,
- rgb* img, const unsigned int width, const unsigned int height,
- f3 origo, f3 L, f3 eye, f3 lookat, float imgw);
-
-#endif
DIR diff --git a/src/contactsearch.cuh b/src/contactsearch.cuh
t@@ -479,14 +479,16 @@ __global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsorted
} else {
__syncthreads();
// Remove this contact (there is no particle with index=np)
- dev_contacts[mempos] = devC_np;
+ dev_contacts[mempos] = devC_np;
// Zero sum of shear displacement in this position
dev_delta_t[mempos] = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f);
}
- } else {
+
+ }/* else { // if dev_contacts[mempos] == devC_np
__syncthreads();
+ // Zero sum of shear displacement in this position
dev_delta_t[mempos] = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f);
- }
+ } */
} // Contact loop end