tCurrently experimenting with integration schemes - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
HTML git clone git://src.adamsgaard.dk/sphere
DIR Log
DIR Files
DIR Refs
DIR LICENSE
---
DIR commit 68987a0bcf02ed1b43919109acc000099694f787
DIR parent b6d92242b72333dd94520921dc43198ef420e907
HTML Author: Anders Damsgaard <adc@geo.au.dk>
Date: Tue, 13 Nov 2012 16:04:12 +0100
Currently experimenting with integration schemes
Diffstat:
M python/sphere.py | 63 +++++++++++++++++++++++--------
M python/tests.py | 56 ++++++++++++++++---------------
M src/contactmodels.cuh | 2 +-
M src/device.cu | 10 +++++++++-
M src/integration.cuh | 108 +++++++++++++++++--------------
M src/main.cpp | 10 ++++++++--
M src/raytracer.cuh | 6 ++++--
M src/sphere.cpp | 9 +++++++--
M src/sphere.h | 3 ++-
9 files changed, 166 insertions(+), 101 deletions(-)
---
DIR diff --git a/python/sphere.py b/python/sphere.py
t@@ -2,7 +2,7 @@
import math
import numpy
import matplotlib as mpl
-mpl.use('Agg')
+#mpl.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import subprocess
t@@ -17,9 +17,10 @@ class Spherebin:
"""
# Constructor - Initialize arrays
- def __init__(self, np=1, nd=3, nw=1):
+ def __init__(self, np = 1, nd = 3, nw = 1, sid = 'unnamed'):
self.nd = numpy.ones(1, dtype=numpy.int32) * nd
self.np = numpy.ones(1, dtype=numpy.uint32) * np
+ self.sid = sid
# Time parameters
self.time_dt = numpy.zeros(1, dtype=numpy.float64)
t@@ -66,7 +67,7 @@ class Spherebin:
self.mu_ws = numpy.ones(1, dtype=numpy.float64)
self.mu_wd = numpy.ones(1, dtype=numpy.float64)
self.rho = numpy.ones(1, dtype=numpy.float64) * 2600.0
- self.contactmodel = numpy.ones(1, dtype=numpy.uint32) * 2 # contactLinear default
+ self.contactmodel = numpy.ones(1, dtype=numpy.uint32) * 2 # contactLinear default
self.kappa = numpy.zeros(1, dtype=numpy.float64)
self.db = numpy.zeros(1, dtype=numpy.float64)
self.V_b = numpy.zeros(1, dtype=numpy.float64)
t@@ -262,11 +263,12 @@ class Spherebin:
fh.close()
# Write binary data
- def writebin(self, targetbin, verbose = True):
+ def writebin(self, folder = "../input/", verbose = True):
""" Reads a target SPHERE binary file and returns data.
"""
fh = None
try:
+ targetbin = folder + "/" + self.sid + ".bin"
if (verbose == True):
print("Output file: {0}".format(targetbin))
t@@ -313,8 +315,6 @@ class Spherebin:
fh.write(self.ev.astype(numpy.float64))
fh.write(self.p.astype(numpy.float64))
-
-
fh.write(self.g.astype(numpy.float64))
fh.write(self.k_n.astype(numpy.float64))
fh.write(self.k_t.astype(numpy.float64))
t@@ -452,7 +452,8 @@ class Spherebin:
print(" Grid: x={}, y={}, z={}".format(self.num[0], self.num[1], self.num[2]))
# Put upper wall at top boundary
- self.w_x[0] = self.L[0]
+ if (self.nw > 0):
+ self.w_x[0] = self.L[0]
# Generate grid based on particle positions
t@@ -760,8 +761,8 @@ class Spherebin:
gamma_n = 0,
gamma_t = 0,
gamma_r = 0,
- gamma_wn = 1e3,
- gamma_wt = 1e3,
+ gamma_wn = 1e4,
+ gamma_wt = 1e4,
capillaryCohesion = 0):
""" Initialize particle parameters to default values.
Radii must be set prior to calling this function.
t@@ -950,12 +951,44 @@ class Spherebin:
return porosity_grid
+ def run(self, verbose=True, hideinputfile=False):
+ """ Execute sphere with target project
+ """
+ quiet = ""
+ stdout = ""
+ if (verbose == False):
+ quiet = "-q"
+ if (hideinputfile == True):
+ stdout = " > /dev/null"
+ subprocess.call("cd ..; ./sphere_* " + quiet + " input/" + self.sid + ".bin " + stdout, shell=True)
+
+ def render(self,
+ method = "pres",
+ max_val = 1e3,
+ graphicsformat = "png",
+ verbose=True):
+ """ Render all output files that belong to the simulation, determined by sid.
+ """
+
+ quiet = ""
+ if (verbose == False):
+ quiet = "-q"
+
+ # Render images using sphere raytracer
+ subprocess.call("cd ..; ./sphere_* " + quiet + " --method " + method + " {}".format(max_val) + " --render output/" + self.sid + "*.bin", shell=True)
+
+ # Convert images to compressed format
+ convert()
+
+
def convert(graphicsformat = "png",
folder = "../img_out"):
""" Converts all PPM images in img_out to graphicsformat,
using ImageMagick """
+ #quiet = " > /dev/null"
+ quiet = ""
# Convert images
- subprocess.call("for F in " + folder + "/*.ppm ; do BASE=`basename $F`; convert $F $F." + graphicsformat + " > /dev/null ; done", shell=True)
+ subprocess.call("for F in " + folder + "/*.ppm ; do BASE=`basename $F .ppm`; convert $F " + folder + "/$BASE." + graphicsformat + " " + quiet + " ; done", shell=True)
# Remove PPM files
subprocess.call("rm " + folder + "/*.ppm", shell=True)
t@@ -1031,7 +1064,7 @@ def visualize(project, method = 'energy', savefig = True, outformat = 'png'):
# Read energy values from project binaries
sb = Spherebin()
for i in range(lastfile+1):
- fn = "../output/{0}.output{1}.bin".format(project, i)
+ fn = "../output/{0}.output{1:0=5}.bin".format(project, i)
sb.readbin(fn, verbose = False)
Epot[i] = sb.energy("pot")
t@@ -1109,7 +1142,7 @@ def visualize(project, method = 'energy', savefig = True, outformat = 'png'):
# Read energy values from project binaries
sb = Spherebin()
for i in range(lastfile+1):
- fn = "../output/{0}.output{1}.bin".format(project, i)
+ fn = "../output/{0}.output{1:0=5}.bin".format(project, i)
sb.readbin(fn, verbose = False)
# Allocate arrays on first run
t@@ -1155,7 +1188,7 @@ def visualize(project, method = 'energy', savefig = True, outformat = 'png'):
# Read stress values from project binaries
for i in range(lastfile+1):
- fn = "../output/{0}.output{1}.bin".format(project, i)
+ fn = "../output/{0}.output{1:0=5}.bin".format(project, i)
sb.readbin(fn, verbose = False)
# First iteration: Allocate arrays and find constant values
t@@ -1227,7 +1260,7 @@ def visualize(project, method = 'energy', savefig = True, outformat = 'png'):
else:
plt.show()
-def run(project, verbose=True, hideinputfile=False):
+def run(binary, verbose=True, hideinputfile=False):
""" Execute sphere with target project
"""
quiet = ""
t@@ -1236,7 +1269,7 @@ def run(project, verbose=True, hideinputfile=False):
quiet = "-q"
if (hideinputfile == True):
stdout = " > /dev/null"
- subprocess.call("cd ..; ./sphere_* " + quiet + " input/" + project + ".bin" + stdout, shell=True)
+ subprocess.call("cd ..; ./sphere_* " + quiet + " " + binary + " " + stdout, shell=True)
def status(project):
""" Check the status.dat file for the target project,
DIR diff --git a/python/tests.py b/python/tests.py
t@@ -13,33 +13,35 @@ def compare(first, second, string):
print("### Input/output tests ###")
# Generate data in python
-orig = Spherebin(np = 100, nw = 0)
-orig.generateRadii(histogram = False)
-orig.defaultParams()
-orig.initRandomGridPos(g = numpy.zeros(orig.nd))
-orig.initTemporal(current = 0.0, total = 0.0)
-orig.time_total = 2.0*orig.time_dt;
-orig.time_file_dt = orig.time_dt;
-orig.writebin("orig.bin", verbose=False)
-
-# Test Python IO routines
-py = Spherebin()
-py.readbin("orig.bin", verbose=False)
-compare(orig, py, "Python IO:")
-
-# Test C++ IO routines
-run("python/orig.bin", verbose=False, hideinputfile=True)
-cpp = Spherebin()
-cpp.readbin("../output/orig.output0.bin", verbose=False)
-compare(orig, cpp, "C++ IO: ")
-
-# Test CUDA IO routines
-cuda = Spherebin()
-cuda.readbin("../output/orig.output1.bin", verbose=False)
-cuda.time_current = orig.time_current
-cuda.time_step_count = orig.time_step_count
-compare(orig, cuda, "CUDA IO: ")
+for N in [1, 2, 10, 1e2, 1e3, 1e4]:
+ print("{} particle(s)".format(int(N)))
+ orig = Spherebin(np = int(N), nw = 0, sid = "test")
+ orig.generateRadii(histogram = False)
+ orig.defaultParams()
+ orig.initRandomGridPos(g = numpy.zeros(orig.nd), gridnum=numpy.array([N*N+3,N*N+3,N*N+3]))
+ orig.initTemporal(current = 0.0, total = 0.0)
+ orig.time_total = 2.0*orig.time_dt;
+ orig.time_file_dt = orig.time_dt;
+ orig.writebin(verbose=False)
+
+ # Test Python IO routines
+ py = Spherebin()
+ py.readbin("../input/test.bin", verbose=False)
+ compare(orig, py, "Python IO:")
+
+ # Test C++ IO routines
+ orig.run(verbose=False, hideinputfile=True)
+ cpp = Spherebin()
+ cpp.readbin("../output/test.output0.bin", verbose=False)
+ compare(orig, cpp, "C++ IO: ")
+
+ # Test CUDA IO routines
+ cuda = Spherebin()
+ cuda.readbin("../output/test.output1.bin", verbose=False)
+ cuda.time_current = orig.time_current
+ cuda.time_step_count = orig.time_step_count
+ compare(orig, cuda, "CUDA IO: ")
# Remove temporary files
-subprocess.call("rm orig.bin; rm ../output/orig*.bin", shell=True)
+subprocess.call("rm ../{input,output}/test*bin", shell=True)
DIR diff --git a/src/contactmodels.cuh b/src/contactmodels.cuh
t@@ -183,7 +183,7 @@ __device__ void contactLinearViscous(Float3* F, Float3* T,
} // f_n is OK! */
// Normal force component: Elastic
- //f_n = -k_n_ab * delta_ab * n_ab;
+ //f_n = -devC_params.k_n * delta_ab * n_ab;
// Normal force component: Elastic - viscous damping
f_n = (-devC_params.k_n * delta_ab - devC_params.gamma_n * vel_n_ab) * n_ab;
DIR diff --git a/src/device.cu b/src/device.cu
t@@ -227,6 +227,11 @@ __host__ void DEM::allocateGlobalDeviceMemory(void)
k.acc = new Float4[np];
k.angacc = new Float4[np];
+#pragma omp parallel for if(np>100)
+ for (unsigned int i = 0; i<np; ++i) {
+ k.acc[i] = MAKE_FLOAT4(0.0, 0.0, 0.0, 0.0);
+ k.angacc[i] = MAKE_FLOAT4(0.0, 0.0, 0.0, 0.0);
+ }
// Kinematics arrays
cudaMalloc((void**)&dev_x, memSizeF4);
t@@ -262,9 +267,10 @@ __host__ void DEM::allocateGlobalDeviceMemory(void)
cudaMalloc((void**)&dev_cellStart, sizeof(unsigned int)*grid.num[0]*grid.num[1]*grid.num[2]);
cudaMalloc((void**)&dev_cellEnd, sizeof(unsigned int)*grid.num[0]*grid.num[1]*grid.num[2]);
- // Host contact bookkeeping arrays
+ // Host contact bookkeeping arrays
k.contacts = new unsigned int[np*NC];
// Initialize contacts lists to np
+#pragma omp parallel for if(np>100)
for (unsigned int i=0; i<(np*NC); ++i)
k.contacts[i] = np;
k.distmod = new Float4[np*NC];
t@@ -687,6 +693,8 @@ __host__ void DEM::startTime()
dev_force,
dev_torque,
dev_angpos,
+ dev_acc,
+ dev_angacc,
dev_xysum,
dev_gridParticleIndex);
cudaThreadSynchronize();
DIR diff --git a/src/integration.cuh b/src/integration.cuh
t@@ -4,13 +4,13 @@
// integration.cuh
// Functions responsible for temporal integration
-
// Second order integration scheme based on Taylor expansion of particle kinematics.
// Kernel executed on device, and callable from host only.
__global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
Float4* dev_angvel_sorted,
Float4* dev_x, Float4* dev_vel, Float4* dev_angvel, // Output
Float4* dev_force, Float4* dev_torque, Float4* dev_angpos, // Input
+ Float4* dev_acc, Float4* dev_angacc,
Float2* dev_xysum,
unsigned int* dev_gridParticleIndex) // Input: Sorted-Unsorted key
{
t@@ -24,19 +24,15 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
Float4 force = dev_force[orig_idx];
Float4 torque = dev_torque[orig_idx];
Float4 angpos = dev_angpos[orig_idx];
-
- Float2 xysum = MAKE_FLOAT2(0.0f, 0.0f);
-
- // Initialize acceleration vectors to zero
- Float4 acc = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f);
- Float4 angacc = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f);
-
- // Fetch particle position and velocity values from global read
+ Float4 acc = dev_acc[orig_idx];
+ Float4 angacc = dev_angacc[orig_idx];
Float4 x = dev_x_sorted[idx];
Float4 vel = dev_vel_sorted[idx];
Float4 angvel = dev_angvel_sorted[idx];
Float radius = x.w;
+ Float2 xysum = MAKE_FLOAT2(0.0f, 0.0f);
+
// Coherent read from constant memory to registers
Float dt = devC_dt;
Float3 origo = MAKE_FLOAT3(devC_grid.origo[0], devC_grid.origo[1], devC_grid.origo[2]);
t@@ -44,7 +40,48 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
Float rho = devC_params.rho;
// Particle mass
- Float m = 4.0f/3.0f * PI * radius*radius*radius * rho;
+ Float m = 4.0/3.0 * PI * radius*radius*radius * rho;
+
+#if 0
+ //// First-order Euler integration scheme ///
+ // Update angular position
+ angpos.x += angvel.x * dt;
+ angpos.y += angvel.y * dt;
+ angpos.z += angvel.z * dt;
+
+ // Update position
+ x.x += vel.x * dt;
+ x.y += vel.y * dt;
+ x.z += vel.z * dt;
+#else
+
+ /// Second-order scheme based on Taylor expansion ///
+ // Update angular position
+ angpos.x += angvel.x * dt + angacc.x * dt*dt * 0.5;
+ angpos.y += angvel.y * dt + angacc.y * dt*dt * 0.5;
+ angpos.z += angvel.z * dt + angacc.z * dt*dt * 0.5;
+
+ // Update position
+ x.x += vel.x * dt + acc.x * dt*dt * 0.5;
+ x.y += vel.y * dt + acc.y * dt*dt * 0.5;
+ x.z += vel.z * dt + acc.z * dt*dt * 0.5;
+#endif
+
+ // Update angular velocity
+ angvel.x += angacc.x * dt;
+ angvel.y += angacc.y * dt;
+ angvel.z += angacc.z * dt;
+
+ // Update linear velocity
+ vel.x += acc.x * dt;
+ vel.y += acc.y * dt;
+ vel.z += acc.z * dt;
+
+ // Add x-displacement for this time step to
+ // sum of x-displacements
+ //x.w += vel.x * dt + (acc.x * dt*dt)/2.0f;
+ xysum.x += vel.x * dt;
+ xysum.y += vel.y * dt;// + (acc.y * dt*dt * 0.5f;
// Update linear acceleration of particle
acc.x = force.x / m;
t@@ -53,9 +90,9 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
// Update angular acceleration of particle
// (angacc = (total moment)/Intertia, intertia = 2/5*m*r^2)
- angacc.x = torque.x * 1.0f / (2.0f/5.0f * m * radius*radius);
- angacc.y = torque.y * 1.0f / (2.0f/5.0f * m * radius*radius);
- angacc.z = torque.z * 1.0f / (2.0f/5.0f * m * radius*radius);
+ angacc.x = torque.x * 1.0 / (2.0/5.0 * m * radius*radius);
+ angacc.y = torque.y * 1.0 / (2.0/5.0 * m * radius*radius);
+ angacc.z = torque.z * 1.0 / (2.0/5.0 * m * radius*radius);
// Add gravity
acc.x += devC_params.g[0];
t@@ -69,47 +106,14 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
// gravity to counteract segregation.
// Particles may move in the z-dimension,
// to allow for dilation.
- acc.x = 0.0f;
- acc.y = 0.0f;
+ acc.x = 0.0;
+ acc.y = 0.0;
acc.z -= devC_params.g[2];
// Zero the angular acceleration
angacc = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f);
}
- // Update angular velocity
- angvel.x += angacc.x * dt;
- angvel.y += angacc.y * dt;
- angvel.z += angacc.z * dt;
-
- // Update linear velocity
- vel.x += acc.x * dt;
- vel.y += acc.y * dt;
- vel.z += acc.z * dt;
-
- // Update position. First-order Euler's scheme:
- //x.x += vel.x * dt;
- //x.y += vel.y * dt;
- //x.z += vel.z * dt;
-
- // Update angular position. Second-order scheme based on Taylor expansion
- // (greater accuracy than the first-order Euler's scheme)
- angpos.x += angvel.x * dt + (angacc.x * dt*dt)/2.0f;
- angpos.y += angvel.y * dt + (angacc.y * dt*dt)/2.0f;
- angpos.z += angvel.z * dt + (angacc.z * dt*dt)/2.0f;
-
- // Update position. Second-order scheme based on Taylor expansion
- // (greater accuracy than the first-order Euler's scheme)
- x.x += vel.x * dt + (acc.x * dt*dt)/2.0f;
- x.y += vel.y * dt + (acc.y * dt*dt)/2.0f;
- x.z += vel.z * dt + (acc.z * dt*dt)/2.0f;
-
- // Add x-displacement for this time step to
- // sum of x-displacements
- //x.w += vel.x * dt + (acc.x * dt*dt)/2.0f;
- xysum.x += vel.x * dt + (acc.x * dt*dt)/2.0f;
- xysum.y += vel.y * dt + (acc.y * dt*dt)/2.0f;
-
// Move particle across boundary if it is periodic
if (devC_grid.periodic == 1) {
t@@ -133,6 +137,8 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Input
// Store data in global memory at original, pre-sort positions
dev_xysum[orig_idx] += xysum;
+ dev_acc[orig_idx] = acc;
+ dev_angacc[orig_idx] = angacc;
dev_angvel[orig_idx] = angvel;
dev_vel[orig_idx] = vel;
dev_angpos[orig_idx] = angpos;
t@@ -220,8 +226,10 @@ __global__ void integrateWalls(Float4* dev_walls_nx,
w_mvfd.y += acc * dt;
// Update position. Second-order scheme based on Taylor expansion
- // (greater accuracy than the first-order Euler's scheme)
- w_nx.w += w_mvfd.y * dt + (acc * dt*dt)/2.0f;
+ //w_nx.w += w_mvfd.y * dt + (acc * dt*dt)/2.0f;
+
+ // Update position. First-order Euler integration scheme
+ w_nx.w += w_mvfd.y * dt;
// Store data in global memory
dev_walls_nx[idx] = w_nx;
DIR diff --git a/src/main.cpp b/src/main.cpp
t@@ -30,6 +30,7 @@ int main(const int argc, const char *argv[])
// Default values
int verbose = 1;
int checkVals = 1;
+ int dry = 0;
int render = 0; // whether to render an image
int method = 0; // visualization method
int nfiles = 0; // number of input files
t@@ -48,6 +49,7 @@ int main(const int argc, const char *argv[])
<< "-h, --help\t\tprint help\n"
<< "-V, --version\t\tprint version information and exit\n"
<< "-q, --quiet\t\tsuppress status messages to stdout\n"
+ << "-n, --dry\t\tshow key experiment parameters and quit\n"
<< "-r, --render\t\trender input files instead of simulating temporal evolution\n"
<< "-dcv, --dpnt-check-values\t\tdon't check values before running\n"
<< "Raytracer (-r) specific options:\n"
t@@ -75,6 +77,9 @@ int main(const int argc, const char *argv[])
else if (argvi == "-q" || argvi == "--quiet")
verbose = 0;
+ else if (argvi == "-n" || argvi == "--dry")
+ dry = 1;
+
else if (argvi == "-r" || argvi == "--render")
render = 1;
t@@ -110,10 +115,11 @@ int main(const int argc, const char *argv[])
else {
nfiles++;
- std::cout << argv[0] << ": processing input file: " << argvi << std::endl;
+ if (verbose == 1)
+ std::cout << argv[0] << ": processing input file: " << argvi << std::endl;
// Create DEM class, read data from input binary, check values
- DEM dem(argvi, verbose, checkVals);
+ DEM dem(argvi, verbose, checkVals, dry);
// Render image if requested
if (render == 1)
DIR diff --git a/src/raytracer.cuh b/src/raytracer.cuh
t@@ -485,8 +485,10 @@ __host__ void DEM::render(
// Report color visualization method and color map range
- cout << " " << desc << " color map range: [0, "
- << maxval << "] " << unit << endl;
+ if (verbose == 1) {
+ cout << " " << desc << " color map range: [0, "
+ << maxval << "] " << unit << endl;
+ }
// Copy linarr to dev_linarr if required
if (transfer == 1) {
DIR diff --git a/src/sphere.cpp b/src/sphere.cpp
t@@ -13,7 +13,8 @@
// and reports the values
DEM::DEM(const std::string inputbin,
const int verbosity,
- const int checkVals)
+ const int checkVals,
+ const int dry)
: verbose(verbosity)
{
using std::cout;
t@@ -36,8 +37,12 @@ DEM::DEM(const std::string inputbin,
checkValues();
// Report data values
- if (verbose == 1)
+ if (dry == 1)
reportValues();
+
+ // If this is a dry run, exit
+ if (dry == 1)
+ exit(1);
// Initialize CUDA
initializeGPU();
DIR diff --git a/src/sphere.h b/src/sphere.h
t@@ -126,7 +126,8 @@ class DEM {
// Constructor, some parameters with default values
DEM(std::string inputbin,
const int verbosity = 1,
- const int checkVals = 1);
+ const int checkVals = 1,
+ const int dry = 0);
// Destructor
~DEM(void);