URI:
       tWorking on implementing darcy flow - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
  HTML git clone git://src.adamsgaard.dk/sphere
   DIR Log
   DIR Files
   DIR Refs
   DIR LICENSE
       ---
   DIR commit f043266094c4d99fe6ccc12c2650d0c03b082ec4
   DIR parent fba2d7f021655b31ce60447fa54e3a4afc9ab5d5
  HTML Author: Anders Damsgaard <adc@geo.au.dk>
       Date:   Mon,  3 Jun 2013 10:57:27 +0200
       
       Working on implementing darcy flow
       
       Diffstat:
         M src/CMakeLists.txt                  |      15 ++++++++++-----
         M src/darcy.cpp                       |     294 +++++++++++++++++++++++++++++--
         M src/latticeboltzmann.cuh            |     627 +++++++++++++++++++++++--------
         M src/porousflow.cpp                  |       2 +-
         M src/sphere.cpp                      |       1 +
         M src/sphere.h                        |      44 +++++++++++++++++++++++++++++--
       
       6 files changed, 801 insertions(+), 182 deletions(-)
       ---
   DIR diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
       t@@ -12,13 +12,18 @@ INCLUDE(FindCUDA)
        
        # Additional NVCC command line arguments
        # NOTE: Multiple arguments must be semi-colon selimited
       -SET(CUDA_NVCC_FLAGS "--use_fast_math;-O3;-gencode=arch=compute_20,code=\"sm_20,compute_20\"")
       +SET(CUDA_NVCC_FLAGS
       +    "--use_fast_math;-O3;-gencode=arch=compute_20,code=\"sm_20,compute_20\" -ccbin gcc-4.6")
        
        # Rule to build executable program 
       -CUDA_ADD_EXECUTABLE(../sphere main.cpp file_io.cpp sphere.cpp device.cu utility.cu)
       -CUDA_ADD_EXECUTABLE(../porosity porosity.cpp file_io.cpp sphere.cpp device.cu utility.cu)
       -CUDA_ADD_EXECUTABLE(../forcechains forcechains.cpp file_io.cpp sphere.cpp device.cu utility.cu)
       -CUDA_ADD_EXECUTABLE(../porousflow porousflow.cpp darcy.cpp file_io.cpp sphere.cpp device.cu utility.cu)
       +CUDA_ADD_EXECUTABLE(../sphere
       +    main.cpp file_io.cpp sphere.cpp device.cu utility.cu)
       +CUDA_ADD_EXECUTABLE(../porosity
       +    porosity.cpp file_io.cpp sphere.cpp device.cu utility.cu)
       +CUDA_ADD_EXECUTABLE(../forcechains
       +    forcechains.cpp file_io.cpp sphere.cpp device.cu utility.cu)
       +CUDA_ADD_EXECUTABLE(../porousflow
       +    porousflow.cpp darcy.cpp file_io.cpp sphere.cpp device.cu utility.cu)
        
        #ADD_EXECUTABLE(unittests boost-unit-tests.cpp sphere.cpp)
        #TARGET_LINK_LIBRARIES(unittests
   DIR diff --git a/src/darcy.cpp b/src/darcy.cpp
       t@@ -1,39 +1,299 @@
        #include <iostream>
       -#include <string>
        #include <cstdio>
        #include <cstdlib>
       -#include <cmath>
       -#include <vector>
       -#include <algorithm>
       +#include <string>
        
        #include "typedefs.h"
        #include "datatypes.h"
        #include "constants.h"
        #include "sphere.h"
        
       -// Find hydraulic conductivities for each cell
       +//#include "eigen-nvcc/Eigen/Core"
       +
       +// Initialize memory
       +void DEM::initDarcyMem()
       +{
       +    unsigned int ncells = d_nx*d_ny*d_nz;
       +    d_P = new Float[ncells]; // hydraulic pressure matrix
       +    d_dP = new Float3[ncells]; // Cell spatial gradient in hydraulic pressures
       +    d_K = new Float[ncells]; // hydraulic conductivity matrix
       +    d_S = new Float[ncells]; // hydraulic storativity matrix
       +    d_W = new Float[ncells]; // hydraulic recharge
       +}
       +
       +// Free memory
       +void DEM::freeDarcyMem()
       +{
       +    free(d_P);
       +    free(d_dP);
       +    free(d_K);
       +    free(d_S);
       +    free(d_W);
       +}
       +
       +// 3D index to 1D index
       +unsigned int DEM::idx(
       +        const unsigned int x,
       +        const unsigned int y,
       +        const unsigned int z)
       +{
       +    return x + d_nx*y + d_nx*d_ny*z;
       +}
       +
       +// Set initial values
       +void DEM::initDarcyVals()
       +{
       +    unsigned int ix, iy, iz;
       +    for (ix=0; ix<d_nx; ++ix) {
       +        for (iy=0; iy<d_ny; ++iy) {
       +            for (iz=0; iz<d_nz; ++iz) {
       +                d_P[idx(ix,iy,iz)] = 1.0;
       +                d_K[idx(ix,iy,iz)] = 1.5;
       +                d_S[idx(ix,iy,iz)] = 7.5e-3;
       +                d_W[idx(ix,iy,iz)] = 0.0;
       +            }
       +        }
       +    }
       +}
       +
       +Float DEM::minVal3dArr(Float* arr)
       +{
       +    Float minval = 1e16; // a large val
       +    Float val;
       +    unsigned int ix, iy, iz;
       +    for (ix=0; ix<d_nx; ++ix) {
       +        for (iy=0; iy<d_ny; ++iy) {
       +            for (iz=0; iz<d_nz; ++iz) {
       +                val = arr[idx(ix,iy,iz)];
       +                if (minval > val)
       +                    minval = val;
       +            }
       +        }
       +    }
       +}
        
       -// Solve Darcy flow through particles
       +// Find the spatial gradient in pressures per cell
       +void DEM::findDarcyGradients()
       +{
       +
       +    std::cout << "dx,dy,dz: "
       +        << d_dx << ","
       +        << d_dy << ","
       +        << d_dz << std::endl;
       +    const Float dx2 = d_dx*d_dx;
       +    const Float dy2 = d_dy*d_dy;
       +    const Float dz2 = d_dz*d_dz;
       +    std::cout << "dx2,dy2,dz2: "
       +        << dx2 << ","
       +        << dy2 << ","
       +        << dz2 << std::endl;
       +    Float localP2;
       +
       +    unsigned int ix, iy, iz;
       +    for (ix=1; ix<d_nx-1; ++ix) {
       +        for (iy=1; iy<d_ny-1; ++iy) {
       +            for (iz=1; iz<d_nz-1; ++iz) {
       +
       +                localP2 = 2.0*d_P[idx(ix,iy,iz)];
       +
       +                d_dP[idx(ix,iy,iz)].x
       +                    = (d_P[idx(ix+1,iy,iz)] - localP2
       +                            + d_P[idx(ix-1,iy,iz)])/dx2;
       +                
       +                d_dP[idx(ix,iy,iz)].y
       +                    = (d_P[idx(ix,iy+1,iz)] - localP2
       +                            + d_P[idx(ix,iy-1,iz)])/dx2;
       +
       +                d_dP[idx(ix,iy,iz)].z
       +                    = (d_P[idx(ix,iy,iz+1)] - localP2
       +                            + d_P[idx(ix,iy,iz-1)])/dz2;
       +            }
       +        }
       +    }
       +}
       +
       +// Set the gradient to 0.0 in all dimensions at the boundaries
       +void DEM::setDarcyBCNeumannZero()
       +{
       +    Float3 z3 = MAKE_FLOAT3(0.0, 0.0, 0.0);
       +    unsigned int ix, iy, iz;
       +    unsigned int nx = d_nx-1;
       +    unsigned int ny = d_ny-1;
       +    unsigned int nz = d_nz-1;
       +
       +    // I don't care that the values at four edges are written twice
       +
       +    // x-y plane at z=0 and z=d_dz-1
       +    for (ix=0; ix<d_nx; ++ix) {
       +        for (iy=0; iy<d_ny; ++iy) {
       +            d_dP[idx(ix,iy, 0)] = z3;
       +            d_dP[idx(ix,iy,nz)] = z3;
       +        }
       +    }
       +
       +    // x-z plane at y=0 and y=d_dy-1
       +    for (ix=0; ix<d_nx; ++ix) {
       +        for (iz=0; iz<d_nz; ++iz) {
       +            d_dP[idx(ix, 0,iz)] = z3;
       +            d_dP[idx(ix,ny,iz)] = z3;
       +        }
       +    }
       +
       +    // y-z plane at x=0 and x=d_dx-1
       +    for (iy=0; iy<d_ny; ++iy) {
       +        for (iz=0; iz<d_nz; ++iz) {
       +            d_dP[idx( 0,iy,iz)] = z3;
       +            d_dP[idx(nx,iy,iz)] = z3;
       +        }
       +    }
       +}
       +
       +
       +void DEM::explDarcyStep(const Float dt)
       +{
       +    // Find spatial gradients in all cells
       +    //findDarcyGradients();
       +    //printDarcyArray3(stdout, d_dP, "d_dP, after findDarcyGradients");
       +
       +    // Set boundary conditions
       +    //setDarcyBCNeumannZero();
       +    //printDarcyArray3(stdout, d_dP, "d_dP, after setDarcyBCNeumannZero");
       +
       +    // Cell dims squared
       +    const Float dx2 = d_dx*d_dx;
       +    const Float dy2 = d_dy*d_dy;
       +    const Float dz2 = d_dz*d_dz;
       +    std::cout << "dx2,dy2,dz2: "
       +        << dx2 << ","
       +        << dy2 << ","
       +        << dz2 << std::endl;
       +
       +    // Explicit 3D finite difference scheme
       +    // new = old + gradient*timestep
       +    unsigned int ix, iy, iz, cellidx;
       +    Float K, P;
       +    for (ix=1; ix<d_nx-1; ++ix) {
       +        for (iy=1; iy<d_ny-1; ++iy) {
       +            for (iz=1; iz<d_nz-1; ++iz) {
       +
       +                cellidx = idx(ix,iy,iz);
       +
       +                /*d_P[cellidx]
       +                    -= (d_W[cellidx]*dt
       +                    + d_K[cellidx]*d_dP[cellidx].x/d_dx
       +                    + d_K[cellidx]*d_dP[cellidx].y/d_dy
       +                    + d_K[cellidx]*d_dP[cellidx].z/d_dz) / d_S[cellidx];*/
       +
       +                K = d_K[cellidx];   // cell hydraulic conductivity
       +                P = d_P[cellidx];   // cell hydraulic pressure
       +
       +                d_P[cellidx]
       +                    += d_W[cellidx]*dt  // cell recharge
       +                    + K*dt *            // diffusivity term
       +                    (
       +                     (d_P[idx(ix+1,iy,iz)] - 2.0*P + d_P[idx(ix-1,iy,iz)])/dx2 +
       +                     (d_P[idx(ix,iy+1,iz)] - 2.0*P + d_P[idx(ix,iy-1,iz)])/dy2 +
       +                     (d_P[idx(ix,iy,iz+1)] - 2.0*P + d_P[idx(ix,iy,iz-1)])/dz2
       +                    );
       +
       +
       +            }
       +        }
       +    }
       +}
       +
       +// Print array values to file stream (stdout, stderr, other file)
       +void DEM::printDarcyArray(FILE* stream, Float* arr)
       +{
       +    unsigned int x, y, z;
       +    for (z=0; z<d_nz; z++) {
       +        for (y=0; y<d_ny; y++) {
       +            for (x=0; x<d_nx; x++) {
       +                fprintf(stream, "%f\t", arr[idx(x,y,z)]);
       +            }
       +            fprintf(stream, "\n");
       +        }
       +        fprintf(stream, "\n");
       +    }
       +}
       +
       +// Overload printDarcyArray to add optional description
       +void DEM::printDarcyArray(FILE* stream, Float* arr, std::string desc)
       +{
       +    std::cout << "\n" << desc << ":\n";
       +    printDarcyArray(stream, arr);
       +}
       +
       +// Print array values to file stream (stdout, stderr, other file)
       +void DEM::printDarcyArray3(FILE* stream, Float3* arr)
       +{
       +    unsigned int x, y, z;
       +    for (z=0; z<d_nz; z++) {
       +        for (y=0; y<d_ny; y++) {
       +            for (x=0; x<d_nx; x++) {
       +                fprintf(stream, "%f,%f,%f\t",
       +                        arr[idx(x,y,z)].x,
       +                        arr[idx(x,y,z)].y,
       +                        arr[idx(x,y,z)].z);
       +            }
       +            fprintf(stream, "\n");
       +        }
       +        fprintf(stream, "\n");
       +    }
       +}
       +
       +// Overload printDarcyArray to add optional description
       +void DEM::printDarcyArray3(FILE* stream, Float3* arr, std::string desc)
       +{
       +    std::cout << "\n" << desc << ":\n";
       +    printDarcyArray3(stream, arr);
       +}
       +
       +
       +// Find hydraulic conductivities for each cell by finding the particle contents
       +//
       +
       +// Solve Darcy flow on a regular, cubic grid
        void DEM::startDarcy(
                const Float cellsizemultiplier)
        {
            // Number of cells
       -    int nx = grid.L[0]/grid.num[0];
       -    int ny = grid.L[1]/grid.num[1];
       -    int nz = grid.L[2]/grid.num[2];
       +    d_nx = floor(grid.num[0]*cellsizemultiplier);
       +    d_ny = floor(grid.num[1]*cellsizemultiplier);
       +    d_nz = floor(grid.num[2]*cellsizemultiplier);
        
            // Cell size 
       -    Float dx = grid.L[0]/nx;
       -    Float dy = grid.L[1]/nx;
       -    Float dz = grid.L[2]/nx;
       +    Float d_dx = grid.L[0]/d_nx;
       +    Float d_dy = grid.L[1]/d_ny;
       +    Float d_dz = grid.L[2]/d_nz;
        
            if (verbose == 1) {
       -        std::cout << "Fluid grid dimensions: "
       -            << nx << " * "
       -            << ny << " * "
       -            << nz << std::endl;
       +        std::cout << "  - Fluid grid dimensions: "
       +            << d_nx << " * "
       +            << d_ny << " * "
       +            << d_nz << std::endl;
       +        std::cout << "  - Fluid grid cell size: "
       +            << d_dx << " * "
       +            << d_dy << " * "
       +            << d_dz << std::endl;
            }
        
       +    initDarcyMem();
       +    initDarcyVals();
        
       -}
       +    // Temporal loop
       +    //while(time.current <= time.total) {
        
       +        explDarcyStep(time.dt);
       +        time.current += time.dt;
       +    //}
       +
       +
       +    printDarcyArray(stdout, d_P, "d_P");
       +    //printDarcyArray3(stdout, d_dP, "d_dP");
       +    //printDarcyArray(stdout, d_K, "d_K");
       +    //printDarcyArray(stdout, d_S, "d_S");
       +    //printDarcyArray(stdout, d_W, "d_W");
       +
       +    freeDarcyMem();
       +}
   DIR diff --git a/src/latticeboltzmann.cuh b/src/latticeboltzmann.cuh
       t@@ -1,6 +1,10 @@
        #ifndef LATTICEBOLTZMANN_CUH_
        #define LATTICEBOLTZMANN_CUH_
        
       +// Enable line below to perform lattice Boltzmann computations on the
       +// GPU, disable for CPU computation
       +//#define LBM_GPU
       +
        // latticeboltzmann.cuh
        // Functions for solving the Navier-Stokes equations using the Lattice-Boltzmann
        // method with D3Q19 stencils
       t@@ -8,84 +12,166 @@
        // Calculate linear cell index from position (x,y,z)
        // and fluid position vector (i).
        // From A. Monitzer 2013
       -__device__ unsigned int grid2index(
       +#ifdef LBM_GPU
       +__device__ 
       +#endif
       +unsigned int grid2index(
                unsigned int x, unsigned int y, unsigned int z,
       -        unsigned int i)
       +        unsigned int i,
       +        unsigned int nx, unsigned int ny, unsigned int nz)
        {
       -    return x + ((y + z*devC_grid.num[1])*devC_grid.num[0])
       -        + (devC_grid.num[0]*devC_grid.num[1]*devC_grid.num[2]*i);
       +    return x + ((y + z*ny)*nx) + (nx*ny*nz*i);
        }
        
       +
        // Equilibrium distribution
       -__device__ Float feq(Float3 v, Float rho, Float3 e, Float omega)
       +#ifdef LBM_GPU
       +__device__ 
       +#endif
       +Float feq(Float3 v, Float rho, Float3 e, Float w, Float dt, Float dx)
        {
       -    return omega*rho * (1.0 - 3.0/2.0 * dot(v,v) + 3.0*dot(e,v) +
       -            9.0/2.0*dot(e,v)*dot(e,v));
       +    // Monitzer 2010
       +    //return w*rho * (1.0 - 3.0/2.0 * dot(v,v) + 3.0*dot(e,v) +
       +            //9.0/2.0*dot(e,v)*dot(e,v));
       +
       +    // Rinaldi 2012
       +    //return w*rho * (1.0 + 3.0*dot(e,v) + 9.0/2.0*dot(e,v)*dot(e,v)
       +            //- 3.0/2.0*dot(v,v));
       +
       +    // Hecht 2010
       +    //Float c2_s = 1.0/sqrt(3);   // D3Q19 lattice speed of sound
       +    //c2_s *= c2_s;
       +    //return w*rho * (1.0 + dot(e,v)/c2_s 
       +            //+ (dot(e,v)*dot(e,v))/(2.0*c2_s*c2_s)
       +            //- dot(v,v)*dot(v,v)/(2.0*c2_s));
       +
       +    // Chirila 2010
       +    //Float c2 = 1.0*grid.num[0]/devC_dt;
       +    //Float c2 = 1.0/sqrt(3.0);
       +    //c2 *= c2;   // Propagation speed on the lattice
       +    //return w*rho * (1.0 + 3.0*dot(e,v)/c2
       +            //+ 9.0/2.0 * dot(e,v)*dot(e,v)/(c2*c2)
       +            //- 3.0/2.0 * dot(v,v)/c2);
       +
       +    // Habich 2011
       +    Float c2 = dx/dt * dx/dt;
       +    return rho*w
       +        * (1.0 + 3.0/c2*dot(e,v)
       +                + 9.0/(2.0*c2*c2) * dot(e,v)*dot(e,v)
       +                - 3.0/(2.0*c2) * dot(v,v));
        }
        
        // Collision operator
        // Bhatnagar-Gross-Krook approximation (BGK), Thurey (2003).
       -__device__ Float bgk(
       +#ifdef LBM_GPU
       +__device__
       +#endif
       +Float bgk(
       +        Float dt,
       +        Float dx,
                Float f,
                Float tau, 
                Float3 v,
                Float rho,
                Float3 e,
       -        Float omega,
       +        Float w,
                Float3 extF)
        {
       -    return devC_dt / tau * (f - feq(v, rho, e, omega))
       -        - (1.0 - 1.0/(2.0*tau)) * 3.0/omega * dot(extF, e);
       +    //Float feqval = feq(v, rho, e, w);
       +    //printf("feq(v, rho=%f, e, w=%f) = %f\n", rho, w, feqval);
       +
       +    // Monitzer 2008
       +    //return dt / tau * (f - feq(v, rho, e, w))
       +        //- (1.0 - 1.0/(2.0*tau)) * 3.0/w * dot(extF, e);
       +    //return dt / tau * (f - feq(v, rho, e, w))
       +     //   + (2.0*tau - 1.0/(2.0*tau)) * 3.0/w * dot(extF, e);
       +    //return dt/tau * (f - feq(v, rho, e, w))
       +        //+ (2.0*tau - 1.0/(2.0*tau)) * 3.0/w * dot(extF, e);
       +    
       +    // Monitzer 2010
       +    //return dt/tau*(f - feq(v, rho, e, w))
       +        //+ (2.0*tau - 1.0)/(2.0*tau) * 3.0/w * dot(extF, e);
       +
       +    // Rinaldi 2012
       +    //return 1.0/tau * (feq(v, rho, e, w) - f);
       +
       +    // Habich 2011
       +    return (f - feq(v, rho, e, w, dt, dx))/tau
       +        + (2.0*tau - 1.0)/(2.0*tau) * 3.0/w * dot(extF, e);
        }
        
        // Initialize the fluid distributions on the base of the densities provided
       -__global__ void initfluid(
       +#ifdef LBM_GPU
       +__global__ void initFluid(
                Float4* dev_v_rho,
                Float* dev_f)
       +#else
       +void initFluid(
       +        Float4* dev_v_rho,
       +        Float* dev_f,
       +        unsigned int nx,
       +        unsigned int ny,
       +        unsigned int nz)
       +#endif
       +
        {
       +#ifdef LBM_GPU
            // 3D thread index
       -    const unsigned int z = blockDim.x * blockIdx.x + threadIdx.x;
       +    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
            const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
       -    const unsigned int x = blockDim.z * blockIdx.z + threadIdx.z;
       +    const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
        
            // Grid dimensions
            const unsigned int nx = devC_grid.num[0];
            const unsigned int ny = devC_grid.num[1];
            const unsigned int nz = devC_grid.num[2];
       +#else
       +    for (unsigned int z = 0; z<nz; z++) {
       +        for (unsigned int y = 0; y<ny; y++) {
       +            for (unsigned int x = 0; x<nx; x++) {
       +#endif
        
            // Check that we are not outside the fluid grid
            if (x < nx && y < ny && z < nz) {
        
                // 1D thread index
       -        const unsigned long int tidx = x + nx*y + nx*ny*z;
       +        const unsigned int tidx = x + nx*y + nx*ny*z;
        
                // Read velocity and density, zero velocity
       +#ifdef LBM_GPU
       +        __syncthreads();
       +#endif
                Float4 v_rho = dev_v_rho[tidx];
                v_rho = MAKE_FLOAT4(0.0, 0.0, 0.0, v_rho.w);
        
       -        // Set values to equilibrium distribution (f_i = omega_i * rho_0)
       +        // Set values to equilibrium distribution (f_i = w_i * rho_0)
       +#ifdef LBM_GPU
                __syncthreads();
       +#endif
                dev_v_rho[tidx] = v_rho;
       -        dev_f[grid2index(x,y,z,0)]  = 1.0/3.0  * v_rho.w;
       -        dev_f[grid2index(x,y,z,1)]  = 1.0/18.0 * v_rho.w;
       -        dev_f[grid2index(x,y,z,2)]  = 1.0/18.0 * v_rho.w;
       -        dev_f[grid2index(x,y,z,3)]  = 1.0/18.0 * v_rho.w;
       -        dev_f[grid2index(x,y,z,4)]  = 1.0/18.0 * v_rho.w;
       -        dev_f[grid2index(x,y,z,5)]  = 1.0/18.0 * v_rho.w;
       -        dev_f[grid2index(x,y,z,6)]  = 1.0/18.0 * v_rho.w;
       -        dev_f[grid2index(x,y,z,7)]  = 1.0/36.0 * v_rho.w;
       -        dev_f[grid2index(x,y,z,8)]  = 1.0/36.0 * v_rho.w;
       -        dev_f[grid2index(x,y,z,9)]  = 1.0/36.0 * v_rho.w;
       -        dev_f[grid2index(x,y,z,10)] = 1.0/36.0 * v_rho.w;
       -        dev_f[grid2index(x,y,z,11)] = 1.0/36.0 * v_rho.w;
       -        dev_f[grid2index(x,y,z,12)] = 1.0/36.0 * v_rho.w;
       -        dev_f[grid2index(x,y,z,13)] = 1.0/36.0 * v_rho.w;
       -        dev_f[grid2index(x,y,z,14)] = 1.0/36.0 * v_rho.w;
       -        dev_f[grid2index(x,y,z,15)] = 1.0/36.0 * v_rho.w;
       -        dev_f[grid2index(x,y,z,16)] = 1.0/36.0 * v_rho.w;
       -        dev_f[grid2index(x,y,z,17)] = 1.0/36.0 * v_rho.w;
       -        dev_f[grid2index(x,y,z,18)] = 1.0/36.0 * v_rho.w;
       +        dev_f[grid2index(x,y,z,0,nx,ny,nz)]  = 1.0/3.0  * v_rho.w;
       +        dev_f[grid2index(x,y,z,1,nx,ny,nz)]  = 1.0/18.0 * v_rho.w;
       +        dev_f[grid2index(x,y,z,2,nx,ny,nz)]  = 1.0/18.0 * v_rho.w;
       +        dev_f[grid2index(x,y,z,3,nx,ny,nz)]  = 1.0/18.0 * v_rho.w;
       +        dev_f[grid2index(x,y,z,4,nx,ny,nz)]  = 1.0/18.0 * v_rho.w;
       +        dev_f[grid2index(x,y,z,5,nx,ny,nz)]  = 1.0/18.0 * v_rho.w;
       +        dev_f[grid2index(x,y,z,6,nx,ny,nz)]  = 1.0/18.0 * v_rho.w;
       +        dev_f[grid2index(x,y,z,7,nx,ny,nz)]  = 1.0/36.0 * v_rho.w;
       +        dev_f[grid2index(x,y,z,8,nx,ny,nz)]  = 1.0/36.0 * v_rho.w;
       +        dev_f[grid2index(x,y,z,9,nx,ny,nz)]  = 1.0/36.0 * v_rho.w;
       +        dev_f[grid2index(x,y,z,10,nx,ny,nz)] = 1.0/36.0 * v_rho.w;
       +        dev_f[grid2index(x,y,z,11,nx,ny,nz)] = 1.0/36.0 * v_rho.w;
       +        dev_f[grid2index(x,y,z,12,nx,ny,nz)] = 1.0/36.0 * v_rho.w;
       +        dev_f[grid2index(x,y,z,13,nx,ny,nz)] = 1.0/36.0 * v_rho.w;
       +        dev_f[grid2index(x,y,z,14,nx,ny,nz)] = 1.0/36.0 * v_rho.w;
       +        dev_f[grid2index(x,y,z,15,nx,ny,nz)] = 1.0/36.0 * v_rho.w;
       +        dev_f[grid2index(x,y,z,16,nx,ny,nz)] = 1.0/36.0 * v_rho.w;
       +        dev_f[grid2index(x,y,z,17,nx,ny,nz)] = 1.0/36.0 * v_rho.w;
       +        dev_f[grid2index(x,y,z,18,nx,ny,nz)] = 1.0/36.0 * v_rho.w;
            }
       +#ifndef LBM_GPU
       +            }}}
       +#endif
        }
        
        // Swap two arrays pointers
       t@@ -98,6 +184,7 @@ void swapFloatArrays(Float* arr1, Float* arr2)
        
        // Combined streaming and collision step with particle coupling and optional
        // periodic boundaries. Derived from A. Monitzer 2013
       +#ifdef LBM_GPU
        __global__ void latticeBoltzmannD3Q19(
                Float* dev_f,
                Float* dev_f_new,
       t@@ -108,46 +195,68 @@ __global__ void latticeBoltzmannD3Q19(
                Float4* dev_vel_sorted, // particle velocities + fixvel
                Float4* dev_force,
                unsigned int* dev_gridParticleIndex)
       +#else
       +void latticeBoltzmannD3Q19(
       +        Float* dev_f,
       +        Float* dev_f_new,
       +        Float4* dev_v_rho,        // fluid velocities and densities
       +        Float devC_dt,
       +        Grid& grid,
       +        Params& params)
       +
       +#endif
        {
       +#ifdef LBM_GPU
            // 3D thread index
       -    const unsigned int z = blockDim.x * blockIdx.x + threadIdx.x;
       +    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
            const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
       -    const unsigned int x = blockDim.z * blockIdx.z + threadIdx.z;
       +    const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
        
            // Grid dimensions
            const unsigned int nx = devC_grid.num[0];
            const unsigned int ny = devC_grid.num[1];
            const unsigned int nz = devC_grid.num[2];
       +#else
       +    // Grid dimensions
       +    const unsigned int nx = grid.num[0];
       +    const unsigned int ny = grid.num[1];
       +    const unsigned int nz = grid.num[2];
       +
       +    for (unsigned int z = 0; z<nz; z++) {
       +        for (unsigned int y = 0; y<ny; y++) {
       +            for (unsigned int x = 0; x<nx; x++) {
       +#endif
        
            // Check that we are not outside the fluid grid
            if (x < nx && y < ny && z < nz) {
        
       +        // 1D thread index
       +        const unsigned int tidx = x + nx*y + nx*ny*z;
                //printf("(x,y,x) = (%d,%d,%d), tidx = %d\n", x, y, z, tidx);
        
                // Load the fluid distribution into local registers
       +#ifdef LBM_GPU
                __syncthreads();
       -        Float f_0  = dev_f[grid2index(x,y,z,0)];
       -        Float f_1  = dev_f[grid2index(x,y,z,1)];
       -        Float f_2  = dev_f[grid2index(x,y,z,2)];
       -        Float f_3  = dev_f[grid2index(x,y,z,3)];
       -        Float f_4  = dev_f[grid2index(x,y,z,4)];
       -        Float f_5  = dev_f[grid2index(x,y,z,5)];
       -        Float f_6  = dev_f[grid2index(x,y,z,6)];
       -        Float f_7  = dev_f[grid2index(x,y,z,7)];
       -        Float f_8  = dev_f[grid2index(x,y,z,8)];
       -        Float f_9  = dev_f[grid2index(x,y,z,9)];
       -        Float f_10 = dev_f[grid2index(x,y,z,10)];
       -        Float f_11 = dev_f[grid2index(x,y,z,11)];
       -        Float f_12 = dev_f[grid2index(x,y,z,12)];
       -        Float f_13 = dev_f[grid2index(x,y,z,13)];
       -        Float f_14 = dev_f[grid2index(x,y,z,14)];
       -        Float f_15 = dev_f[grid2index(x,y,z,15)];
       -        Float f_16 = dev_f[grid2index(x,y,z,16)];
       -        Float f_17 = dev_f[grid2index(x,y,z,17)];
       -        Float f_18 = dev_f[grid2index(x,y,z,18)];
       -
       -        // Fluid constant (Wei et al. 2004), nu: kinematic viscosity [Pa*s]
       -        const Float tau = 0.5*(1.0 + 6.0*devC_params.nu);
       +#endif
       +        Float f_0  = dev_f[grid2index(x,y,z,0,nx,ny,nz)];
       +        Float f_1  = dev_f[grid2index(x,y,z,1,nx,ny,nz)];
       +        Float f_2  = dev_f[grid2index(x,y,z,2,nx,ny,nz)];
       +        Float f_3  = dev_f[grid2index(x,y,z,3,nx,ny,nz)];
       +        Float f_4  = dev_f[grid2index(x,y,z,4,nx,ny,nz)];
       +        Float f_5  = dev_f[grid2index(x,y,z,5,nx,ny,nz)];
       +        Float f_6  = dev_f[grid2index(x,y,z,6,nx,ny,nz)];
       +        Float f_7  = dev_f[grid2index(x,y,z,7,nx,ny,nz)];
       +        Float f_8  = dev_f[grid2index(x,y,z,8,nx,ny,nz)];
       +        Float f_9  = dev_f[grid2index(x,y,z,9,nx,ny,nz)];
       +        Float f_10 = dev_f[grid2index(x,y,z,10,nx,ny,nz)];
       +        Float f_11 = dev_f[grid2index(x,y,z,11,nx,ny,nz)];
       +        Float f_12 = dev_f[grid2index(x,y,z,12,nx,ny,nz)];
       +        Float f_13 = dev_f[grid2index(x,y,z,13,nx,ny,nz)];
       +        Float f_14 = dev_f[grid2index(x,y,z,14,nx,ny,nz)];
       +        Float f_15 = dev_f[grid2index(x,y,z,15,nx,ny,nz)];
       +        Float f_16 = dev_f[grid2index(x,y,z,16,nx,ny,nz)];
       +        Float f_17 = dev_f[grid2index(x,y,z,17,nx,ny,nz)];
       +        Float f_18 = dev_f[grid2index(x,y,z,18,nx,ny,nz)];
        
                // Directional vectors to each lattice-velocity in D3Q19
                // Zero velocity: i = 0
       t@@ -162,7 +271,7 @@ __global__ void latticeBoltzmannD3Q19(
                const Float3 e_6  = MAKE_FLOAT3( 0.0, 0.0,-1.0); // face: -z
                const Float3 e_7  = MAKE_FLOAT3( 1.0, 1.0, 0.0); // edge: +x,+y
                const Float3 e_8  = MAKE_FLOAT3(-1.0,-1.0, 0.0); // edge: -x,-y
       -        const Float3 e_9  = MAKE_FLOAT3( 1.0,-1.0, 0.0); // edge: -x,+y
       +        const Float3 e_9  = MAKE_FLOAT3(-1.0, 1.0, 0.0); // edge: -x,+y
                const Float3 e_10 = MAKE_FLOAT3( 1.0,-1.0, 0.0); // edge: +x,-y
                const Float3 e_11 = MAKE_FLOAT3( 1.0, 0.0, 1.0); // edge: +x,+z
                const Float3 e_12 = MAKE_FLOAT3(-1.0, 0.0,-1.0); // edge: -x,-z
       t@@ -180,22 +289,20 @@ __global__ void latticeBoltzmannD3Q19(
                const Float rho = f_0 + f_1 + f_2 + f_3 + f_4 + f_5 + f_6 + f_7 + f_8 +
                    f_9 + f_10 + f_11 + f_12 + f_13 + f_14 + f_15 + f_16 + f_17 + f_18;
        
       -        // Fluid velocity (v = sum(f_i/e_i)/rho)
       -        const Float3 v = (f_0/e_0 + f_1/e_1 + f_2/e_2 + f_3/e_3 + f_4/e_4 +
       -                f_5/e_5 + f_6/e_6 + f_7/e_7 + f_8/e_8 + f_9/e_9 + f_10/e_10 +
       -                f_11/e_11 + f_12/e_12 + f_13/e_13 + f_14/e_14 + f_15/e_15 +
       -                f_16/e_16 + f_17/e_17 + f_18/e_18) / rho;
       +        // Fluid velocity (v = sum(f_i*e_i)/rho)
       +        const Float3 v = (f_0*e_0 + f_1*e_1 + f_2*e_2 + f_3*e_3 + f_4*e_4 +
       +                f_5*e_5 + f_6*e_6 + f_7*e_7 + f_8*e_8 + f_9*e_9 + f_10*e_10 +
       +                f_11*e_11 + f_12*e_12 + f_13*e_13 + f_14*e_14 + f_15*e_15 +
       +                f_16*e_16 + f_17*e_17 + f_18*e_18) / rho;
        
                //// Calculate the force transferred from the particles to the fluid
       -        Float3 f_particle;
       +        /*Float3 f_particle;
                Float3 f_particles = MAKE_FLOAT3(0.0, 0.0, 0.0);
                Float4 x_particle4; // particle position + radius
                Float  r_particle;  // radius
                Float4 v_particle4; // particle velocity + fixvel
                Float3 v_particle;  // particle velocity
        
       -        // 1D thread index
       -        const unsigned int tidx = x + nx*y + nx*ny*z;
        
                // Lowest particle index in cell
                unsigned int startIdx = dev_cellStart[tidx];
       t@@ -206,7 +313,6 @@ __global__ void latticeBoltzmannD3Q19(
        
                    // Highest particle index in cell + 1
                    unsigned int endIdx = dev_cellEnd[tidx];
       -
                    
                    // Iterate over cell particles
                    for (unsigned int idx = startIdx; idx<endIdx; ++idx) {
       t@@ -243,40 +349,77 @@ __global__ void latticeBoltzmannD3Q19(
                // 100: experimental value, depends on the grid size compared to the
                // particle size and the time step size
                f_particles *= 100.0 * rho * 6.0;
       +        */
       +
       +#ifdef LBM_GPU
       +        Float dx = devC_grid.L[0]/devC_grid.num[0];
       +
       +        // Fluid constant (Wei et al. 2004), nu: kinematic viscosity [Pa*s]
       +        //  nu = 1/6*(2*tau - 1) * dx * c
       +        //const Float tau = 0.5*(1.0 + 6.0*devC_params.nu);
       +        //const Float tau = 1.0/6.0*(2.0*devC_params.nu - 1.0) * dx*dx/devC_dt;
       +        const Float tau = (6.0*devC_params.nu * devC_dt/(dx*dx) + 1)/2.0;
       +
       +        // Gravitational force (F = g * m)
       +        const Float3 f_gravity = MAKE_FLOAT3(
       +                devC_params.g[0]*dx*rho,
       +                devC_params.g[1] 
       +                * ((Float)devC_grid.L[1]/devC_grid.num[1]) * rho,
       +                devC_params.g[2] 
       +                * ((Float)devC_grid.L[2]/devC_grid.num[2]) * rho);
       +#else
       +        Float dx = grid.L[0]/grid.num[0];
       +
       +        // Fluid constant (Wei et al. 2004), nu: kinematic viscosity [Pa*s]
       +        //const Float tau = 0.5*(1.0 + 6.0*params.nu);
       +        //const Float tau = 1.0/6.0*(2.0*params.nu - 1.0) * dx*dx/devC_dt;
       +        const Float tau = (6.0*params.nu * devC_dt/(dx*dx) + 1)/2.0;
       +
       +        //if (tau <= 0.5) {
       +            //fprintf(stderr, "Error, tau <= 0.5\n");
       +            //exit(1);
       +        //}
        
                // Gravitational force (F = g * m)
                const Float3 f_gravity = MAKE_FLOAT3(
       -                devC_params.g[0],
       -                devC_params.g[1],
       -                devC_params.g[2])
       -            * (devC_grid.L[0]/devC_grid.num[0])
       -            * (devC_grid.L[1]/devC_grid.num[1])
       -            * (devC_grid.L[2]/devC_grid.num[2]) * rho;
       +                params.g[0]*dx*rho,
       +                params.g[1] * ((Float)grid.L[1]/grid.num[1]) * rho,
       +                params.g[2] * ((Float)grid.L[2]/grid.num[2]) * rho);
       +#endif
        
                // The final external force
       -        const Float3 f_ext = f_particles + f_gravity;
       +        //const Float3 f_ext = f_particles + f_gravity;
       +        const Float3 f_ext = f_gravity;
       +        //const Float3 f_ext = MAKE_FLOAT3(0.0, 0.0, 0.0);
       +        //printf("%d,%d,%d: f_ext = %f, %f, %f\n", x, y, z,
       +                //f_ext.x, f_ext.y, f_ext.z);
        
                //// Collide fluid
                // Weights corresponding to each e_i lattice-velocity in D3Q19, sum to 1.0
       -        f_0  -= bgk(f_0,  tau, v, rho, e_0,  1.0/3.0,  f_ext);
       -        f_1  -= bgk(f_1,  tau, v, rho, e_1,  1.0/18.0, f_ext);
       -        f_2  -= bgk(f_2,  tau, v, rho, e_2,  1.0/18.0, f_ext);
       -        f_3  -= bgk(f_3,  tau, v, rho, e_3,  1.0/18.0, f_ext);
       -        f_4  -= bgk(f_4,  tau, v, rho, e_4,  1.0/18.0, f_ext);
       -        f_5  -= bgk(f_5,  tau, v, rho, e_5,  1.0/18.0, f_ext);
       -        f_6  -= bgk(f_6,  tau, v, rho, e_6,  1.0/18.0, f_ext);
       -        f_7  -= bgk(f_7,  tau, v, rho, e_7,  1.0/36.0, f_ext);
       -        f_8  -= bgk(f_8,  tau, v, rho, e_8,  1.0/36.0, f_ext);
       -        f_9  -= bgk(f_9,  tau, v, rho, e_9,  1.0/36.0, f_ext);
       -        f_10 -= bgk(f_10, tau, v, rho, e_10, 1.0/36.0, f_ext);
       -        f_11 -= bgk(f_11, tau, v, rho, e_11, 1.0/36.0, f_ext);
       -        f_12 -= bgk(f_12, tau, v, rho, e_12, 1.0/36.0, f_ext);
       -        f_13 -= bgk(f_13, tau, v, rho, e_13, 1.0/36.0, f_ext);
       -        f_14 -= bgk(f_14, tau, v, rho, e_14, 1.0/36.0, f_ext);
       -        f_15 -= bgk(f_15, tau, v, rho, e_15, 1.0/36.0, f_ext);
       -        f_16 -= bgk(f_16, tau, v, rho, e_16, 1.0/36.0, f_ext);
       -        f_17 -= bgk(f_17, tau, v, rho, e_17, 1.0/36.0, f_ext);
       -        f_18 -= bgk(f_18, tau, v, rho, e_18, 1.0/36.0, f_ext);
       +        f_0  -= bgk(devC_dt, dx, f_0,  tau, v, rho, e_0,  1.0/3.0,  f_ext);
       +        f_1  -= bgk(devC_dt, dx, f_1,  tau, v, rho, e_1,  1.0/18.0, f_ext);
       +        f_2  -= bgk(devC_dt, dx, f_2,  tau, v, rho, e_2,  1.0/18.0, f_ext);
       +        f_3  -= bgk(devC_dt, dx, f_3,  tau, v, rho, e_3,  1.0/18.0, f_ext);
       +        f_4  -= bgk(devC_dt, dx, f_4,  tau, v, rho, e_4,  1.0/18.0, f_ext);
       +        f_5  -= bgk(devC_dt, dx, f_5,  tau, v, rho, e_5,  1.0/18.0, f_ext);
       +        f_6  -= bgk(devC_dt, dx, f_6,  tau, v, rho, e_6,  1.0/18.0, f_ext);
       +        f_7  -= bgk(devC_dt, dx, f_7,  tau, v, rho, e_7,  1.0/36.0, f_ext);
       +        f_8  -= bgk(devC_dt, dx, f_8,  tau, v, rho, e_8,  1.0/36.0, f_ext);
       +        f_9  -= bgk(devC_dt, dx, f_9,  tau, v, rho, e_9,  1.0/36.0, f_ext);
       +        f_10 -= bgk(devC_dt, dx, f_10, tau, v, rho, e_10, 1.0/36.0, f_ext);
       +        f_11 -= bgk(devC_dt, dx, f_11, tau, v, rho, e_11, 1.0/36.0, f_ext);
       +        f_12 -= bgk(devC_dt, dx, f_12, tau, v, rho, e_12, 1.0/36.0, f_ext);
       +        f_13 -= bgk(devC_dt, dx, f_13, tau, v, rho, e_13, 1.0/36.0, f_ext);
       +        f_14 -= bgk(devC_dt, dx, f_14, tau, v, rho, e_14, 1.0/36.0, f_ext);
       +        f_15 -= bgk(devC_dt, dx, f_15, tau, v, rho, e_15, 1.0/36.0, f_ext);
       +        f_16 -= bgk(devC_dt, dx, f_16, tau, v, rho, e_16, 1.0/36.0, f_ext);
       +        f_17 -= bgk(devC_dt, dx, f_17, tau, v, rho, e_17, 1.0/36.0, f_ext);
       +        f_18 -= bgk(devC_dt, dx, f_18, tau, v, rho, e_18, 1.0/36.0, f_ext);
       +        //Float bgkval = bgk(devC_dt, f_1,  tau, v, rho, e_1,  1.0/18.0, f_ext);
       +        //Float feqval = feq(v, rho, e_1, 1.0/18.0);
       +        //printf("%d,%d,%d: dt %f, f %f, feq %f, tau %f, v %fx%fx%f, rho %f, e %fx%fx%f, f_ext %fx%fx%f, bgk %f\n",
       +                //x, y, z, devC_dt, f_1, feqval, tau, v.x, v.y, v.z, rho,
       +                //e_1.x, e_1.y, e_1.z, f_ext.x, f_ext.y, f_ext.z, bgkval);
        
        
                //// Stream fluid
       t@@ -284,171 +427,341 @@ __global__ void latticeBoltzmannD3Q19(
        
                
                // There may be a write conflict due to bounce backs
       +#ifdef LBM_GPU
                __syncthreads();
       +#endif
                
                // Write fluid velocity and density to global memory
                dev_v_rho[tidx] = MAKE_FLOAT4(v.x, v.y, v.z, rho); 
       +        //dev_v_rho[tidx] = MAKE_FLOAT4(x, y, z, rho); 
        
                // Face 0
       -        dev_f_new[grid2index(x,y,z,0)] = fmax(0.0, f_0);
       +        dev_f_new[grid2index(x,y,z,0,nx,ny,nz)] = fmax(0.0, f_0);
       +
       +        //*
       +
       +        // Face 1 (+x): Bounce back
       +        if (x < nx-1)
       +            dev_f_new[grid2index(x+1,  y,  z,  1,nx,ny,nz)] = fmax(0.0, f_1);
       +        else
       +            dev_f_new[grid2index(  x,  y,  z,  2,nx,ny,nz)] = fmax(0.0, f_1);
       +
       +        // Face 2 (-x): Bounce back
       +        if (x > 0)
       +            dev_f_new[grid2index(x-1,  y,  z,  2,nx,ny,nz)] = fmax(0.0, f_2);
       +        else
       +            dev_f_new[grid2index(  x,  y,  z,  1,nx,ny,nz)] = fmax(0.0, f_2);
       +
       +        // Face 3 (+y): Bounce back
       +        if (y < ny-1)
       +            dev_f_new[grid2index(  x,y+1,  z,  3,nx,ny,nz)] = fmax(0.0, f_3);
       +        else
       +            dev_f_new[grid2index(  x,  y,  z,  4,nx,ny,nz)] = fmax(0.0, f_3);
       +
       +        // Face 4 (-y): Bounce back
       +        if (y > 0)
       +            dev_f_new[grid2index(  x,y-1,  z,  4,nx,ny,nz)] = fmax(0.0, f_4);
       +        else
       +            dev_f_new[grid2index(  x,  y,  z,  3,nx,ny,nz)] = fmax(0.0, f_4);
       +
       +        // Face 5 (+z): Bounce back
       +        if (z < nz-1)
       +            dev_f_new[grid2index(  x,  y,z+1,  5,nx,ny,nz)] = fmax(0.0, f_5);
       +        else
       +            dev_f_new[grid2index(  x,  y,  z,  6,nx,ny,nz)] = fmax(0.0, f_5);
       +
       +        // Face 6 (-z): Bounce back
       +        if (z > 0)
       +            dev_f_new[grid2index(  x,  y,z-1,  6,nx,ny,nz)] = fmax(0.0, f_6);
       +        else
       +            dev_f_new[grid2index(  x,  y,  z,  5,nx,ny,nz)] = fmax(0.0, f_6);
       +        
       +        // Edge 7 (+x,+y): Bounce back
       +        if (x < nx-1 && y < ny-1)
       +            dev_f_new[grid2index(x+1,y+1,  z,  7,nx,ny,nz)] = fmax(0.0, f_7);
       +        else if (x < nx-1)
       +            dev_f_new[grid2index(x+1,  y,  z,  9,nx,ny,nz)] = fmax(0.0, f_7);
       +        else if (y < ny-1)
       +            dev_f_new[grid2index(  x,y+1,  z, 10,nx,ny,nz)] = fmax(0.0, f_7);
       +        else
       +            dev_f_new[grid2index(  x,  y,  z,  8,nx,ny,nz)] = fmax(0.0, f_7);
       +
       +        // Edge 8 (-x,-y): Bounce back
       +        if (x > 0 && y > 0)
       +            dev_f_new[grid2index(x-1,y-1,  z,  8,nx,ny,nz)] = fmax(0.0, f_8);
       +        else if (x > 0)
       +            dev_f_new[grid2index(x-1,  y,  z,  9,nx,ny,nz)] = fmax(0.0, f_8);
       +        else if (y > 0)
       +            dev_f_new[grid2index(  x,y-1,  z, 10,nx,ny,nz)] = fmax(0.0, f_8);
       +        else
       +            dev_f_new[grid2index(  x,  y,  z,  7,nx,ny,nz)] = fmax(0.0, f_8);
       +
       +        // Edge 9 (-x,+y): Bounce back
       +        if (x > 0 && y < ny-1)
       +            dev_f_new[grid2index(x-1,y+1,  z,  9,nx,ny,nz)] = fmax(0.0, f_9);
       +        else if (x > 0)
       +            dev_f_new[grid2index(x-1,  y,  z,  8,nx,ny,nz)] = fmax(0.0, f_9);
       +        else if (y < ny-1)
       +            dev_f_new[grid2index(  x,y+1,  z,  7,nx,ny,nz)] = fmax(0.0, f_9);
       +        else
       +            dev_f_new[grid2index(  x,  y,  z, 10,nx,ny,nz)] = fmax(0.0, f_9);
       +
       +        // Edge 10 (+x,-y): Bounce back
       +        if (x < nx-1 && y > 0)
       +            dev_f_new[grid2index(x+1,y-1,  z, 10,nx,ny,nz)] = fmax(0.0, f_10);
       +        else if (x < nx-1)
       +            dev_f_new[grid2index(x+1,  y,  z,  8,nx,ny,nz)] = fmax(0.0, f_10);
       +        else if (y > 0)
       +            dev_f_new[grid2index(  x,y-1,  z,  7,nx,ny,nz)] = fmax(0.0, f_10);
       +        else
       +            dev_f_new[grid2index(  x,  y,  z,  9,nx,ny,nz)] = fmax(0.0, f_10);
       +
       +        // Edge 11 (+x,+z): Bounce back
       +        if (x < nx-1 && z < nz-1)
       +            dev_f_new[grid2index(x+1,  y,z+1, 11,nx,ny,nz)] = fmax(0.0, f_11);
       +        else if (x < nx-1)
       +            dev_f_new[grid2index(x+1,  y,  z, 16,nx,ny,nz)] = fmax(0.0, f_11);
       +        else if (z < nz-1)
       +            dev_f_new[grid2index(  x,  y,z+1, 15,nx,ny,nz)] = fmax(0.0, f_11);
       +        else
       +            dev_f_new[grid2index(  x,  y,  z, 12,nx,ny,nz)] = fmax(0.0, f_11);
       +
       +        // Edge 12 (-x,-z): Bounce back
       +        if (x > 0 && z > 0)
       +            dev_f_new[grid2index(x-1,  y,z-1, 12,nx,ny,nz)] = fmax(0.0, f_12);
       +        else if (x > 0)
       +            dev_f_new[grid2index(x-1,  y,  z, 15,nx,ny,nz)] = fmax(0.0, f_12);
       +        else if (z > 0)
       +            dev_f_new[grid2index(  x,  y,z-1, 16,nx,ny,nz)] = fmax(0.0, f_12);
       +        else
       +            dev_f_new[grid2index(  x,  y,  z, 11,nx,ny,nz)] = fmax(0.0, f_12);
       +
       +        // Edge 13 (+y,+z): Bounce back
       +        if (y < ny-1 && z < nz-1)
       +            dev_f_new[grid2index(  x,y+1,z+1, 13,nx,ny,nz)] = fmax(0.0, f_13);
       +        else if (y < ny-1)
       +            dev_f_new[grid2index(  x,y+1,  z, 18,nx,ny,nz)] = fmax(0.0, f_13);
       +        else if (z < nz-1)
       +            dev_f_new[grid2index(  x,  y,z+1, 17,nx,ny,nz)] = fmax(0.0, f_13);
       +        else
       +            dev_f_new[grid2index(  x,  y,  z, 14,nx,ny,nz)] = fmax(0.0, f_13);
       +
       +        // Edge 14 (-y,-z): Bounce back
       +        if (y > 0 && z > 0)
       +            dev_f_new[grid2index(  x,y-1,z-1, 14,nx,ny,nz)] = fmax(0.0, f_14);
       +        else if (y > 0)
       +            dev_f_new[grid2index(  x,y-1,  z, 17,nx,ny,nz)] = fmax(0.0, f_14);
       +        else if (z > 0)
       +            dev_f_new[grid2index(  x,  y,z-1, 18,nx,ny,nz)] = fmax(0.0, f_14);
       +        else
       +            dev_f_new[grid2index(  x,  y,  z, 13,nx,ny,nz)] = fmax(0.0, f_14);
       +
       +        // Edge 15 (-x,+z): Bounce back
       +        if (x > 0 && z < nz-1)
       +            dev_f_new[grid2index(x-1,  y,z+1, 15,nx,ny,nz)] = fmax(0.0, f_15);
       +        else if (x > 0)
       +            dev_f_new[grid2index(x-1,  y,  z, 12,nx,ny,nz)] = fmax(0.0, f_15);
       +        else if (z < nz-1)
       +            dev_f_new[grid2index(  x,  y,z+1, 11,nx,ny,nz)] = fmax(0.0, f_15);
       +        else
       +            dev_f_new[grid2index(  x,  y,  z, 16,nx,ny,nz)] = fmax(0.0, f_15);
       +
       +        // Edge 16 (+x,-z)
       +        if (x < nx-1 && z > 0)
       +            dev_f_new[grid2index(x+1,  y,z-1, 16,nx,ny,nz)] = fmax(0.0, f_16);
       +        else if (x < nx-1)
       +            dev_f_new[grid2index(x+1,  y,  z, 11,nx,ny,nz)] = fmax(0.0, f_16);
       +        else if (z > 0)
       +            dev_f_new[grid2index(  x,  y,z-1, 12,nx,ny,nz)] = fmax(0.0, f_16);
       +        else
       +            dev_f_new[grid2index(  x,  y,  z, 15,nx,ny,nz)] = fmax(0.0, f_16);
       +
       +        // Edge 17 (-y,+z)
       +        if (y > 0 && z < nz-1)
       +            dev_f_new[grid2index(  x,y-1,z+1, 17,nx,ny,nz)] = fmax(0.0, f_17);
       +        else if (y > 0)
       +            dev_f_new[grid2index(  x,y-1,  z, 14,nx,ny,nz)] = fmax(0.0, f_17);
       +        else if (z < nz-1)
       +            dev_f_new[grid2index(  x,  y,z+1, 13,nx,ny,nz)] = fmax(0.0, f_17);
       +        else
       +            dev_f_new[grid2index(  x,  y,  z, 18,nx,ny,nz)] = fmax(0.0, f_17);
       +
       +        // Edge 18 (+y,-z)
       +        if (y < ny-1 && z > 0)
       +            dev_f_new[grid2index(  x,y+1,z-1, 18,nx,ny,nz)] = fmax(0.0, f_18);
       +        else if (y < ny-1)
       +            dev_f_new[grid2index(  x,y+1,  z, 13,nx,ny,nz)] = fmax(0.0, f_18);
       +        else if (z > 0)
       +            dev_f_new[grid2index(  x,  y,z-1, 14,nx,ny,nz)] = fmax(0.0, f_18);
       +        else
       +            dev_f_new[grid2index(  x,  y,  z, 17,nx,ny,nz)] = fmax(0.0, f_18);
       +        
       +        // */
       +        
       +        /*
        
                // Face 1 (+x): Periodic
                if (x < nx-1) // not at boundary
       -            dev_f_new[grid2index( x+1,   y,   z,  1)] = fmax(0.0, f_1);
       +            dev_f_new[grid2index( x+1,   y,   z,  1,nx,ny,nz)] = fmax(0.0, f_1);
                else        // at boundary
       -            dev_f_new[grid2index(   0,   y,   z,  1)] = fmax(0.0, f_1);
       +            dev_f_new[grid2index(   0,   y,   z,  1,nx,ny,nz)] = fmax(0.0, f_1);
        
                // Face 2 (-x): Periodic
                if (x > 0)  // not at boundary
       -            dev_f_new[grid2index( x-1,   y,   z,  2)] = fmax(0.0, f_2);
       +            dev_f_new[grid2index( x-1,   y,   z,  2,nx,ny,nz)] = fmax(0.0, f_2);
                else        // at boundary
       -            dev_f_new[grid2index(nx-1,   y,   z,  2)] = fmax(0.0, f_2);
       +            dev_f_new[grid2index(nx-1,   y,   z,  2,nx,ny,nz)] = fmax(0.0, f_2);
                
                // Face 3 (+y): Periodic
                if (y < ny-1) // not at boundary
       -            dev_f_new[grid2index(   x, y+1,   z,  3)] = fmax(0.0, f_3);
       +            dev_f_new[grid2index(   x, y+1,   z,  3,nx,ny,nz)] = fmax(0.0, f_3);
                else        // at boundary
       -            dev_f_new[grid2index(   x,   0,   z,  3)] = fmax(0.0, f_3);
       +            dev_f_new[grid2index(   x,   0,   z,  3,nx,ny,nz)] = fmax(0.0, f_3);
        
                // Face 4 (-y): Periodic
                if (y > 0)  // not at boundary
       -            dev_f_new[grid2index(   x, y-1,   z,  4)] = fmax(0.0, f_4);
       +            dev_f_new[grid2index(   x, y-1,   z,  4,nx,ny,nz)] = fmax(0.0, f_4);
                else        // at boundary
       -            dev_f_new[grid2index(   x,ny-1,   z,  4)] = fmax(0.0, f_4);
       +            dev_f_new[grid2index(   x,ny-1,   z,  4,nx,ny,nz)] = fmax(0.0, f_4);
        
                // Face 5 (+z): Bounce back, free slip
                if (z < nz-1) // not at boundary
       -            dev_f_new[grid2index(   x,   y, z+1,  5)] = fmax(0.0, f_5);
       +            dev_f_new[grid2index(   x,   y, z+1,  5,nx,ny,nz)] = fmax(0.0, f_5);
                else        // at boundary
       -            dev_f_new[grid2index(   x,   y,   z,  6)] = fmax(0.0, f_5);
       +            dev_f_new[grid2index(   x,   y,   z,  6,nx,ny,nz)] = fmax(0.0, f_5);
        
                // Face 6 (-z): Bounce back, free slip
                if (z > 0)  // not at boundary
       -            dev_f_new[grid2index(   x,   y, z-1,  6)] = fmax(0.0, f_6);
       +            dev_f_new[grid2index(   x,   y, z-1,  6,nx,ny,nz)] = fmax(0.0, f_6);
                else        // at boundary
       -            dev_f_new[grid2index(   x,   y,   z,  5)] = fmax(0.0, f_6);
       -
       +            dev_f_new[grid2index(   x,   y,   z,  5,nx,ny,nz)] = fmax(0.0, f_6);
       +        
                // Edge 7 (+x,+y): Periodic
                if (x < nx-1 && y < ny-1)   // not at boundary
       -            dev_f_new[grid2index( x+1, y+1,   z,  7)] = fmax(0.0, f_7);
       +            dev_f_new[grid2index( x+1, y+1,   z,  7,nx,ny,nz)] = fmax(0.0, f_7);
                else if (x < nx-1)  // at +y boundary
       -            dev_f_new[grid2index( x+1,   0,   z,  7)] = fmax(0.0, f_7);
       +            dev_f_new[grid2index( x+1,   0,   z,  7,nx,ny,nz)] = fmax(0.0, f_7);
                else if (y < ny-1)  // at +x boundary
       -            dev_f_new[grid2index(   0, y+1,   z,  7)] = fmax(0.0, f_7);
       +            dev_f_new[grid2index(   0, y+1,   z,  7,nx,ny,nz)] = fmax(0.0, f_7);
                else    // at +x+y boundary
       -            dev_f_new[grid2index(   0,   0,   z,  7)] = fmax(0.0, f_7);
       +            dev_f_new[grid2index(   0,   0,   z,  7,nx,ny,nz)] = fmax(0.0, f_7);
        
                // Edge 8 (-x,-y): Periodic
                if (x > 0 && y > 0) // not at boundary
       -            dev_f_new[grid2index( x-1, y-1,   z,  8)] = fmax(0.0, f_8);
       +            dev_f_new[grid2index( x-1, y-1,   z,  8,nx,ny,nz)] = fmax(0.0, f_8);
                else if (x > 0) // at -y boundary
       -            dev_f_new[grid2index( x-1,ny-1,   z,  8)] = fmax(0.0, f_8);
       +            dev_f_new[grid2index( x-1,ny-1,   z,  8,nx,ny,nz)] = fmax(0.0, f_8);
                else if (y > 0) // at -x boundary
       -            dev_f_new[grid2index(nx-1, y-1,   z,  8)] = fmax(0.0, f_8);
       +            dev_f_new[grid2index(nx-1, y-1,   z,  8,nx,ny,nz)] = fmax(0.0, f_8);
                else    // at -x-y boundary
       -            dev_f_new[grid2index(nx-1,ny-1,   z,  8)] = fmax(0.0, f_8);
       +            dev_f_new[grid2index(nx-1,ny-1,   z,  8,nx,ny,nz)] = fmax(0.0, f_8);
        
                // Edge 9 (-x,+y): Periodic
                if (x > 0 && y < ny-1)  // not at boundary
       -            dev_f_new[grid2index( x-1, y+1,   z,  9)] = fmax(0.0, f_9);
       +            dev_f_new[grid2index( x-1, y+1,   z,  9,nx,ny,nz)] = fmax(0.0, f_9);
                else if (x > 0)     // at +y boundary
       -            dev_f_new[grid2index( x-1,   0,   z,  9)] = fmax(0.0, f_9);
       +            dev_f_new[grid2index( x-1,   0,   z,  9,nx,ny,nz)] = fmax(0.0, f_9);
                else if (y < ny-1)  // at -x boundary
       -            dev_f_new[grid2index(nx-1, y+1,   z,  9)] = fmax(0.0, f_9);
       +            dev_f_new[grid2index(nx-1, y+1,   z,  9,nx,ny,nz)] = fmax(0.0, f_9);
                else    // at -x+y boundary
       -            dev_f_new[grid2index(nx-1,   0,   z,  9)] = fmax(0.0, f_9);
       +            dev_f_new[grid2index(nx-1,   0,   z,  9,nx,ny,nz)] = fmax(0.0, f_9);
        
                // Edge 10 (+x,-y): Periodic
                if (x < nx-1 && y > 0)  // not at boundary
       -            dev_f_new[grid2index( x+1, y-1,   z, 10)] = fmax(0.0, f_10);
       +            dev_f_new[grid2index( x+1, y-1,   z, 10,nx,ny,nz)] = fmax(0.0, f_10);
                else if (x < nx-1)  // at -y boundary
       -            dev_f_new[grid2index( x+1,ny-1,   z, 10)] = fmax(0.0, f_10);
       +            dev_f_new[grid2index( x+1,ny-1,   z, 10,nx,ny,nz)] = fmax(0.0, f_10);
                else if (y > 0)     // at +x boundary
       -            dev_f_new[grid2index(   0, y-1,   z, 10)] = fmax(0.0, f_10);
       +            dev_f_new[grid2index(   0, y-1,   z, 10,nx,ny,nz)] = fmax(0.0, f_10);
                else    // at +x-y boundary
       -            dev_f_new[grid2index(   0,ny-1,   z, 10)] = fmax(0.0, f_10);
       +            dev_f_new[grid2index(   0,ny-1,   z, 10,nx,ny,nz)] = fmax(0.0, f_10);
        
                // Edge 11 (+x,+z): Periodic & bounce-back (free slip)
                if (x < nx-1 && z < nz-1)   // not at boundary
       -            dev_f_new[grid2index( x+1,   y, z+1, 11)] = fmax(0.0, f_11);
       +            dev_f_new[grid2index( x+1,   y, z+1, 11,nx,ny,nz)] = fmax(0.0, f_11);
                else if (x < nx-1)  // at +z boundary
       -            dev_f_new[grid2index( x+1,   y,   0, 12)] = fmax(0.0, f_11);
       +            dev_f_new[grid2index( x+1,   y,   0, 12,nx,ny,nz)] = fmax(0.0, f_11);
                else if (z < nz-1)  // at +x boundary
       -            dev_f_new[grid2index(   0,   y, z+1, 11)] = fmax(0.0, f_11);
       +            dev_f_new[grid2index(   0,   y, z+1, 11,nx,ny,nz)] = fmax(0.0, f_11);
                else    // at +x+z boundary
       -            dev_f_new[grid2index(   0,   y,   0, 12)] = fmax(0.0, f_11);
       +            dev_f_new[grid2index(   0,   y,   0, 12,nx,ny,nz)] = fmax(0.0, f_11);
        
                // Edge 12 (-x,-z): Periodic & bounce back (free slip)
                if (x > 0 && z > 0) // not at boundary
       -            dev_f_new[grid2index( x-1,   y, z-1, 12)] = fmax(0.0, f_12);
       +            dev_f_new[grid2index( x-1,   y, z-1, 12,nx,ny,nz)] = fmax(0.0, f_12);
                else if (x > 0) // at -z boundary
       -            dev_f_new[grid2index( x-1,   y,nz-1, 11)] = fmax(0.0, f_12);
       +            dev_f_new[grid2index( x-1,   y,nz-1, 11,nx,ny,nz)] = fmax(0.0, f_12);
                else if (z > 0) // at -x boundary
       -            dev_f_new[grid2index(nx-1,   y, z-1, 12)] = fmax(0.0, f_12);
       +            dev_f_new[grid2index(nx-1,   y, z-1, 12,nx,ny,nz)] = fmax(0.0, f_12);
                else    // at -x-z boundary
       -            dev_f_new[grid2index(nx-1,   y,nz-1, 11)] = fmax(0.0, f_12);
       +            dev_f_new[grid2index(nx-1,   y,nz-1, 11,nx,ny,nz)] = fmax(0.0, f_12);
        
                // Edge 13 (+y,+z): Periodic & bounce-back (free slip)
                if (y < ny-1 && z < nz-1)   // not at boundary
       -            dev_f_new[grid2index(   x, y+1, z+1, 13)] = fmax(0.0, f_13);
       +            dev_f_new[grid2index(   x, y+1, z+1, 13,nx,ny,nz)] = fmax(0.0, f_13);
                else if (y < ny-1)  // at +z boundary
       -            dev_f_new[grid2index(   x, y+1,   0, 14)] = fmax(0.0, f_13);
       +            dev_f_new[grid2index(   x, y+1,   0, 14,nx,ny,nz)] = fmax(0.0, f_13);
                else if (z < nz-1)  // at +y boundary
       -            dev_f_new[grid2index(   x,   0, z+1, 13)] = fmax(0.0, f_13);
       +            dev_f_new[grid2index(   x,   0, z+1, 13,nx,ny,nz)] = fmax(0.0, f_13);
                else    // at +y+z boundary
       -            dev_f_new[grid2index(   x,   0,   0, 14)] = fmax(0.0, f_13);
       +            dev_f_new[grid2index(   x,   0,   0, 14,nx,ny,nz)] = fmax(0.0, f_13);
        
                // Edge 14 (-y,-z): Periodic & bounce-back (free slip)
                if (y > 0 && z > 0) // not at boundary
       -            dev_f_new[grid2index(   x, y-1, z-1, 14)] = fmax(0.0, f_14);
       +            dev_f_new[grid2index(   x, y-1, z-1, 14,nx,ny,nz)] = fmax(0.0, f_14);
                else if (y > 0) // at -z boundary
       -            dev_f_new[grid2index(   x, y-1,nz-1, 13)] = fmax(0.0, f_14);
       +            dev_f_new[grid2index(   x, y-1,nz-1, 13,nx,ny,nz)] = fmax(0.0, f_14);
                else if (z > 0) // at -y boundary
       -            dev_f_new[grid2index(   x,ny-1, z-1, 14)] = fmax(0.0, f_14);
       +            dev_f_new[grid2index(   x,ny-1, z-1, 14,nx,ny,nz)] = fmax(0.0, f_14);
                else    // at -y-z boundary
       -            dev_f_new[grid2index(   x,ny-1,nz-1, 13)] = fmax(0.0, f_14);
       +            dev_f_new[grid2index(   x,ny-1,nz-1, 13,nx,ny,nz)] = fmax(0.0, f_14);
        
                // Edge 15 (-x,+z): Periodic & bounce-back (free slip)
                if (x > 0 && z < nz-1)  // not at boundary
       -            dev_f_new[grid2index( x-1,   y, z+1, 15)] = fmax(0.0, f_15);
       +            dev_f_new[grid2index( x-1,   y, z+1, 15,nx,ny,nz)] = fmax(0.0, f_15);
                else if (x > 0)     // at +z boundary
       -            dev_f_new[grid2index( x-1,   y,   0, 16)] = fmax(0.0, f_15);
       +            dev_f_new[grid2index( x-1,   y,   0, 16,nx,ny,nz)] = fmax(0.0, f_15);
                else if (z < nz-1)  // at -x boundary
       -            dev_f_new[grid2index(nx-1,   y, z+1, 15)] = fmax(0.0, f_15);
       +            dev_f_new[grid2index(nx-1,   y, z+1, 15,nx,ny,nz)] = fmax(0.0, f_15);
                else    // at -x+z boundary
       -            dev_f_new[grid2index(nx-1,   y,   0, 16)] = fmax(0.0, f_15);
       +            dev_f_new[grid2index(nx-1,   y,   0, 16,nx,ny,nz)] = fmax(0.0, f_15);
        
                // Edge 16 (+x,-z): Periodic & bounce-back (free slip)
                if (x < nx-1 && z > 0)  // not at boundary
       -            dev_f_new[grid2index( x+1,   y, z-1, 16)] = fmax(0.0, f_16);
       +            dev_f_new[grid2index( x+1,   y, z-1, 16,nx,ny,nz)] = fmax(0.0, f_16);
                else if (x < nx-1)  // at -z boundary
       -            dev_f_new[grid2index( x+1,   y,nz-1, 15)] = fmax(0.0, f_16);
       +            dev_f_new[grid2index( x+1,   y,nz-1, 15,nx,ny,nz)] = fmax(0.0, f_16);
                else if (z > 0)     // at +x boundary
       -            dev_f_new[grid2index(   0,   y, z-1, 16)] = fmax(0.0, f_16);
       +            dev_f_new[grid2index(   0,   y, z-1, 16,nx,ny,nz)] = fmax(0.0, f_16);
                else    // at +x-z boundary
       -            dev_f_new[grid2index(   0,   y,nz-1, 15)] = fmax(0.0, f_16);
       +            dev_f_new[grid2index(   0,   y,nz-1, 15,nx,ny,nz)] = fmax(0.0, f_16);
        
                // Edge 17 (-y,+z): Periodic & bounce-back (free slip)
                if (y > 0 && z < nz-1)  // not at boundary
       -            dev_f_new[grid2index(   x, y-1, z+1, 17)] = fmax(0.0, f_17);
       +            dev_f_new[grid2index(   x, y-1, z+1, 17,nx,ny,nz)] = fmax(0.0, f_17);
                else if (y > 0)     // at +z boundary
       -            dev_f_new[grid2index(   x, y-1,   0, 18)] = fmax(0.0, f_17);
       +            dev_f_new[grid2index(   x, y-1,   0, 18,nx,ny,nz)] = fmax(0.0, f_17);
                else if (z < nz-1)  // at -y boundary
       -            dev_f_new[grid2index(   x,ny-1, z+1, 17)] = fmax(0.0, f_17);
       +            dev_f_new[grid2index(   x,ny-1, z+1, 17,nx,ny,nz)] = fmax(0.0, f_17);
                else    // at -y+z boundary
       -            dev_f_new[grid2index(   x,ny-1,   0, 18)] = fmax(0.0, f_17);
       +            dev_f_new[grid2index(   x,ny-1,   0, 18,nx,ny,nz)] = fmax(0.0, f_17);
        
                // Edge 18 (+y,-z): Periodic & bounce-back (free slip)
                if (y < ny-1 && z > 0)    // not at boundary
       -            dev_f_new[grid2index(   x, y+1, z-1, 18)] = fmax(0.0, f_18);
       +            dev_f_new[grid2index(   x, y+1, z-1, 18,nx,ny,nz)] = fmax(0.0, f_18);
                else if (y < ny-1)  // at -z boundary
       -            dev_f_new[grid2index(   x, y+1,   0, 17)] = fmax(0.0, f_18);
       +            dev_f_new[grid2index(   x, y+1,   0, 17,nx,ny,nz)] = fmax(0.0, f_18);
                else if (z > 0)     // at +y boundary
       -            dev_f_new[grid2index(   x,   0, z-1, 18)] = fmax(0.0, f_18);
       +            dev_f_new[grid2index(   x,   0, z-1, 18,nx,ny,nz)] = fmax(0.0, f_18);
                else    // at +y-z boundary
       -            dev_f_new[grid2index(   x,   0,   0, 17)] = fmax(0.0, f_18);
       +            dev_f_new[grid2index(   x,   0,   0, 17,nx,ny,nz)] = fmax(0.0, f_18);
       +        // */
       +        
        
            }
       +#ifndef LBM_GPU
       +            }}}
       +#endif
        }
        
        #endif
   DIR diff --git a/src/porousflow.cpp b/src/porousflow.cpp
       t@@ -64,7 +64,7 @@ int main(const int argc, const char *argv[])
                    // mem
                    DEM dem(argvi, verbose, 0, dry, 0, 0);
        
       -            // Otherwise, start iterating through time
       +            // Start iterating through time
                    dem.startDarcy();
        
        
   DIR diff --git a/src/sphere.cpp b/src/sphere.cpp
       t@@ -66,6 +66,7 @@ DEM::DEM(const std::string inputbin,
                transferToGlobalDeviceMemory();
            }
        
       +
        }
        
        // Destructor: Liberates dynamically allocated host memory
   DIR diff --git a/src/sphere.h b/src/sphere.h
       t@@ -4,6 +4,7 @@
        
        #include <vector>
        
       +//#include "eigen-nvcc/Eigen/Core"
        
        #include "datatypes.h"
        
       t@@ -146,6 +147,13 @@ class DEM {
                // Darcy-flow values
                int d_nx, d_ny, d_nz;     // Number of cells in each dim
                Float d_dx, d_dy, d_dz;   // Cell length in each dim
       +        Float* d_P;   // Cell hydraulic pressures
       +        Float3* d_dP; // Cell spatial gradient in pressures
       +        Float* d_K;   // Cell hydraulic conductivities (anisotropic)
       +        Float* d_S;   // Cell hydraulic storativity
       +        Float* d_W;   // Cell hydraulic recharge
       +        Float mu;     // Fluid viscosity
       +        
        
        
            public:
       t@@ -212,12 +220,44 @@ class DEM {
                        const double lower_cutoff = 0.0,
                        const double upper_cutoff = 1.0e9);
        
       +        
       +        ///// Darcy flow functions
       +
       +        // Memory allocation and destruction
       +        void initDarcyMem();
       +        void freeDarcyMem();
       +
       +        // Set some values for the Darcy parameters
       +        void initDarcyVals();
       +
       +        // Get linear (1D) index from 3D coordinate
       +        unsigned int idx(
       +                const unsigned int x,
       +                const unsigned int y,
       +                const unsigned int z);
       +
       +        // Get minimum value in 1D array 
       +        Float minVal3dArr(Float* arr);
       +
       +        // Finds central difference gradients
       +        void findDarcyGradients();
       +
       +        // Set gradient to zero at grid edges
       +        void setDarcyBCNeumannZero();
       +
       +        // Perform a single time step, explicit integration
       +        void explDarcyStep(const Float dt);
       +
                // Calculate Darcy fluid flow through material
                void startDarcy(
                        const Float cellsizemultiplier = 1.0);
       -};
       -
        
       +        // Print Darcy arrays to file stream
       +        void printDarcyArray(FILE* stream, Float* arr);
       +        void printDarcyArray(FILE* stream, Float* arr, std::string desc);
       +        void printDarcyArray3(FILE* stream, Float3* arr);
       +        void printDarcyArray3(FILE* stream, Float3* arr, std::string desc);
       +};
        
        #endif
        // vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4