mx1.adamsgaard.dk/1/src/sphere/file/src/darcy.cuh.gph

       tdarcy.cuh - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
  HTML git clone git://src.adamsgaard.dk/sphere
   DIR Log
   DIR Files
   DIR Refs
   DIR LICENSE
       ---
       tdarcy.cuh (70180B)
       ---
            1 // darcy.cuh
            2 // CUDA implementation of Darcy porous flow
            3 
            4 #include <iostream>
            5 #include <cuda.h>
            6 //#include <cutil_math.h>
            7 #include <helper_math.h>
            8 
            9 #include "vector_arithmetic.h"  // for arbitrary precision vectors
           10 #include "sphere.h"
           11 #include "datatypes.h"
           12 #include "utility.h"
           13 #include "constants.cuh"
           14 #include "debug.h"
           15 
           16 // Initialize memory
           17 void DEM::initDarcyMemDev(void)
           18 {
           19     // size of scalar field
           20     unsigned int memSizeF = sizeof(Float)*darcyCells();
           21 
           22     cudaMalloc((void**)&dev_darcy_dp_expl, memSizeF); // Expl. pressure change
           23     cudaMalloc((void**)&dev_darcy_p_old, memSizeF); // old pressure
           24     cudaMalloc((void**)&dev_darcy_p, memSizeF);     // hydraulic pressure
           25     cudaMalloc((void**)&dev_darcy_p_new, memSizeF); // updated pressure
           26     cudaMalloc((void**)&dev_darcy_v, memSizeF*3);   // cell hydraulic velocity
           27     cudaMalloc((void**)&dev_darcy_vp_avg, memSizeF*3); // avg. particle velocity
           28     cudaMalloc((void**)&dev_darcy_phi, memSizeF);   // cell porosity
           29     cudaMalloc((void**)&dev_darcy_dphi, memSizeF);  // cell porosity change
           30     cudaMalloc((void**)&dev_darcy_norm, memSizeF);  // normalized residual
           31     cudaMalloc((void**)&dev_darcy_f_p, sizeof(Float4)*np); // pressure force
           32     cudaMalloc((void**)&dev_darcy_k, memSizeF);        // hydraulic permeability
           33     cudaMalloc((void**)&dev_darcy_grad_k, memSizeF*3); // grad(permeability)
           34     cudaMalloc((void**)&dev_darcy_div_v_p, memSizeF);  // divergence(v_p)
           35     cudaMalloc((void**)&dev_darcy_grad_p, memSizeF*3); // grad(pressure)
           36     cudaMalloc((void**)&dev_darcy_p_constant,
           37             sizeof(int)*darcyCells()); // grad(pressure)
           38 
           39     checkForCudaErrors("End of initDarcyMemDev");
           40 }
           41 
           42 // Free memory
           43 void DEM::freeDarcyMemDev()
           44 {
           45     cudaFree(dev_darcy_dp_expl);
           46     cudaFree(dev_darcy_p_old);
           47     cudaFree(dev_darcy_p);
           48     cudaFree(dev_darcy_p_new);
           49     cudaFree(dev_darcy_v);
           50     cudaFree(dev_darcy_vp_avg);
           51     cudaFree(dev_darcy_phi);
           52     cudaFree(dev_darcy_dphi);
           53     cudaFree(dev_darcy_norm);
           54     cudaFree(dev_darcy_f_p);
           55     cudaFree(dev_darcy_k);
           56     cudaFree(dev_darcy_grad_k);
           57     cudaFree(dev_darcy_div_v_p);
           58     cudaFree(dev_darcy_grad_p);
           59     cudaFree(dev_darcy_p_constant);
           60 }
           61 
           62 // Transfer to device
           63 void DEM::transferDarcyToGlobalDeviceMemory(int statusmsg)
           64 {
           65     checkForCudaErrors("Before attempting cudaMemcpy in "
           66             "transferDarcyToGlobalDeviceMemory");
           67 
           68     //if (verbose == 1 && statusmsg == 1)
           69     //std::cout << "  Transfering fluid data to the device:           ";
           70 
           71     // memory size for a scalar field
           72     unsigned int memSizeF  = sizeof(Float)*darcyCells();
           73 
           74     //writeNSarray(ns.p, "ns.p.txt");
           75 
           76     cudaMemcpy(dev_darcy_p, darcy.p, memSizeF, cudaMemcpyHostToDevice);
           77     checkForCudaErrors("transferDarcytoGlobalDeviceMemory after first "
           78             "cudaMemcpy");
           79     cudaMemcpy(dev_darcy_v, darcy.v, memSizeF*3, cudaMemcpyHostToDevice);
           80     cudaMemcpy(dev_darcy_phi, darcy.phi, memSizeF, cudaMemcpyHostToDevice);
           81     cudaMemcpy(dev_darcy_dphi, darcy.dphi, memSizeF, cudaMemcpyHostToDevice);
           82     cudaMemcpy(dev_darcy_f_p, darcy.f_p, sizeof(Float4)*np,
           83             cudaMemcpyHostToDevice);
           84     cudaMemcpy(dev_darcy_p_constant, darcy.p_constant, sizeof(int)*darcyCells(),
           85             cudaMemcpyHostToDevice);
           86 
           87     checkForCudaErrors("End of transferDarcyToGlobalDeviceMemory");
           88     //if (verbose == 1 && statusmsg == 1)
           89     //std::cout << "Done" << std::endl;
           90 }
           91 
           92 // Transfer from device
           93 void DEM::transferDarcyFromGlobalDeviceMemory(int statusmsg)
           94 {
           95     if (verbose == 1 && statusmsg == 1)
           96         std::cout << "  Transfering fluid data from the device:         ";
           97 
           98     // memory size for a scalar field
           99     unsigned int memSizeF  = sizeof(Float)*darcyCells();
          100 
          101     cudaMemcpy(darcy.p, dev_darcy_p, memSizeF, cudaMemcpyDeviceToHost);
          102     checkForCudaErrors("In transferDarcyFromGlobalDeviceMemory, dev_darcy_p", 0);
          103     cudaMemcpy(darcy.v, dev_darcy_v, memSizeF*3, cudaMemcpyDeviceToHost);
          104     cudaMemcpy(darcy.phi, dev_darcy_phi, memSizeF, cudaMemcpyDeviceToHost);
          105     cudaMemcpy(darcy.dphi, dev_darcy_dphi, memSizeF, cudaMemcpyDeviceToHost);
          106     cudaMemcpy(darcy.f_p, dev_darcy_f_p, sizeof(Float4)*np,
          107             cudaMemcpyDeviceToHost);
          108     cudaMemcpy(darcy.k, dev_darcy_k, memSizeF, cudaMemcpyDeviceToHost);
          109     cudaMemcpy(darcy.p_constant, dev_darcy_p_constant, sizeof(int)*darcyCells(),
          110             cudaMemcpyDeviceToHost);
          111 
          112     checkForCudaErrors("End of transferDarcyFromGlobalDeviceMemory", 0);
          113     if (verbose == 1 && statusmsg == 1)
          114         std::cout << "Done" << std::endl;
          115 }
          116 
          117 // Transfer the normalized residuals from device to host
          118 void DEM::transferDarcyNormFromGlobalDeviceMemory()
          119 {
          120     cudaMemcpy(darcy.norm, dev_darcy_norm, sizeof(Float)*darcyCells(),
          121             cudaMemcpyDeviceToHost);
          122     checkForCudaErrors("End of transferDarcyNormFromGlobalDeviceMemory");
          123 }
          124 
          125 // Transfer the pressures from device to host
          126 void DEM::transferDarcyPressuresFromGlobalDeviceMemory()
          127 {
          128     cudaMemcpy(darcy.p, dev_darcy_p, sizeof(Float)*darcyCells(),
          129             cudaMemcpyDeviceToHost);
          130     checkForCudaErrors("End of transferDarcyNormFromGlobalDeviceMemory");
          131 }
          132 
          133 // Get linear index from 3D grid position
          134 __inline__ __device__ unsigned int d_idx(
          135         const int x, const int y, const int z)
          136 {
          137     // without ghost nodes
          138     //return x + dev_grid.num[0]*y + dev_grid.num[0]*dev_grid.num[1]*z;
          139 
          140     // with ghost nodes
          141     // the ghost nodes are placed at x,y,z = -1 and WIDTH
          142     return (x+1) + (devC_grid.num[0]+2)*(y+1) +
          143         (devC_grid.num[0]+2)*(devC_grid.num[1]+2)*(z+1);
          144 }
          145 
          146 // Get linear index of velocity node from 3D grid position in staggered grid
          147 __inline__ __device__ unsigned int d_vidx(
          148         const int x, const int y, const int z)
          149 {
          150     // with ghost nodes
          151     // the ghost nodes are placed at x,y,z = -1 and WIDTH+1
          152     return (x+1) + (devC_grid.num[0]+3)*(y+1)
          153         + (devC_grid.num[0]+3)*(devC_grid.num[1]+3)*(z+1);
          154 }
          155 
          156 // The normalized residuals are given an initial value of 0, since the values at
          157 // the Dirichlet boundaries aren't written during the iterations.
          158 __global__ void setDarcyNormZero(
          159         Float* __restrict__ dev_darcy_norm)
          160 {
          161     // 3D thread index
          162     const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
          163     const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
          164     const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
          165 
          166     // check that we are not outside the fluid grid
          167     if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2])
          168         dev_darcy_norm[d_idx(x,y,z)] = 0.0;
          169 }
          170 
          171 // Set an array of scalars to 0.0 inside devC_grid
          172     template<typename T>
          173 __global__ void setDarcyZeros(T* __restrict__ dev_scalarfield)
          174 {
          175     // 3D thread index
          176     const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
          177     const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
          178     const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
          179 
          180     // check that we are not outside the fluid grid
          181     if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2])
          182         dev_scalarfield[d_idx(x,y,z)] = 0.0;
          183 }
          184 
          185 
          186 // Update a field in the ghost nodes from their parent cell values. The edge
          187 // (diagonal) cells are not written since they are not read. Launch this kernel
          188 // for all cells in the grid using
          189 // setDarcyGhostNodes<datatype><<<.. , ..>>>( .. );
          190     template<typename T>
          191 __global__ void setDarcyGhostNodes(
          192         T* __restrict__ dev_scalarfield,
          193         const int bc_xn,
          194         const int bc_xp,
          195         const int bc_yn,
          196         const int bc_yp,
          197         const int bc_bot,
          198         const int bc_top)
          199 {
          200     // 3D thread index
          201     const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
          202     const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
          203     const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
          204 
          205     // Grid dimensions
          206     const unsigned int nx = devC_grid.num[0];
          207     const unsigned int ny = devC_grid.num[1];
          208     const unsigned int nz = devC_grid.num[2];
          209 
          210     // check that we are not outside the fluid grid
          211     if (x < nx && y < ny && z < nz) {
          212 
          213         const T val = dev_scalarfield[d_idx(x,y,z)];
          214 
          215         // x
          216         if (x == 0 && bc_xn == 0)
          217             dev_scalarfield[idx(-1,y,z)] = val;     // Dirichlet
          218         if (x == 1 && bc_xn == 1)
          219             dev_scalarfield[idx(-1,y,z)] = val;     // Neumann
          220         if (x == 0 && bc_xn == 2)
          221             dev_scalarfield[idx(nx,y,z)] = val;     // Periodic -y
          222 
          223         if (x == nx-1 && bc_xp == 0)
          224             dev_scalarfield[idx(nx,y,z)] = val;     // Dirichlet
          225         if (x == nx-2 && bc_xp == 1)
          226             dev_scalarfield[idx(nx,y,z)] = val;     // Neumann
          227         if (x == nx-1 && bc_xp == 2)
          228             dev_scalarfield[idx(-1,y,z)] = val;     // Periodic +x
          229 
          230         // y
          231         if (y == 0 && bc_yn == 0)
          232             dev_scalarfield[idx(x,-1,z)] = val;     // Dirichlet
          233         if (y == 1 && bc_yn == 1)
          234             dev_scalarfield[idx(x,-1,z)] = val;     // Neumann
          235         if (y == 0 && bc_yn == 2)
          236             dev_scalarfield[idx(x,ny,z)] = val;     // Periodic -y
          237 
          238         if (y == ny-1 && bc_yp == 0)
          239             dev_scalarfield[idx(x,ny,z)] = val;     // Dirichlet
          240         if (y == ny-2 && bc_yp == 1)
          241             dev_scalarfield[idx(x,ny,z)] = val;     // Neumann
          242         if (y == ny-1 && bc_yp == 2)
          243             dev_scalarfield[idx(x,-1,z)] = val;     // Periodic +y
          244 
          245         // z
          246         if (z == 0 && bc_bot == 0)
          247             dev_scalarfield[idx(x,y,-1)] = val;     // Dirichlet
          248         if (z == 1 && bc_bot == 1)
          249             dev_scalarfield[idx(x,y,-1)] = val;     // Neumann
          250         if (z == 0 && bc_bot == 2)
          251             dev_scalarfield[idx(x,y,nz)] = val;     // Periodic -z
          252 
          253         if (z == nz-1 && bc_top == 0)
          254             dev_scalarfield[idx(x,y,nz)] = val;     // Dirichlet
          255         if (z == nz-2 && bc_top == 1)
          256             dev_scalarfield[idx(x,y,nz)] = val;     // Neumann
          257         if (z == nz-1 && bc_top == 2)
          258             dev_scalarfield[idx(x,y,-1)] = val;     // Periodic +z
          259     }
          260 }
          261 
          262 // Update a field in the ghost nodes from their parent cell values. The edge
          263 // (diagonal) cells are not written since they are not read. Launch this kernel
          264 // for all cells in the grid using
          265 // setDarcyGhostNodes<datatype><<<.. , ..>>>( .. );
          266     template<typename T>
          267 __global__ void setDarcyGhostNodesFlux(
          268         T* __restrict__ dev_scalarfield, // out
          269         const int bc_bot, // in
          270         const int bc_top, // in
          271         const Float bc_bot_flux, // in
          272         const Float bc_top_flux, // in
          273         const Float* __restrict__ dev_darcy_k, // in
          274         const Float mu) // in
          275 {
          276     // 3D thread index
          277     const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
          278     const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
          279     const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
          280 
          281     // Grid dimensions
          282     const unsigned int nx = devC_grid.num[0];
          283     const unsigned int ny = devC_grid.num[1];
          284     const unsigned int nz = devC_grid.num[2];
          285 
          286     // check that we are not outside the fluid grid
          287     if (x < nx && y < ny && z < nz && (bc_bot == 4 || bc_top == 4)) {
          288 
          289         const T p = dev_scalarfield[d_idx(x,y,z)];
          290         const Float k = dev_darcy_k[d_idx(x,y,z)];
          291         const Float dz = devC_grid.L[2]/nz;
          292 
          293         Float q_z = 0.;
          294         if (z == 0)
          295             q_z = bc_bot_flux;
          296         else if (z == nz-1)
          297             q_z = bc_top_flux;
          298 
          299         const Float p_ghost = -mu/k*q_z * dz + p;
          300 
          301         // z
          302         if (z == 0 && bc_bot == 4)
          303             dev_scalarfield[idx(x,y,-1)] = p_ghost;
          304 
          305         if (z == nz-1 && bc_top == 4)
          306             dev_scalarfield[idx(x,y,nz)] = p_ghost;
          307     }
          308 }
          309 
          310 // Return the volume of a sphere with radius r
          311 __forceinline__ __device__ Float sphereVolume(const Float r)
          312 {
          313     return 4.0/3.0*M_PI*pow(r, 3);
          314 }
          315 
          316 __device__ Float3 abs(const Float3 v)
          317 {
          318     return MAKE_FLOAT3(abs(v.x), abs(v.y), abs(v.z));
          319 }
          320 
          321 
          322 // Returns a weighting factor based on particle position and fluid center
          323 // position
          324 __device__ Float weight(
          325         const Float3 x_p, // in: Particle center position
          326         const Float3 x_f, // in: Fluid pressure node position
          327         const Float  dx,  // in: Cell spacing, x
          328         const Float  dy,  // in: Cell spacing, y
          329         const Float  dz)  // in: Cell spacing, z
          330 {
          331     const Float3 dist = abs(x_p - x_f);
          332     if (dist.x < dx && dist.y < dy && dist.z < dz)
          333         return (1.0 - dist.x/dx)*(1.0 - dist.y/dy)*(1.0 - dist.z/dz);
          334     else
          335         return 0.0;
          336 }
          337 
          338 // Returns a weighting factor based on particle position and fluid center
          339 // position
          340 __device__ Float weightDist(
          341         const Float3 x,   // in: Vector between cell and particle center
          342         const Float  dx,  // in: Cell spacing, x
          343         const Float  dy,  // in: Cell spacing, y
          344         const Float  dz)  // in: Cell spacing, z
          345 {
          346     const Float3 dist = abs(x);
          347     if (dist.x < dx && dist.y < dy && dist.z < dz)
          348         return (1.0 - dist.x/dx)*(1.0 - dist.y/dy)*(1.0 - dist.z/dz);
          349     else
          350         return 0.0;
          351 }
          352 
          353 // Find the porosity in each cell on the base of a bilinear weighing scheme,
          354 // centered at the cell center.
          355 __global__ void findDarcyPorositiesLinear(
          356         const unsigned int* __restrict__ dev_cellStart,   // in
          357         const unsigned int* __restrict__ dev_cellEnd,     // in
          358         const Float4* __restrict__ dev_x_sorted,          // in
          359         const Float4* __restrict__ dev_vel_sorted,        // in
          360         const unsigned int iteration,                     // in
          361         const unsigned int ndem,                          // in
          362         const unsigned int np,                            // in
          363         const Float c_phi,                                // in
          364         Float*  __restrict__ dev_darcy_phi,               // in + out
          365         Float*  __restrict__ dev_darcy_dphi,              // in + out
          366         Float*  __restrict__ dev_darcy_div_v_p,           // out
          367         Float3* __restrict__ dev_darcy_vp_avg)            // out
          368 {
          369     // 3D thread index
          370     const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
          371     const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
          372     const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
          373 
          374     // Grid dimensions
          375     const unsigned int nx = devC_grid.num[0];
          376     const unsigned int ny = devC_grid.num[1];
          377     const unsigned int nz = devC_grid.num[2];
          378 
          379     // Cell dimensions
          380     const Float dx = devC_grid.L[0]/nx;
          381     const Float dy = devC_grid.L[1]/ny;
          382     const Float dz = devC_grid.L[2]/nz;
          383 
          384     Float solid_volume = 0.0;
          385     Float solid_volume_new = 0.0;
          386     Float4 xr;  // particle pos. and radius
          387 
          388     // check that we are not outside the fluid grid
          389     if (x < nx && y < ny && z < nz) {
          390 
          391         if (np > 0) {
          392 
          393             // Cell center position
          394             const Float3 X = MAKE_FLOAT3(
          395                     x*dx + 0.5*dx,
          396                     y*dy + 0.5*dy,
          397                     z*dz + 0.5*dz);
          398 
          399             //Float d, r;
          400             Float s, vol_p;
          401             Float phi = 1.00;
          402             Float4 v;
          403             Float3 vp_avg_num   = MAKE_FLOAT3(0.0, 0.0, 0.0);
          404             Float  vp_avg_denum = 0.0;
          405 
          406             // Read old porosity
          407             Float phi_0 = dev_darcy_phi[d_idx(x,y,z)];
          408 
          409             // The cell 3d index
          410             const int3 gridPos = make_int3((int)x,(int)y,(int)z);
          411 
          412             // The neighbor cell 3d index
          413             int3 targetCell;
          414 
          415             // The distance modifier for particles across periodic boundaries
          416             Float3 dist, distmod;
          417 
          418             unsigned int cellID, startIdx, endIdx, i;
          419 
          420             // Iterate over 27 neighbor cells, R = cell width
          421             for (int z_dim=-1; z_dim<2; ++z_dim) { // z-axis
          422                 for (int y_dim=-1; y_dim<2; ++y_dim) { // y-axis
          423                     for (int x_dim=-1; x_dim<2; ++x_dim) { // x-axis
          424 
          425                         // Index of neighbor cell this iteration is looking at
          426                         targetCell = gridPos + make_int3(x_dim, y_dim, z_dim);
          427 
          428                         // Do not artifically enhance porosity at lower boundary
          429                         if (targetCell.z == -1)
          430                             targetCell.z = 1;
          431 
          432                         // Mirror particle grid at frictionless boundaries
          433                         if (devC_grid.periodic == 2) {
          434                             if (targetCell.y == -1) {
          435                                 targetCell.y = 1;
          436                             }
          437                             if (targetCell.y == devC_grid.num[1]) {
          438                                 targetCell.y = devC_grid.num[1] - 2;
          439                             }
          440                         }
          441 
          442                         // Get distance modifier for interparticle
          443                         // vector, if it crosses a periodic boundary
          444                         distmod = MAKE_FLOAT3(0.0, 0.0, 0.0);
          445                         if (findDistMod(&targetCell, &distmod) != -1) {
          446 
          447                             // Calculate linear cell ID
          448                             cellID = targetCell.x
          449                                 + targetCell.y * devC_grid.num[0]
          450                                 + (devC_grid.num[0] * devC_grid.num[1])
          451                                 * targetCell.z;
          452 
          453                             // Lowest particle index in cell
          454                             startIdx = dev_cellStart[cellID];
          455 
          456                             // Make sure cell is not empty
          457                             if (startIdx != 0xffffffff) {
          458 
          459                                 // Highest particle index in cell
          460                                 endIdx = dev_cellEnd[cellID];
          461 
          462                                 // Iterate over cell particles
          463                                 for (i=startIdx; i<endIdx; ++i) {
          464 
          465                                     // Read particle position and radius
          466                                     xr = dev_x_sorted[i];
          467                                     v  = dev_vel_sorted[i];
          468                                     //x3 = MAKE_FLOAT3(xr.x, xr.y, xr.z);
          469                                     //v3 = MAKE_FLOAT3(v.x, v.y, v.z);
          470 
          471                                     // Find center distance
          472                                     dist = MAKE_FLOAT3(
          473                                             X.x - xr.x,
          474                                             X.y - xr.y,
          475                                             X.z - xr.z);
          476                                     dist += distmod;
          477                                     s = weightDist(dist, dx, dy, dz);
          478                                     vol_p = sphereVolume(xr.w);
          479 
          480                                     solid_volume += s*vol_p;
          481 
          482                                     // Summation procedure for average velocity
          483                                     vp_avg_num +=
          484                                         s*vol_p*MAKE_FLOAT3(v.x, v.y, v.z);
          485                                     vp_avg_denum += s*vol_p;
          486 
          487                                     // Find projected new void volume
          488                                     // Eulerian update of positions
          489                                     xr += v*devC_dt;
          490                                     dist = MAKE_FLOAT3(
          491                                             X.x - xr.x,
          492                                             X.y - xr.y,
          493                                             X.z - xr.z) + distmod;
          494                                     solid_volume_new +=
          495                                         weightDist(dist, dx, dy, dz)*vol_p;
          496                                 }
          497                             }
          498                         }
          499                     }
          500                 }
          501             }
          502 
          503             Float cell_volume = dx*dy*dz;
          504             if (z == nz - 1)
          505                 cell_volume *= 0.875;
          506 
          507             // Make sure that the porosity is in the interval [0.0;1.0]
          508             phi = fmin(0.9, fmax(0.1, 1.0 - solid_volume/cell_volume));
          509             Float phi_new = fmin(0.9, fmax(0.1,
          510                         1.0 - solid_volume_new/cell_volume));
          511 
          512             Float dphi;
          513             Float3 vp_avg;
          514             if (iteration == 0) {
          515                 dphi = 0.0;
          516                 vp_avg = MAKE_FLOAT3(0.0, 0.0, 0.0);
          517             } else {
          518                 dphi = 0.5*(phi_new - phi_0);
          519                 vp_avg = vp_avg_num/fmax(1.0e-16, vp_avg_denum);
          520             }
          521 
          522             // Save porosity and porosity change
          523             const unsigned int cellidx = d_idx(x,y,z);
          524             dev_darcy_phi[cellidx]     = phi*c_phi;
          525             dev_darcy_dphi[cellidx]    = dphi*c_phi;
          526             dev_darcy_vp_avg[cellidx] = vp_avg;
          527 
          528 #ifdef CHECK_FLUID_FINITE
          529             (void)checkFiniteFloat("phi", x, y, z, phi);
          530             (void)checkFiniteFloat("dphi", x, y, z, dphi);
          531 #endif
          532         } else {
          533 
          534             const unsigned int cellidx = d_idx(x,y,z);
          535 
          536             dev_darcy_phi[cellidx]  = 0.9;
          537             dev_darcy_dphi[cellidx] = 0.0;
          538         }
          539     }
          540 }
          541 
          542 
          543 // Copy the porosity, porosity change, div_v_p and vp_avg values to the grid
          544 // edges from the grid interior at the frictionless Y boundaries (grid.periodic
          545 // == 2).
          546 __global__ void copyDarcyPorositiesToEdges(
          547         Float*  __restrict__ dev_darcy_phi,               // in + out
          548         Float*  __restrict__ dev_darcy_dphi,              // in + out
          549         Float*  __restrict__ dev_darcy_div_v_p,           // in + out
          550         Float3* __restrict__ dev_darcy_vp_avg)            // in + out
          551 {
          552     // 3D thread index
          553     const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
          554     const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
          555     const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
          556 
          557     // Grid dimensions
          558     const unsigned int nx = devC_grid.num[0];
          559     const unsigned int ny = devC_grid.num[1];
          560     const unsigned int nz = devC_grid.num[2];
          561 
          562     // check that we are not outside the fluid grid
          563     if (devC_grid.periodic == 2 &&
          564             x < nx && (y == 0 || y == ny - 1) && z < nz) {
          565 
          566             // Read porosities from this cell
          567             int y_read;
          568 
          569             // read values from inside cells
          570             if (y == 0)
          571                 y_read = 1;
          572             if (y == ny - 1)
          573                 y_read = ny - 2;
          574 
          575             const unsigned int readidx = d_idx(x, y_read, z);
          576             const unsigned int writeidx = d_idx(x, y, z);
          577 
          578             dev_darcy_phi[writeidx] = dev_darcy_phi[readidx];
          579             dev_darcy_dphi[writeidx] = dev_darcy_dphi[readidx];
          580             dev_darcy_div_v_p[writeidx] = dev_darcy_div_v_p[readidx];
          581             dev_darcy_vp_avg[writeidx] = dev_darcy_vp_avg[readidx];
          582     }
          583 }
          584 
          585 
          586 // Copy the porosity, porosity change, div_v_p and vp_avg values to the grid
          587 // bottom from the grid interior at the frictionless Y boundaries (grid.periodic
          588 // == 2).
          589 __global__ void copyDarcyPorositiesToBottom(
          590         Float*  __restrict__ dev_darcy_phi,               // in + out
          591         Float*  __restrict__ dev_darcy_dphi,              // in + out
          592         Float*  __restrict__ dev_darcy_div_v_p,           // in + out
          593         Float3* __restrict__ dev_darcy_vp_avg)            // in + out
          594 {
          595     // 3D thread index
          596     const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
          597     const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
          598     const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
          599 
          600     // Grid dimensions
          601     const unsigned int nx = devC_grid.num[0];
          602     const unsigned int ny = devC_grid.num[1];
          603 
          604     // check that we are not outside the fluid grid
          605     if (devC_grid.periodic == 2 &&
          606             x < nx && y < ny && z == 0) {
          607 
          608             const unsigned int readidx = d_idx(x, y, 1);
          609             const unsigned int writeidx = d_idx(x, y, z);
          610 
          611             dev_darcy_phi[writeidx] = dev_darcy_phi[readidx];
          612             dev_darcy_dphi[writeidx] = dev_darcy_dphi[readidx];
          613             dev_darcy_div_v_p[writeidx] = dev_darcy_div_v_p[readidx];
          614             dev_darcy_vp_avg[writeidx] = dev_darcy_vp_avg[readidx];
          615     }
          616 }
          617 
          618 
          619 // Find the porosity in each cell on the base of a sphere, centered at the cell
          620 // center.
          621 __global__ void findDarcyPorosities(
          622         const unsigned int* __restrict__ dev_cellStart,   // in
          623         const unsigned int* __restrict__ dev_cellEnd,     // in
          624         const Float4* __restrict__ dev_x_sorted,          // in
          625         const Float4* __restrict__ dev_vel_sorted,        // in
          626         const unsigned int iteration,                     // in
          627         const unsigned int ndem,                          // in
          628         const unsigned int np,                            // in
          629         const Float c_phi,                                // in
          630         Float*  __restrict__ dev_darcy_phi,               // in + out
          631         Float*  __restrict__ dev_darcy_dphi)              // in + out
          632 {
          633     // 3D thread index
          634     const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
          635     const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
          636     const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
          637 
          638     // Grid dimensions
          639     const unsigned int nx = devC_grid.num[0];
          640     const unsigned int ny = devC_grid.num[1];
          641     const unsigned int nz = devC_grid.num[2];
          642 
          643     // Cell dimensions
          644     const Float dx = devC_grid.L[0]/nx;
          645     const Float dy = devC_grid.L[1]/ny;
          646     const Float dz = devC_grid.L[2]/nz;
          647 
          648     // Cell sphere radius
          649     const Float R = fmin(dx, fmin(dy,dz)) * 0.5; // diameter = cell width
          650     const Float cell_volume = sphereVolume(R);
          651 
          652     Float void_volume = cell_volume;     // current void volume
          653     Float void_volume_new = cell_volume; // projected new void volume
          654     Float4 xr;  // particle pos. and radius
          655 
          656     // check that we are not outside the fluid grid
          657     if (x < nx && y < ny && z < nz) {
          658 
          659         if (np > 0) {
          660 
          661             // Cell sphere center position
          662             const Float3 X = MAKE_FLOAT3(
          663                     x*dx + 0.5*dx,
          664                     y*dy + 0.5*dy,
          665                     z*dz + 0.5*dz);
          666 
          667             Float d, r;
          668             Float phi = 1.00;
          669             Float4 v;
          670             //unsigned int n = 0;
          671 
          672             // Read old porosity
          673             Float phi_0 = dev_darcy_phi[d_idx(x,y,z)];
          674 
          675             // The cell 3d index
          676             const int3 gridPos = make_int3((int)x,(int)y,(int)z);
          677 
          678             // The neighbor cell 3d index
          679             int3 targetCell;
          680 
          681             // The distance modifier for particles across periodic boundaries
          682             Float3 dist, distmod;
          683 
          684             unsigned int cellID, startIdx, endIdx, i;
          685 
          686             // Iterate over 27 neighbor cells, R = cell width
          687             for (int z_dim=-1; z_dim<2; ++z_dim) { // z-axis
          688                 for (int y_dim=-1; y_dim<2; ++y_dim) { // y-axis
          689                     for (int x_dim=-1; x_dim<2; ++x_dim) { // x-axis
          690 
          691                         // Index of neighbor cell this iteration is looking at
          692                         targetCell = gridPos + make_int3(x_dim, y_dim, z_dim);
          693 
          694                         // Get distance modifier for interparticle
          695                         // vector, if it crosses a periodic boundary
          696                         distmod = MAKE_FLOAT3(0.0, 0.0, 0.0);
          697                         if (findDistMod(&targetCell, &distmod) != -1) {
          698 
          699                             // Calculate linear cell ID
          700                             cellID = targetCell.x
          701                                 + targetCell.y * devC_grid.num[0]
          702                                 + (devC_grid.num[0] * devC_grid.num[1])
          703                                 * targetCell.z;
          704 
          705                             // Lowest particle index in cell
          706                             startIdx = dev_cellStart[cellID];
          707 
          708                             // Make sure cell is not empty
          709                             if (startIdx != 0xffffffff) {
          710 
          711                                 // Highest particle index in cell
          712                                 endIdx = dev_cellEnd[cellID];
          713 
          714                                 // Iterate over cell particles
          715                                 for (i=startIdx; i<endIdx; ++i) {
          716 
          717                                     // Read particle position and radius
          718                                     xr = dev_x_sorted[i];
          719                                     v  = dev_vel_sorted[i];
          720                                     r = xr.w;
          721 
          722                                     // Find center distance
          723                                     dist = MAKE_FLOAT3(
          724                                             X.x - xr.x,
          725                                             X.y - xr.y,
          726                                             X.z - xr.z);
          727                                     dist += distmod;
          728                                     d = length(dist);
          729 
          730                                     // Lens shaped intersection
          731                                     if ((R - r) < d && d < (R + r))
          732                                         void_volume -=
          733                                             1.0/(12.0*d) * (
          734                                                     M_PI*(R + r - d)*(R + r - d)
          735                                                     *(d*d + 2.0*d*r - 3.0*r*r
          736                                                         + 2.0*d*R + 6.0*r*R
          737                                                         - 3.0*R*R) );
          738 
          739                                     // Particle fully contained in cell sphere
          740                                     if (d <= R - r)
          741                                         void_volume -= sphereVolume(r);
          742 
          743                                     //// Find projected new void volume
          744                                     // Eulerian update of positions
          745                                     xr += v*devC_dt;
          746 
          747                                     // Find center distance
          748                                     dist = MAKE_FLOAT3(
          749                                             X.x - xr.x,
          750                                             X.y - xr.y,
          751                                             X.z - xr.z);
          752                                     dist += distmod;
          753                                     d = length(dist);
          754 
          755                                     // Lens shaped intersection
          756                                     if ((R - r) < d && d < (R + r))
          757                                         void_volume_new -=
          758                                             1.0/(12.0*d) * (
          759                                                     M_PI*(R + r - d)*(R + r - d)
          760                                                     *(d*d + 2.0*d*r - 3.0*r*r
          761                                                         + 2.0*d*R + 6.0*r*R
          762                                                         - 3.0*R*R) );
          763 
          764                                     // Particle fully contained in cell sphere
          765                                     if (d <= R - r)
          766                                         void_volume_new -= sphereVolume(r);
          767                                 }
          768                             }
          769                         }
          770                     }
          771                 }
          772             }
          773 
          774             // Make sure that the porosity is in the interval [0.0;1.0]
          775             phi = fmin(0.9, fmax(0.1, void_volume/cell_volume));
          776             Float phi_new = fmin(0.9, fmax(0.1, void_volume_new/cell_volume));
          777 
          778             // Central difference after first iteration
          779             Float dphi;
          780             if (iteration == 0)
          781                 dphi = phi_new - phi;
          782             else
          783                 dphi = 0.5*(phi_new - phi_0);
          784 
          785             // Save porosity and porosity change
          786             //phi = 0.5; dphi = 0.0; // disable porosity effects
          787             const unsigned int cellidx = d_idx(x,y,z);
          788             dev_darcy_phi[cellidx]  = phi*c_phi;
          789             dev_darcy_dphi[cellidx] = dphi*c_phi;
          790 
          791             /*printf("\n%d,%d,%d: findDarcyPorosities\n"
          792                     "\tphi     = %f\n"
          793                     "\tdphi    = %e\n"
          794                     "\tdphi_eps= %e\n"
          795                     "\tX       = %e, %e, %e\n"
          796                     "\txr      = %e, %e, %e\n"
          797                     "\tq       = %e\n"
          798                     "\tdiv_v_p = %e\n"
          799                     , x,y,z,
          800                     phi, dphi,
          801                     dot_epsilon_kk*(1.0 - phi)*devC_dt,
          802                     X.x, X.y, X.z,
          803                     xr.x, xr.y, xr.z,
          804                     q,
          805                     dot_epsilon_kk);*/
          806 
          807 #ifdef CHECK_FLUID_FINITE
          808             (void)checkFiniteFloat("phi", x, y, z, phi);
          809             (void)checkFiniteFloat("dphi", x, y, z, dphi);
          810 #endif
          811         } else {
          812             const unsigned int cellidx = d_idx(x,y,z);
          813             dev_darcy_phi[cellidx]  = 0.999;
          814             dev_darcy_dphi[cellidx] = 0.0;
          815         }
          816     }
          817 }
          818 
          819 // Find the particle velocity divergence at the cell center from the average
          820 // particle velocities on the cell faces
          821 __global__ void findDarcyParticleVelocityDivergence(
          822         const Float* __restrict__ dev_darcy_v_p_x,  // in
          823         const Float* __restrict__ dev_darcy_v_p_y,  // in
          824         const Float* __restrict__ dev_darcy_v_p_z,  // in
          825         Float* __restrict__ dev_darcy_div_v_p)      // out
          826 {
          827     // 3D thread index
          828     const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
          829     const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
          830     const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
          831 
          832     // Grid dimensions
          833     const unsigned int nx = devC_grid.num[0];
          834     const unsigned int ny = devC_grid.num[1];
          835     const unsigned int nz = devC_grid.num[2];
          836 
          837     if (x < nx && y < ny && z < nz) {
          838 
          839         // read values
          840         Float v_p_xn = dev_darcy_v_p_x[d_vidx(x,y,z)];
          841         Float v_p_xp = dev_darcy_v_p_x[d_vidx(x+1,y,z)];
          842         Float v_p_yn = dev_darcy_v_p_y[d_vidx(x,y,z)];
          843         Float v_p_yp = dev_darcy_v_p_y[d_vidx(x,y+1,z)];
          844         Float v_p_zn = dev_darcy_v_p_z[d_vidx(x,y,z)];
          845         Float v_p_zp = dev_darcy_v_p_z[d_vidx(x,y,z+1)];
          846 
          847         // cell dimensions
          848         const Float dx = devC_grid.L[0]/nx;
          849         const Float dy = devC_grid.L[1]/ny;
          850         const Float dz = devC_grid.L[2]/nz;
          851 
          852         // calculate the divergence using first order central finite differences
          853         const Float div_v_p =
          854             (v_p_xp - v_p_xn)/dx +
          855             (v_p_yp - v_p_yn)/dy +
          856             (v_p_zp - v_p_zn)/dz;
          857 
          858         dev_darcy_div_v_p[d_idx(x,y,z)] = div_v_p;
          859 
          860 #ifdef CHECK_FLUID_FINITE
          861         checkFiniteFloat("div_v_p", x, y, z, div_v_p);
          862 #endif
          863     }
          864 }
          865 
          866 // Find the cell-centered pressure gradient using central differences
          867 __global__ void findDarcyPressureGradient(
          868         const Float* __restrict__ dev_darcy_p,  // in
          869         Float3* __restrict__ dev_darcy_grad_p)  // out
          870 {
          871     // 3D thread index
          872     const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
          873     const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
          874     const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
          875 
          876     // Grid dimensions
          877     const unsigned int nx = devC_grid.num[0];
          878     const unsigned int ny = devC_grid.num[1];
          879     const unsigned int nz = devC_grid.num[2];
          880 
          881     if (x < nx && y < ny && z < nz) {
          882 
          883         // read values
          884         Float p_xn = dev_darcy_p[d_idx(x-1,y,z)];
          885         Float p_xp = dev_darcy_p[d_idx(x+1,y,z)];
          886         Float p_yn = dev_darcy_p[d_idx(x,y-1,z)];
          887         Float p_yp = dev_darcy_p[d_idx(x,y+1,z)];
          888         Float p_zn = dev_darcy_p[d_idx(x,y,z-1)];
          889         Float p_zp = dev_darcy_p[d_idx(x,y,z+1)];
          890 
          891         // cell dimensions
          892         const Float dx = devC_grid.L[0]/nx;
          893         const Float dy = devC_grid.L[1]/ny;
          894         const Float dz = devC_grid.L[2]/nz;
          895 
          896         // calculate the divergence using first order central finite differences
          897         const Float3 grad_p = MAKE_FLOAT3(
          898                 (p_xp - p_xn)/(dx + dx),
          899                 (p_yp - p_yn)/(dy + dy),
          900                 (p_zp - p_zn)/(dz + dz));
          901 
          902         dev_darcy_grad_p[d_idx(x,y,z)] = grad_p;
          903 
          904         //if (grad_p.x != 0.0 || grad_p.y != 0.0 || grad_p.z != 0.0)
          905         /*printf("%d,%d,%d findDarcyPressureGradient:\n"
          906                 "\tp_x    = %.2e, %.2e\n"
          907                 "\tp_y    = %.2e, %.2e\n"
          908                 "\tp_z    = %.2e, %.2e\n"
          909                 "\tgrad_p = %.2e, %.2e, %.2e\n",
          910                 x, y, z,
          911                 p_xn, p_xp,
          912                 p_yn, p_yp,
          913                 p_zn, p_zp,
          914                 grad_p.x, grad_p.y, grad_p.z); // */
          915 #ifdef CHECK_FLUID_FINITE
          916         checkFiniteFloat3("grad_p", x, y, z, grad_p);
          917 #endif
          918     }
          919 }
          920 
          921 // Find particle-fluid interaction force as outlined by Goren et al. 2011, and
          922 // originally by Gidaspow 1992. All terms other than the pressure force are
          923 // neglected. The buoyancy force is included.
          924 __global__ void findDarcyPressureForceLinear(
          925     const Float4* __restrict__ dev_x,            // in
          926     const Float3* __restrict__ dev_darcy_grad_p, // in
          927     const Float*  __restrict__ dev_darcy_phi,    // in
          928     const unsigned int wall0_iz,                 // in
          929     const Float rho_f,                           // in
          930     const int bc_top,                            // in
          931     Float4* __restrict__ dev_force,              // out
          932     Float4* __restrict__ dev_darcy_f_p)          // out
          933 {
          934     unsigned int i = threadIdx.x + blockIdx.x*blockDim.x; // Particle index
          935 
          936     if (i < devC_np) {
          937 
          938         // read particle information
          939         const Float4 x = dev_x[i];
          940         const Float3 x3 = MAKE_FLOAT3(x.x, x.y, x.z);
          941 
          942         // determine fluid cell containing the particle
          943         const unsigned int i_x =
          944             floor((x.x - devC_grid.origo[0])/(devC_grid.L[0]/devC_grid.num[0]));
          945         const unsigned int i_y =
          946             floor((x.y - devC_grid.origo[1])/(devC_grid.L[1]/devC_grid.num[1]));
          947         const unsigned int i_z =
          948             floor((x.z - devC_grid.origo[2])/(devC_grid.L[2]/devC_grid.num[2]));
          949         const unsigned int cellidx = d_idx(i_x, i_y, i_z);
          950 
          951         // determine cell dimensions
          952         const unsigned int nx = devC_grid.num[0];
          953         const unsigned int ny = devC_grid.num[1];
          954         const unsigned int nz = devC_grid.num[2];
          955         const Float dx = devC_grid.L[0]/nx;
          956         const Float dy = devC_grid.L[1]/ny;
          957         const Float dz = devC_grid.L[2]/nz;
          958 
          959         // read fluid information
          960         //const Float phi = dev_darcy_phi[cellidx];
          961 
          962         // Cell center position
          963         const Float3 X = MAKE_FLOAT3(
          964                 i_x*dx + 0.5*dx,
          965                 i_y*dy + 0.5*dy,
          966                 i_z*dz + 0.5*dz);
          967 
          968         Float3 grad_p = MAKE_FLOAT3(0., 0., 0.);
          969         Float3 grad_p_iter, n;
          970         int ix_n, iy_n, iz_n; // neighbor indexes
          971 
          972         // Loop over 27 closest cells to find all pressure gradient
          973         // contributions
          974         for (int d_iz = -1; d_iz<2; d_iz++) {
          975             for (int d_iy = -1; d_iy<2; d_iy++) {
          976                 for (int d_ix = -1; d_ix<2; d_ix++) {
          977 
          978                     ix_n = i_x + d_ix;
          979                     iy_n = i_y + d_iy;
          980                     iz_n = i_z + d_iz;
          981 
          982                     grad_p_iter = dev_darcy_grad_p[d_idx(ix_n, iy_n, iz_n)];
          983 
          984                     // make sure edge and corner ghost nodes aren't read
          985                     if (    // edges passing through (0,0,0)
          986                             (ix_n == -1 && iy_n == -1) ||
          987                             (ix_n == -1 && iz_n == -1) ||
          988                             (iy_n == -1 && iz_n == -1) ||
          989 
          990                             // edges passing through (nx,ny,nz)
          991                             (ix_n == nx && iy_n == ny) ||
          992                             (ix_n == nx && iz_n == nz) ||
          993                             (iy_n == ny && iz_n == nz) ||
          994 
          995                             (ix_n == nx && iy_n == -1) ||
          996                             (ix_n == nx && iz_n == -1) ||
          997 
          998                             (iy_n == ny && ix_n == -1) ||
          999                             (iy_n == ny && iz_n == -1) ||
         1000 
         1001                             (iz_n == nz && ix_n == -1) ||
         1002                             (iz_n == nz && iy_n == -1))
         1003                         grad_p_iter = MAKE_FLOAT3(0., 0., 0.);
         1004 
         1005                     // Add Neumann BC at top wall
         1006                     if (bc_top == 0 && i_z + d_iz >= wall0_iz - 1)
         1007                         grad_p_iter.z = 0.0;
         1008 
         1009                     n = MAKE_FLOAT3(dx*d_ix, dy*d_iy, dz*d_iz);
         1010 
         1011                     grad_p += weight(x3, X + n, dx, dy, dz)*grad_p_iter;
         1012 
         1013                     //*
         1014                     Float s = weight(x3, X + n, dx, dy, dz);
         1015                     /*if (s > 1.0e-12)
         1016                     printf("[%d+%d, %d+%d, %d+%d] findPF nb\n"
         1017                             "\tn      = %f, %f, %f\n"
         1018                             "\tgrad_pi= %.2e, %.2e, %.2e\n"
         1019                             "\ts      = %f\n"
         1020                             "\tgrad_p = %.2e, %.2e, %.2e\n",
         1021                             i_x, d_ix,
         1022                             i_y, d_iy,
         1023                             i_z, d_iz,
         1024                             n.x, n.y, n.z,
         1025                             grad_p_iter.x, grad_p_iter.y, grad_p_iter.z,
         1026                             s,
         1027                             s*grad_p_iter.x,
         1028                             s*grad_p_iter.y,
         1029                             s*grad_p_iter.z); // */
         1030                 }
         1031             }
         1032         }
         1033 
         1034         // find particle volume (radius in x.w)
         1035         const Float v = sphereVolume(x.w);
         1036 
         1037         // find pressure gradient force plus buoyancy force.
         1038         // buoyancy force = weight of displaced fluid
         1039         Float3 f_p = -1.0*grad_p*v//(1.0 - phi)
         1040             - rho_f*v*MAKE_FLOAT3(
         1041                     devC_params.g[0],
         1042                     devC_params.g[1],
         1043                     devC_params.g[2]);
         1044 
         1045         // Add Neumann BC at top wall
         1046         //if (i_z >= wall0_iz - 1)
         1047         if (bc_top == 0 && i_z >= wall0_iz)
         1048             f_p.z = 0.0;
         1049 
         1050         //if (length(f_p) > 1.0e-12)
         1051         /*printf("%d,%d,%d findPressureForceLinear:\n"
         1052                 "\tphi    = %f\n"
         1053                 "\tx      = %f, %f, %f\n"
         1054                 "\tX      = %f, %f, %f\n"
         1055                 "\tgrad_p = %.2e, %.2e, %.2e\n"
         1056                 //"\tp_x    = %.2e, %.2e\n"
         1057                 //"\tp_y    = %.2e, %.2e\n"
         1058                 //"\tp_z    = %.2e, %.2e\n"
         1059                 "\tf_p    = %.2e, %.2e, %.2e\n",
         1060                 i_x, i_y, i_z,
         1061                 phi,
         1062                 x3.x, x3.y, x3.z,
         1063                 X.x, X.y, X.z,
         1064                 grad_p.x, grad_p.y, grad_p.z,
         1065                 f_p.x, f_p.y, f_p.z); // */
         1066 
         1067 #ifdef CHECK_FLUID_FINITE
         1068         checkFiniteFloat3("f_p", i_x, i_y, i_z, f_p);
         1069 #endif
         1070         // save force
         1071         dev_force[i]    += MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0);
         1072         dev_darcy_f_p[i] = MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0);
         1073     }
         1074 }
         1075 
         1076 // Find particle-fluid interaction force as outlined by Zhou et al. 2010, and
         1077 // originally by Gidaspow 1992. All terms other than the pressure force are
         1078 // neglected. The buoyancy force is included.
         1079 __global__ void findDarcyPressureForce(
         1080     const Float4* __restrict__ dev_x,           // in
         1081     const Float*  __restrict__ dev_darcy_p,     // in
         1082     //const Float*  __restrict__ dev_darcy_phi,   // in
         1083     const unsigned int wall0_iz,                // in
         1084     const Float rho_f,                          // in
         1085     Float4* __restrict__ dev_force,             // out
         1086     Float4* __restrict__ dev_darcy_f_p)         // out
         1087 {
         1088     unsigned int i = threadIdx.x + blockIdx.x*blockDim.x; // Particle index
         1089 
         1090     if (i < devC_np) {
         1091 
         1092         // read particle information
         1093         const Float4 x = dev_x[i];
         1094 
         1095         // determine fluid cell containing the particle
         1096         const unsigned int i_x =
         1097             floor((x.x - devC_grid.origo[0])/(devC_grid.L[0]/devC_grid.num[0]));
         1098         const unsigned int i_y =
         1099             floor((x.y - devC_grid.origo[1])/(devC_grid.L[1]/devC_grid.num[1]));
         1100         const unsigned int i_z =
         1101             floor((x.z - devC_grid.origo[2])/(devC_grid.L[2]/devC_grid.num[2]));
         1102         const unsigned int cellidx = d_idx(i_x, i_y, i_z);
         1103 
         1104         // determine cell dimensions
         1105         const Float dx = devC_grid.L[0]/devC_grid.num[0];
         1106         const Float dy = devC_grid.L[1]/devC_grid.num[1];
         1107         const Float dz = devC_grid.L[2]/devC_grid.num[2];
         1108 
         1109         // read fluid information
         1110         //const Float phi = dev_darcy_phi[cellidx];
         1111         const Float p_xn = dev_darcy_p[d_idx(i_x-1,i_y,i_z)];
         1112         const Float p    = dev_darcy_p[cellidx];
         1113         const Float p_xp = dev_darcy_p[d_idx(i_x+1,i_y,i_z)];
         1114         const Float p_yn = dev_darcy_p[d_idx(i_x,i_y-1,i_z)];
         1115         const Float p_yp = dev_darcy_p[d_idx(i_x,i_y+1,i_z)];
         1116         const Float p_zn = dev_darcy_p[d_idx(i_x,i_y,i_z-1)];
         1117         Float p_zp = dev_darcy_p[d_idx(i_x,i_y,i_z+1)];
         1118 
         1119         // Add Neumann BC at top wall
         1120         if (i_z >= wall0_iz - 1)
         1121             p_zp = p;
         1122 
         1123         // find particle volume (radius in x.w)
         1124         const Float V = sphereVolume(x.w);
         1125 
         1126         // determine pressure gradient from first order central difference
         1127         const Float3 grad_p = MAKE_FLOAT3(
         1128                 (p_xp - p_xn)/(dx + dx),
         1129                 (p_yp - p_yn)/(dy + dy),
         1130                 (p_zp - p_zn)/(dz + dz));
         1131 
         1132         // find pressure gradient force plus buoyancy force.
         1133         // buoyancy force = weight of displaced fluid
         1134         Float3 f_p = -1.0*grad_p*V
         1135             - rho_f*V*MAKE_FLOAT3(
         1136                     devC_params.g[0],
         1137                     devC_params.g[1],
         1138                     devC_params.g[2]);
         1139 
         1140         // Add Neumann BC at top wall
         1141         if (i_z >= wall0_iz)
         1142             f_p.z = 0.0;
         1143 
         1144         /*printf("%d,%d,%d findPF:\n"
         1145                 "\tphi    = %f\n"
         1146                 "\tp      = %f\n"
         1147                 "\tgrad_p = % f, % f, % f\n"
         1148                 "\tf_p    = % f, % f, % f\n",
         1149                 i_x, i_y, i_z,
         1150                 phi, p,
         1151                 grad_p.x, grad_p.y, grad_p.z,
         1152                 f_p.x, f_p.y, f_p.z);*/
         1153 
         1154 #ifdef CHECK_FLUID_FINITE
         1155         checkFiniteFloat3("f_p", i_x, i_y, i_z, f_p);
         1156 #endif
         1157         // save force
         1158         dev_force[i]    += MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0);
         1159         dev_darcy_f_p[i] = MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0);
         1160     }
         1161 }
         1162 
         1163 // Set the pressure at the top boundary to new_pressure
         1164 __global__ void setDarcyTopPressure(
         1165     const Float new_pressure,
         1166     Float* __restrict__ dev_darcy_p,
         1167     const unsigned int wall0_iz)
         1168 {
         1169     // 3D thread index
         1170     const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
         1171     const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
         1172     const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
         1173 
         1174     // check that the thread is located at the top boundary or at the top wall
         1175     if (x < devC_grid.num[0] &&
         1176         y < devC_grid.num[1] &&
         1177         z == devC_grid.num[2]-1 || z == wall0_iz) {
         1178 
         1179         const unsigned int cellidx = idx(x,y,z);
         1180 
         1181         // Write the new pressure the top boundary cells
         1182         dev_darcy_p[cellidx] = new_pressure;
         1183     }
         1184 }
         1185 
         1186 // Set the pressure at the top wall to new_pressure
         1187 __global__ void setDarcyTopWallPressure(
         1188     const Float new_pressure,
         1189     const unsigned int wall0_iz,
         1190     Float* __restrict__ dev_darcy_p)
         1191 {
         1192     // 3D thread index
         1193     const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
         1194     const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
         1195     const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
         1196 
         1197     // check that the thread is located at the top boundary
         1198     if (x < devC_grid.num[0] &&
         1199         y < devC_grid.num[1] &&
         1200         z == wall0_iz) {
         1201 
         1202         const unsigned int cellidx = idx(x,y,z);
         1203 
         1204         // Write the new pressure the top boundary cells
         1205         dev_darcy_p[cellidx] = new_pressure;
         1206     }
         1207 }
         1208 
         1209 // Enforce fixed-flow BC at top wall
         1210 __global__ void setDarcyTopWallFixedFlow(
         1211     const unsigned int wall0_iz,
         1212     Float* __restrict__ dev_darcy_p)
         1213 {
         1214     // 3D thread index
         1215     const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
         1216     const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
         1217     const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
         1218 
         1219     // check that the thread is located at the top boundary
         1220     if (x < devC_grid.num[0] &&
         1221         y < devC_grid.num[1] &&
         1222         z == wall0_iz+1) {
         1223 
         1224         // Write the new pressure the top boundary cells
         1225         const Float new_pressure = dev_darcy_p[idx(x,y,z-2)];
         1226         dev_darcy_p[idx(x,y,z)] = new_pressure;
         1227     }
         1228 }
         1229 
         1230 
         1231 // Find the cell permeabilities from the Kozeny-Carman equation
         1232 __global__ void findDarcyPermeabilities(
         1233         const Float k_c,                            // in
         1234         const Float* __restrict__ dev_darcy_phi,    // in
         1235         Float* __restrict__ dev_darcy_k)            // out
         1236 {
         1237     // 3D thread index
         1238     const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
         1239     const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
         1240     const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
         1241 
         1242     // Grid dimensions
         1243     const unsigned int nx = devC_grid.num[0];
         1244     const unsigned int ny = devC_grid.num[1];
         1245     const unsigned int nz = devC_grid.num[2];
         1246 
         1247     if (x < nx && y < ny && z < nz) {
         1248 
         1249         // 1D thread index
         1250         const unsigned int cellidx = d_idx(x,y,z);
         1251 
         1252         Float phi = dev_darcy_phi[cellidx];
         1253 
         1254         // avoid division by zero
         1255         if (phi > 0.9999)
         1256             phi = 0.9999;
         1257 
         1258         Float k = k_c*pow(phi,3)/pow(1.0 - phi, 2);
         1259 
         1260         /*printf("%d,%d,%d findK:\n"
         1261                 "\tphi    = %f\n"
         1262                 "\tk      = %e\n",
         1263                 x, y, z,
         1264                 phi, k);*/
         1265 
         1266         // limit permeability [m*m]
         1267         // K_gravel = 3.0e-2 m/s => k_gravel = 2.7e-9 m*m
         1268         //k = fmin(2.7e-9, k);
         1269         k = fmin(2.7e-10, k);
         1270 
         1271         dev_darcy_k[cellidx] = k;
         1272 
         1273 #ifdef CHECK_FLUID_FINITE
         1274         checkFiniteFloat("k", x, y, z, k);
         1275 #endif
         1276     }
         1277 }
         1278 
         1279 // Find the spatial gradients of the permeability.
         1280 __global__ void findDarcyPermeabilityGradients(
         1281         const Float*  __restrict__ dev_darcy_k,   // in
         1282         Float3* __restrict__ dev_darcy_grad_k)    // out
         1283 {
         1284     // 3D thread index
         1285     const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
         1286     const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
         1287     const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
         1288 
         1289     // Grid dimensions
         1290     const unsigned int nx = devC_grid.num[0];
         1291     const unsigned int ny = devC_grid.num[1];
         1292     const unsigned int nz = devC_grid.num[2];
         1293 
         1294     // Cell size
         1295     const Float dx = devC_grid.L[0]/nx;
         1296     const Float dy = devC_grid.L[1]/ny;
         1297     const Float dz = devC_grid.L[2]/nz;
         1298 
         1299     if (x < nx && y < ny && z < nz) {
         1300 
         1301         // 1D thread index
         1302         const unsigned int cellidx = d_idx(x,y,z);
         1303 
         1304         // read values
         1305         const Float k_xn = dev_darcy_k[d_idx(x-1,y,z)];
         1306         const Float k_xp = dev_darcy_k[d_idx(x+1,y,z)];
         1307         const Float k_yn = dev_darcy_k[d_idx(x,y-1,z)];
         1308         const Float k_yp = dev_darcy_k[d_idx(x,y+1,z)];
         1309         const Float k_zn = dev_darcy_k[d_idx(x,y,z-1)];
         1310         const Float k_zp = dev_darcy_k[d_idx(x,y,z+1)];
         1311 
         1312         // gradient approximated by first-order central difference
         1313         const Float3 grad_k = MAKE_FLOAT3(
         1314                 (k_xp - k_xn)/(dx+dx),
         1315                 (k_yp - k_yn)/(dy+dy),
         1316                 (k_zp - k_zn)/(dz+dz));
         1317 
         1318         // write result
         1319         dev_darcy_grad_k[cellidx] = grad_k;
         1320 
         1321         /*printf("%d,%d,%d findK:\n"
         1322                 "\tk_x     = %e, %e\n"
         1323                 "\tk_y     = %e, %e\n"
         1324                 "\tk_z     = %e, %e\n"
         1325                 "\tgrad(k) = %e, %e, %e\n",
         1326                 x, y, z,
         1327                 k_xn, k_xp,
         1328                 k_yn, k_yp,
         1329                 k_zn, k_zp,
         1330                 grad_k.x, grad_k.y, grad_k.z);*/
         1331 
         1332 #ifdef CHECK_FLUID_FINITE
         1333         checkFiniteFloat3("grad_k", x, y, z, grad_k);
         1334 #endif
         1335     }
         1336 }
         1337 
         1338 // Find the temporal gradient in pressure using the backwards Euler method
         1339 __global__ void findDarcyPressureChange(
         1340         const Float* __restrict__ dev_darcy_p_old,    // in
         1341         const Float* __restrict__ dev_darcy_p,        // in
         1342         const unsigned int iter,                      // in
         1343         const unsigned int ndem,                      // in
         1344         Float* __restrict__ dev_darcy_dpdt)           // out
         1345 {
         1346     // 3D thread index
         1347     const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
         1348     const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
         1349     const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
         1350 
         1351     if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) {
         1352 
         1353         // 1D thread index
         1354         const unsigned int cellidx = d_idx(x,y,z);
         1355 
         1356         // read values
         1357         const Float p_old = dev_darcy_p_old[cellidx];
         1358         const Float p     = dev_darcy_p[cellidx];
         1359 
         1360         Float dpdt = (p - p_old)/(ndem*devC_dt);
         1361 
         1362         // Ignore the large initial pressure gradients caused by solver "warm
         1363         // up" towards hydrostatic pressure distribution
         1364         if (iter < 2)
         1365             dpdt = 0.0;
         1366 
         1367         // write result
         1368         dev_darcy_dpdt[cellidx] = dpdt;
         1369 
         1370         /*printf("%d,%d,%d\n"
         1371                 "\tp_old = %e\n"
         1372                 "\tp     = %e\n"
         1373                 "\tdt    = %e\n"
         1374                 "\tdpdt  = %e\n",
         1375                 x,y,z,
         1376                 p_old, p,
         1377                 devC_dt, dpdt);*/
         1378 
         1379 #ifdef CHECK_FLUID_FINITE
         1380         checkFiniteFloat("dpdt", x, y, z, dpdt);
         1381 #endif
         1382     }
         1383 }
         1384 
         1385 __global__ void firstDarcySolution(
         1386         const Float*  __restrict__ dev_darcy_p,       // in
         1387         const Float*  __restrict__ dev_darcy_k,       // in
         1388         const Float*  __restrict__ dev_darcy_phi,     // in
         1389         const Float*  __restrict__ dev_darcy_dphi,    // in
         1390         const Float*  __restrict__ dev_darcy_div_v_p, // in
         1391         const Float3* __restrict__ dev_darcy_vp_avg,  // in
         1392         const Float3* __restrict__ dev_darcy_grad_k,  // in
         1393         const Float beta_f,                           // in
         1394         const Float mu,                               // in
         1395         const int bc_xn,                              // in
         1396         const int bc_xp,                              // in
         1397         const int bc_yn,                              // in
         1398         const int bc_yp,                              // in
         1399         const int bc_bot,                             // in
         1400         const int bc_top,                             // in
         1401         const unsigned int ndem,                      // in
         1402         const unsigned int wall0_iz,                  // in
         1403         const int* __restrict__ dev_darcy_p_constant, // in
         1404         Float* __restrict__ dev_darcy_dp_expl)        // out
         1405 {
         1406     // 3D thread index
         1407     const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
         1408     const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
         1409     const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
         1410 
         1411     // Grid dimensions
         1412     const unsigned int nx = devC_grid.num[0];
         1413     const unsigned int ny = devC_grid.num[1];
         1414     const unsigned int nz = devC_grid.num[2];
         1415 
         1416     // Cell size
         1417     const Float dx = devC_grid.L[0]/nx;
         1418     const Float dy = devC_grid.L[1]/ny;
         1419     const Float dz = devC_grid.L[2]/nz;
         1420 
         1421     if (x < nx && y < ny && z < nz) {
         1422 
         1423         // 1D thread index
         1424         const unsigned int cellidx = d_idx(x,y,z);
         1425 
         1426         // read values
         1427         const Float  phi_xn = dev_darcy_phi[d_idx(x-1,y,z)];
         1428         const Float  phi    = dev_darcy_phi[cellidx];
         1429         const Float  phi_xp = dev_darcy_phi[d_idx(x+1,y,z)];
         1430         const Float  phi_yn = dev_darcy_phi[d_idx(x,y-1,z)];
         1431         const Float  phi_yp = dev_darcy_phi[d_idx(x,y+1,z)];
         1432         const Float  phi_zn = dev_darcy_phi[d_idx(x,y,z-1)];
         1433         const Float  phi_zp = dev_darcy_phi[d_idx(x,y,z+1)];
         1434         const Float  dphi   = dev_darcy_dphi[cellidx];
         1435         const Float3 vp_avg = dev_darcy_vp_avg[cellidx];
         1436         const int p_constant = dev_darcy_p_constant[cellidx];
         1437 
         1438         Float p_xn    = dev_darcy_p[d_idx(x-1,y,z)];
         1439         const Float p = dev_darcy_p[cellidx];
         1440         Float p_xp    = dev_darcy_p[d_idx(x+1,y,z)];
         1441         Float p_yn    = dev_darcy_p[d_idx(x,y-1,z)];
         1442         Float p_yp    = dev_darcy_p[d_idx(x,y+1,z)];
         1443         Float p_zn    = dev_darcy_p[d_idx(x,y,z-1)];
         1444         Float p_zp    = dev_darcy_p[d_idx(x,y,z+1)];
         1445 
         1446         const Float k_xn    = dev_darcy_k[d_idx(x-1,y,z)];
         1447         const Float k       = dev_darcy_k[cellidx];
         1448         const Float k_xp    = dev_darcy_k[d_idx(x+1,y,z)];
         1449         const Float k_yn    = dev_darcy_k[d_idx(x,y-1,z)];
         1450         const Float k_yp    = dev_darcy_k[d_idx(x,y+1,z)];
         1451         const Float k_zn    = dev_darcy_k[d_idx(x,y,z-1)];
         1452         const Float k_zp    = dev_darcy_k[d_idx(x,y,z+1)];
         1453 
         1454         // Neumann BCs
         1455         if (x == 0 && bc_xn == 1)
         1456             p_xn = p;
         1457         if (x == nx-1 && bc_xp == 1)
         1458             p_xp = p;
         1459         if (y == 0 && bc_yn == 1)
         1460             p_yn = p;
         1461         if (y == ny-1 && bc_yp == 1)
         1462             p_yp = p;
         1463         if (z == 0 && bc_bot == 1)
         1464             p_zn = p;
         1465         if (z == nz-1 && bc_top == 1)
         1466             p_zp = p;
         1467 
         1468         const Float3 grad_phi_central = MAKE_FLOAT3(
         1469                 (phi_xp - phi_xn)/(dx + dx),
         1470                 (phi_yp - phi_yn)/(dy + dy),
         1471                 (phi_zp - phi_zn)/(dz + dz));
         1472 
         1473         // Solve div(k*grad(p)) as a single term, using harmonic mean for
         1474         // permeability. k_HM*grad(p) is found between the pressure nodes.
         1475         const Float div_k_grad_p =
         1476                 (2.*k_xp*k/(k_xp + k) *
         1477                  (p_xp - p)/dx
         1478                  -
         1479                  2.*k_xn*k/(k_xn + k) *
         1480                  (p - p_xn)/dx)/dx
         1481             +
         1482                 (2.*k_yp*k/(k_yp + k) *
         1483                  (p_yp - p)/dy
         1484                  -
         1485                  2.*k_yn*k/(k_yn + k) *
         1486                  (p - p_yn)/dy)/dy
         1487             +
         1488                 (2.*k_zp*k/(k_zp + k) *
         1489                  (p_zp - p)/dz
         1490                  -
         1491                  2.*k_zn*k/(k_zn + k) *
         1492                  (p - p_zn)/dz)/dz;
         1493 
         1494         Float dp_expl =
         1495             ndem*devC_dt/(beta_f*phi*mu)*div_k_grad_p
         1496             -(ndem*devC_dt/(beta_f*phi*(1.0 - phi)))
         1497             *(dphi/(ndem*devC_dt) + dot(vp_avg, grad_phi_central));
         1498 
         1499         // Dirichlet BC at fixed-pressure boundaries and at the dynamic top
         1500         // wall.  wall0_iz will be larger than the grid if the wall isn't
         1501         // dynamic
         1502         if ((bc_bot == 0 && z == 0) || (bc_top == 0 && z == nz-1)
         1503                 || (z >= wall0_iz && bc_top == 0)
         1504                 || (bc_xn == 0 && x == 0) || (bc_xp == 0 && x == nx-1)
         1505                 || (bc_yn == 0 && y == 0) || (bc_yp == 0 && y == nx-1)
         1506                 || p_constant == 1)
         1507             dp_expl = 0.0;
         1508 
         1509 #ifdef REPORT_FORCING_TERMS
         1510             const Float dp_diff =
         1511                 ndem*devC_dt/(beta_f*phi*mu)
         1512                 *div_k_grad_p;
         1513             const Float dp_forc =
         1514                 -(ndem*devC_dt/(beta_f*phi*(1.0 - phi)))
         1515                 *(dphi/(ndem*devC_dt) + dot(vp_avg, grad_phi));
         1516 
         1517         printf("\n%d,%d,%d firstDarcySolution\n"
         1518                 "p           = %e\n"
         1519                 "p_x         = %e, %e\n"
         1520                 "p_y         = %e, %e\n"
         1521                 "p_z         = %e, %e\n"
         1522                 "dp_expl     = %e\n"
         1523                 "div_k_grad_p= %e\n"
         1524                 "dp_diff     = %e\n"
         1525                 "dp_forc     = %e\n"
         1526                 "phi         = %e\n"
         1527                 "dphi        = %e\n"
         1528                 "dphi/dt     = %e\n"
         1529                 "vp_avg      = %e, %e, %e\n"
         1530                 "grad_phi    = %e, %e, %e\n"
         1531                 ,
         1532                 x,y,z,
         1533                 p,
         1534                 p_xn, p_xp,
         1535                 p_yn, p_yp,
         1536                 p_zn, p_zp,
         1537                 dp_expl,
         1538                 div_k_grad_p,
         1539                 dp_diff,
         1540                 dp_forc,
         1541                 phi,
         1542                 dphi,
         1543                 dphi/(ndem*devC_dt),
         1544                 vp_avg.x, vp_avg.y, vp_avg.z,
         1545                 grad_phi_central.x, grad_phi_central.y, grad_phi_central.z
         1546                 );
         1547 #endif
         1548 
         1549         // save explicit integrated pressure change
         1550         dev_darcy_dp_expl[cellidx] = dp_expl;
         1551 
         1552 #ifdef CHECK_FLUID_FINITE
         1553         checkFiniteFloat("dp_expl", x, y, z, dp_expl);
         1554 #endif
         1555     }
         1556 }
         1557 // A single jacobi iteration where the pressure values are updated to
         1558 // dev_darcy_p_new.
         1559 // bc = 0: Dirichlet, 1: Neumann
         1560 __global__ void updateDarcySolution(
         1561         const Float*  __restrict__ dev_darcy_p_old,   // in
         1562         const Float*  __restrict__ dev_darcy_dp_expl, // in
         1563         const Float*  __restrict__ dev_darcy_p,       // in
         1564         const Float*  __restrict__ dev_darcy_k,       // in
         1565         const Float*  __restrict__ dev_darcy_phi,     // in
         1566         const Float*  __restrict__ dev_darcy_dphi,    // in
         1567         const Float*  __restrict__ dev_darcy_div_v_p, // in
         1568         const Float3* __restrict__ dev_darcy_vp_avg,  // in
         1569         const Float3* __restrict__ dev_darcy_grad_k,  // in
         1570         const Float beta_f,                           // in
         1571         const Float mu,                               // in
         1572         const int bc_xn,                              // in
         1573         const int bc_xp,                              // in
         1574         const int bc_yn,                              // in
         1575         const int bc_yp,                              // in
         1576         const int bc_bot,                             // in
         1577         const int bc_top,                             // in
         1578         const unsigned int ndem,                      // in
         1579         const unsigned int wall0_iz,                  // in
         1580         const int* __restrict__ dev_darcy_p_constant, // in
         1581         Float* __restrict__ dev_darcy_p_new,          // out
         1582         Float* __restrict__ dev_darcy_norm)           // out
         1583 {
         1584     // 3D thread index
         1585     const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
         1586     const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
         1587     const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
         1588 
         1589     // Grid dimensions
         1590     const unsigned int nx = devC_grid.num[0];
         1591     const unsigned int ny = devC_grid.num[1];
         1592     const unsigned int nz = devC_grid.num[2];
         1593 
         1594     // Cell size
         1595     const Float dx = devC_grid.L[0]/nx;
         1596     const Float dy = devC_grid.L[1]/ny;
         1597     const Float dz = devC_grid.L[2]/nz;
         1598 
         1599     if (x < nx && y < ny && z < nz) {
         1600 
         1601         // 1D thread index
         1602         const unsigned int cellidx = d_idx(x,y,z);
         1603 
         1604         // read values
         1605         const Float  phi_xn = dev_darcy_phi[d_idx(x-1,y,z)];
         1606         const Float  phi    = dev_darcy_phi[cellidx];
         1607         const Float  phi_xp = dev_darcy_phi[d_idx(x+1,y,z)];
         1608         const Float  phi_yn = dev_darcy_phi[d_idx(x,y-1,z)];
         1609         const Float  phi_yp = dev_darcy_phi[d_idx(x,y+1,z)];
         1610         const Float  phi_zn = dev_darcy_phi[d_idx(x,y,z-1)];
         1611         const Float  phi_zp = dev_darcy_phi[d_idx(x,y,z+1)];
         1612         const Float  dphi   = dev_darcy_dphi[cellidx];
         1613         const Float3 vp_avg = dev_darcy_vp_avg[cellidx];
         1614         const int p_constant = dev_darcy_p_constant[cellidx];
         1615 
         1616         const Float p_old   = dev_darcy_p_old[cellidx];
         1617         const Float dp_expl = dev_darcy_dp_expl[cellidx];
         1618 
         1619         Float p_xn    = dev_darcy_p[d_idx(x-1,y,z)];
         1620         const Float p = dev_darcy_p[cellidx];
         1621         Float p_xp    = dev_darcy_p[d_idx(x+1,y,z)];
         1622         Float p_yn    = dev_darcy_p[d_idx(x,y-1,z)];
         1623         Float p_yp    = dev_darcy_p[d_idx(x,y+1,z)];
         1624         Float p_zn    = dev_darcy_p[d_idx(x,y,z-1)];
         1625         Float p_zp    = dev_darcy_p[d_idx(x,y,z+1)];
         1626 
         1627         const Float k_xn    = dev_darcy_k[d_idx(x-1,y,z)];
         1628         const Float k       = dev_darcy_k[cellidx];
         1629         const Float k_xp    = dev_darcy_k[d_idx(x+1,y,z)];
         1630         const Float k_yn    = dev_darcy_k[d_idx(x,y-1,z)];
         1631         const Float k_yp    = dev_darcy_k[d_idx(x,y+1,z)];
         1632         const Float k_zn    = dev_darcy_k[d_idx(x,y,z-1)];
         1633         const Float k_zp    = dev_darcy_k[d_idx(x,y,z+1)];
         1634 
         1635         // Neumann BCs
         1636         if (x == 0 && bc_xn == 1)
         1637             p_xn = p;
         1638         if (x == nx-1 && bc_xp == 1)
         1639             p_xp = p;
         1640         if (y == 0 && bc_yn == 1)
         1641             p_yn = p;
         1642         if (y == ny-1 && bc_yp == 1)
         1643             p_yp = p;
         1644         if (z == 0 && bc_bot == 1)
         1645             p_zn = p;
         1646         if (z == nz-1 && bc_top == 1)
         1647             p_zp = p;
         1648 
         1649         const Float3 grad_phi_central = MAKE_FLOAT3(
         1650                 (phi_xp - phi_xn)/(dx + dx),
         1651                 (phi_yp - phi_yn)/(dy + dy),
         1652                 (phi_zp - phi_zn)/(dz + dz));
         1653 
         1654         // Solve div(k*grad(p)) as a single term, using harmonic mean for
         1655         // permeability. k_HM*grad(p) is found between the pressure nodes.
         1656         const Float div_k_grad_p =
         1657                 (2.*k_xp*k/(k_xp + k) *
         1658                  (p_xp - p)/dx
         1659                  -
         1660                  2.*k_xn*k/(k_xn + k) *
         1661                  (p - p_xn)/dx)/dx
         1662             +
         1663                 (2.*k_yp*k/(k_yp + k) *
         1664                  (p_yp - p)/dy
         1665                  -
         1666                  2.*k_yn*k/(k_yn + k) *
         1667                  (p - p_yn)/dy)/dy
         1668             +
         1669                 (2.*k_zp*k/(k_zp + k) *
         1670                  (p_zp - p)/dz
         1671                  -
         1672                  2.*k_zn*k/(k_zn + k) *
         1673                  (p - p_zn)/dz)/dz;
         1674 
         1675         //Float p_new = p_old
         1676         Float dp_impl =
         1677             ndem*devC_dt/(beta_f*phi*mu)*div_k_grad_p
         1678             -(ndem*devC_dt/(beta_f*phi*(1.0 - phi)))
         1679             *(dphi/(ndem*devC_dt) + dot(vp_avg, grad_phi_central));
         1680 
         1681         // Dirichlet BC at fixed-pressure boundaries and at the dynamic top
         1682         // wall.  wall0_iz will be larger than the grid if the wall isn't
         1683         // dynamic
         1684         if ((bc_bot == 0 && z == 0) || (bc_top == 0 && z == nz-1)
         1685                 || (z >= wall0_iz && bc_top == 0)
         1686                 || (bc_xn == 0 && x == 0) || (bc_xp == 0 && x == nx-1)
         1687                 || (bc_yn == 0 && y == 0) || (bc_yp == 0 && y == nx-1)
         1688                 || p_constant == 1)
         1689             dp_impl = 0.0;
         1690 
         1691         // choose integration method, parameter in [0.0; 1.0]
         1692         //    epsilon = 0.0: explicit
         1693         //    epsilon = 0.5: Crank-Nicolson
         1694         //    epsilon = 1.0: implicit
         1695         const Float epsilon = 0.5;
         1696         Float p_new = p_old + (1.0 - epsilon)*dp_expl + epsilon*dp_impl;
         1697 
         1698         // add underrelaxation
         1699         const Float theta = 0.05;
         1700         p_new = p*(1.0 - theta) + p_new*theta;
         1701 
         1702         // normalized residual, avoid division by zero
         1703         //const Float res_norm = (p_new - p)*(p_new - p)/(p_new*p_new + 1.0e-16);
         1704         const Float res_norm = (p_new - p)/(p + 1.0e-16);
         1705 
         1706 #ifdef REPORT_FORCING_TERMS_JACOBIAN
         1707         const Float dp_diff = (ndem*devC_dt)/(beta_f*phi*mu)
         1708             *(k*laplace_p + dot(grad_k, grad_p));
         1709         const Float dp_forc =
         1710             -(ndem*devC_dt/(beta_f*phi*(1.0 - phi)))
         1711             *(dphi/(ndem*devC_dt) + dot(vp_avg, grad_phi));
         1712         printf("\n%d,%d,%d updateDarcySolution\n"
         1713                 "p_new       = %e\n"
         1714                 "p           = %e\n"
         1715                 "p_x         = %e, %e\n"
         1716                 "p_y         = %e, %e\n"
         1717                 "p_z         = %e, %e\n"
         1718                 "dp_expl     = %e\n"
         1719                 "p_old       = %e\n"
         1720                 "div_k_grad_p= %e\n"
         1721                 "dp_diff     = %e\n"
         1722                 "dp_forc     = %e\n"
         1723                 "div_v_p     = %e\n"
         1724                 "res_norm    = %e\n"
         1725                 ,
         1726                 x,y,z,
         1727                 p_new, p,
         1728                 p_xn, p_xp,
         1729                 p_yn, p_yp,
         1730                 p_zn, p_zp,
         1731                 dp_expl,
         1732                 p_old,
         1733                 div_k_grad_p,
         1734                 dp_diff,
         1735                 dp_forc,
         1736                 dphi, dphi/(ndem*devC_dt),
         1737                 res_norm); //
         1738 #endif
         1739 
         1740         // save new pressure and the residual
         1741         dev_darcy_p_new[cellidx] = p_new;
         1742         dev_darcy_norm[cellidx]  = res_norm;
         1743 
         1744         /*printf("%d,%d,%d\tp = % f\tp_new = % f\tres_norm = % f\n",
         1745                 x,y,z,
         1746                 p,
         1747                 p_new,
         1748                 res_norm);*/
         1749 
         1750 #ifdef CHECK_FLUID_FINITE
         1751         checkFiniteFloat("p_new", x, y, z, p_new);
         1752         checkFiniteFloat("res_norm", x, y, z, res_norm);
         1753 #endif
         1754     }
         1755 }
         1756 
         1757 __global__ void findNewPressure(
         1758         const Float* __restrict__ dev_darcy_dp,     // in
         1759         Float* __restrict__ dev_darcy_p)            // in+out
         1760 {
         1761     // 3D thread index
         1762     const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
         1763     const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
         1764     const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
         1765 
         1766     // Grid dimensions
         1767     const unsigned int nx = devC_grid.num[0];
         1768     const unsigned int ny = devC_grid.num[1];
         1769     const unsigned int nz = devC_grid.num[2];
         1770 
         1771     // Check that we are not outside the fluid grid
         1772     if (x < nx && y < ny && z < nz) {
         1773 
         1774         const unsigned int cellidx = d_idx(x,y,z);
         1775 
         1776         const Float dp = dev_darcy_dp[cellidx];
         1777 
         1778         // save new pressure
         1779         dev_darcy_p[cellidx] += dp;
         1780 
         1781         /*printf("%d,%d,%d\tp = % f\tp_new = % f\tres_norm = % f\n",
         1782                 x,y,z,
         1783                 p,
         1784                 p_new,
         1785                 res_norm);*/
         1786 
         1787 #ifdef CHECK_FLUID_FINITE
         1788         checkFiniteFloat("dp", x, y, z, dp);
         1789 #endif
         1790     }
         1791 }
         1792 
         1793 // Find cell velocities
         1794 __global__ void findDarcyVelocities(
         1795         const Float* __restrict__ dev_darcy_p,      // in
         1796         const Float* __restrict__ dev_darcy_phi,    // in
         1797         const Float* __restrict__ dev_darcy_k,      // in
         1798         const Float mu,                             // in
         1799         Float3* __restrict__ dev_darcy_v)           // out
         1800 {
         1801     // 3D thread index
         1802     const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
         1803     const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
         1804     const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
         1805 
         1806     // Grid dimensions
         1807     const unsigned int nx = devC_grid.num[0];
         1808     const unsigned int ny = devC_grid.num[1];
         1809     const unsigned int nz = devC_grid.num[2];
         1810 
         1811     // Cell size
         1812     const Float dx = devC_grid.L[0]/nx;
         1813     const Float dy = devC_grid.L[1]/ny;
         1814     const Float dz = devC_grid.L[2]/nz;
         1815 
         1816     // Check that we are not outside the fluid grid
         1817     if (x < nx && y < ny && z < nz) {
         1818 
         1819         const unsigned int cellidx = d_idx(x,y,z);
         1820 
         1821         const Float p_xn = dev_darcy_p[d_idx(x-1,y,z)];
         1822         const Float p_xp = dev_darcy_p[d_idx(x+1,y,z)];
         1823         const Float p_yn = dev_darcy_p[d_idx(x,y-1,z)];
         1824         const Float p_yp = dev_darcy_p[d_idx(x,y+1,z)];
         1825         const Float p_zn = dev_darcy_p[d_idx(x,y,z-1)];
         1826         const Float p_zp = dev_darcy_p[d_idx(x,y,z+1)];
         1827 
         1828         const Float k   = dev_darcy_k[cellidx];
         1829         const Float phi = dev_darcy_phi[cellidx];
         1830 
         1831         // approximate pressure gradient with first order central differences
         1832         const Float3 grad_p = MAKE_FLOAT3(
         1833                 (p_xp - p_xn)/(dx + dx),
         1834                 (p_yp - p_yn)/(dy + dy),
         1835                 (p_zp - p_zn)/(dz + dz));
         1836 
         1837         // Flux [m/s]: q = -k/nu * dH
         1838         // Pore velocity [m/s]: v = q/n
         1839 
         1840         // calculate flux
         1841         //const Float3 q = -k/mu*grad_p;
         1842 
         1843         // calculate velocity
         1844         //const Float3 v = q/phi;
         1845         const Float3 v = (-k/mu * grad_p)/phi;
         1846 
         1847         // Save velocity
         1848         dev_darcy_v[cellidx] = v;
         1849     }
         1850 }
         1851 
         1852 // Print final heads and free memory
         1853 void DEM::endDarcyDev()
         1854 {
         1855     freeDarcyMemDev();
         1856 }
         1857 
         1858 // vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4