tdarcy.cuh - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
HTML git clone git://src.adamsgaard.dk/sphere
DIR Log
DIR Files
DIR Refs
DIR LICENSE
---
tdarcy.cuh (70180B)
---
1 // darcy.cuh
2 // CUDA implementation of Darcy porous flow
3
4 #include <iostream>
5 #include <cuda.h>
6 //#include <cutil_math.h>
7 #include <helper_math.h>
8
9 #include "vector_arithmetic.h" // for arbitrary precision vectors
10 #include "sphere.h"
11 #include "datatypes.h"
12 #include "utility.h"
13 #include "constants.cuh"
14 #include "debug.h"
15
16 // Initialize memory
17 void DEM::initDarcyMemDev(void)
18 {
19 // size of scalar field
20 unsigned int memSizeF = sizeof(Float)*darcyCells();
21
22 cudaMalloc((void**)&dev_darcy_dp_expl, memSizeF); // Expl. pressure change
23 cudaMalloc((void**)&dev_darcy_p_old, memSizeF); // old pressure
24 cudaMalloc((void**)&dev_darcy_p, memSizeF); // hydraulic pressure
25 cudaMalloc((void**)&dev_darcy_p_new, memSizeF); // updated pressure
26 cudaMalloc((void**)&dev_darcy_v, memSizeF*3); // cell hydraulic velocity
27 cudaMalloc((void**)&dev_darcy_vp_avg, memSizeF*3); // avg. particle velocity
28 cudaMalloc((void**)&dev_darcy_phi, memSizeF); // cell porosity
29 cudaMalloc((void**)&dev_darcy_dphi, memSizeF); // cell porosity change
30 cudaMalloc((void**)&dev_darcy_norm, memSizeF); // normalized residual
31 cudaMalloc((void**)&dev_darcy_f_p, sizeof(Float4)*np); // pressure force
32 cudaMalloc((void**)&dev_darcy_k, memSizeF); // hydraulic permeability
33 cudaMalloc((void**)&dev_darcy_grad_k, memSizeF*3); // grad(permeability)
34 cudaMalloc((void**)&dev_darcy_div_v_p, memSizeF); // divergence(v_p)
35 cudaMalloc((void**)&dev_darcy_grad_p, memSizeF*3); // grad(pressure)
36 cudaMalloc((void**)&dev_darcy_p_constant,
37 sizeof(int)*darcyCells()); // grad(pressure)
38
39 checkForCudaErrors("End of initDarcyMemDev");
40 }
41
42 // Free memory
43 void DEM::freeDarcyMemDev()
44 {
45 cudaFree(dev_darcy_dp_expl);
46 cudaFree(dev_darcy_p_old);
47 cudaFree(dev_darcy_p);
48 cudaFree(dev_darcy_p_new);
49 cudaFree(dev_darcy_v);
50 cudaFree(dev_darcy_vp_avg);
51 cudaFree(dev_darcy_phi);
52 cudaFree(dev_darcy_dphi);
53 cudaFree(dev_darcy_norm);
54 cudaFree(dev_darcy_f_p);
55 cudaFree(dev_darcy_k);
56 cudaFree(dev_darcy_grad_k);
57 cudaFree(dev_darcy_div_v_p);
58 cudaFree(dev_darcy_grad_p);
59 cudaFree(dev_darcy_p_constant);
60 }
61
62 // Transfer to device
63 void DEM::transferDarcyToGlobalDeviceMemory(int statusmsg)
64 {
65 checkForCudaErrors("Before attempting cudaMemcpy in "
66 "transferDarcyToGlobalDeviceMemory");
67
68 //if (verbose == 1 && statusmsg == 1)
69 //std::cout << " Transfering fluid data to the device: ";
70
71 // memory size for a scalar field
72 unsigned int memSizeF = sizeof(Float)*darcyCells();
73
74 //writeNSarray(ns.p, "ns.p.txt");
75
76 cudaMemcpy(dev_darcy_p, darcy.p, memSizeF, cudaMemcpyHostToDevice);
77 checkForCudaErrors("transferDarcytoGlobalDeviceMemory after first "
78 "cudaMemcpy");
79 cudaMemcpy(dev_darcy_v, darcy.v, memSizeF*3, cudaMemcpyHostToDevice);
80 cudaMemcpy(dev_darcy_phi, darcy.phi, memSizeF, cudaMemcpyHostToDevice);
81 cudaMemcpy(dev_darcy_dphi, darcy.dphi, memSizeF, cudaMemcpyHostToDevice);
82 cudaMemcpy(dev_darcy_f_p, darcy.f_p, sizeof(Float4)*np,
83 cudaMemcpyHostToDevice);
84 cudaMemcpy(dev_darcy_p_constant, darcy.p_constant, sizeof(int)*darcyCells(),
85 cudaMemcpyHostToDevice);
86
87 checkForCudaErrors("End of transferDarcyToGlobalDeviceMemory");
88 //if (verbose == 1 && statusmsg == 1)
89 //std::cout << "Done" << std::endl;
90 }
91
92 // Transfer from device
93 void DEM::transferDarcyFromGlobalDeviceMemory(int statusmsg)
94 {
95 if (verbose == 1 && statusmsg == 1)
96 std::cout << " Transfering fluid data from the device: ";
97
98 // memory size for a scalar field
99 unsigned int memSizeF = sizeof(Float)*darcyCells();
100
101 cudaMemcpy(darcy.p, dev_darcy_p, memSizeF, cudaMemcpyDeviceToHost);
102 checkForCudaErrors("In transferDarcyFromGlobalDeviceMemory, dev_darcy_p", 0);
103 cudaMemcpy(darcy.v, dev_darcy_v, memSizeF*3, cudaMemcpyDeviceToHost);
104 cudaMemcpy(darcy.phi, dev_darcy_phi, memSizeF, cudaMemcpyDeviceToHost);
105 cudaMemcpy(darcy.dphi, dev_darcy_dphi, memSizeF, cudaMemcpyDeviceToHost);
106 cudaMemcpy(darcy.f_p, dev_darcy_f_p, sizeof(Float4)*np,
107 cudaMemcpyDeviceToHost);
108 cudaMemcpy(darcy.k, dev_darcy_k, memSizeF, cudaMemcpyDeviceToHost);
109 cudaMemcpy(darcy.p_constant, dev_darcy_p_constant, sizeof(int)*darcyCells(),
110 cudaMemcpyDeviceToHost);
111
112 checkForCudaErrors("End of transferDarcyFromGlobalDeviceMemory", 0);
113 if (verbose == 1 && statusmsg == 1)
114 std::cout << "Done" << std::endl;
115 }
116
117 // Transfer the normalized residuals from device to host
118 void DEM::transferDarcyNormFromGlobalDeviceMemory()
119 {
120 cudaMemcpy(darcy.norm, dev_darcy_norm, sizeof(Float)*darcyCells(),
121 cudaMemcpyDeviceToHost);
122 checkForCudaErrors("End of transferDarcyNormFromGlobalDeviceMemory");
123 }
124
125 // Transfer the pressures from device to host
126 void DEM::transferDarcyPressuresFromGlobalDeviceMemory()
127 {
128 cudaMemcpy(darcy.p, dev_darcy_p, sizeof(Float)*darcyCells(),
129 cudaMemcpyDeviceToHost);
130 checkForCudaErrors("End of transferDarcyNormFromGlobalDeviceMemory");
131 }
132
133 // Get linear index from 3D grid position
134 __inline__ __device__ unsigned int d_idx(
135 const int x, const int y, const int z)
136 {
137 // without ghost nodes
138 //return x + dev_grid.num[0]*y + dev_grid.num[0]*dev_grid.num[1]*z;
139
140 // with ghost nodes
141 // the ghost nodes are placed at x,y,z = -1 and WIDTH
142 return (x+1) + (devC_grid.num[0]+2)*(y+1) +
143 (devC_grid.num[0]+2)*(devC_grid.num[1]+2)*(z+1);
144 }
145
146 // Get linear index of velocity node from 3D grid position in staggered grid
147 __inline__ __device__ unsigned int d_vidx(
148 const int x, const int y, const int z)
149 {
150 // with ghost nodes
151 // the ghost nodes are placed at x,y,z = -1 and WIDTH+1
152 return (x+1) + (devC_grid.num[0]+3)*(y+1)
153 + (devC_grid.num[0]+3)*(devC_grid.num[1]+3)*(z+1);
154 }
155
156 // The normalized residuals are given an initial value of 0, since the values at
157 // the Dirichlet boundaries aren't written during the iterations.
158 __global__ void setDarcyNormZero(
159 Float* __restrict__ dev_darcy_norm)
160 {
161 // 3D thread index
162 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
163 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
164 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
165
166 // check that we are not outside the fluid grid
167 if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2])
168 dev_darcy_norm[d_idx(x,y,z)] = 0.0;
169 }
170
171 // Set an array of scalars to 0.0 inside devC_grid
172 template<typename T>
173 __global__ void setDarcyZeros(T* __restrict__ dev_scalarfield)
174 {
175 // 3D thread index
176 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
177 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
178 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
179
180 // check that we are not outside the fluid grid
181 if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2])
182 dev_scalarfield[d_idx(x,y,z)] = 0.0;
183 }
184
185
186 // Update a field in the ghost nodes from their parent cell values. The edge
187 // (diagonal) cells are not written since they are not read. Launch this kernel
188 // for all cells in the grid using
189 // setDarcyGhostNodes<datatype><<<.. , ..>>>( .. );
190 template<typename T>
191 __global__ void setDarcyGhostNodes(
192 T* __restrict__ dev_scalarfield,
193 const int bc_xn,
194 const int bc_xp,
195 const int bc_yn,
196 const int bc_yp,
197 const int bc_bot,
198 const int bc_top)
199 {
200 // 3D thread index
201 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
202 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
203 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
204
205 // Grid dimensions
206 const unsigned int nx = devC_grid.num[0];
207 const unsigned int ny = devC_grid.num[1];
208 const unsigned int nz = devC_grid.num[2];
209
210 // check that we are not outside the fluid grid
211 if (x < nx && y < ny && z < nz) {
212
213 const T val = dev_scalarfield[d_idx(x,y,z)];
214
215 // x
216 if (x == 0 && bc_xn == 0)
217 dev_scalarfield[idx(-1,y,z)] = val; // Dirichlet
218 if (x == 1 && bc_xn == 1)
219 dev_scalarfield[idx(-1,y,z)] = val; // Neumann
220 if (x == 0 && bc_xn == 2)
221 dev_scalarfield[idx(nx,y,z)] = val; // Periodic -y
222
223 if (x == nx-1 && bc_xp == 0)
224 dev_scalarfield[idx(nx,y,z)] = val; // Dirichlet
225 if (x == nx-2 && bc_xp == 1)
226 dev_scalarfield[idx(nx,y,z)] = val; // Neumann
227 if (x == nx-1 && bc_xp == 2)
228 dev_scalarfield[idx(-1,y,z)] = val; // Periodic +x
229
230 // y
231 if (y == 0 && bc_yn == 0)
232 dev_scalarfield[idx(x,-1,z)] = val; // Dirichlet
233 if (y == 1 && bc_yn == 1)
234 dev_scalarfield[idx(x,-1,z)] = val; // Neumann
235 if (y == 0 && bc_yn == 2)
236 dev_scalarfield[idx(x,ny,z)] = val; // Periodic -y
237
238 if (y == ny-1 && bc_yp == 0)
239 dev_scalarfield[idx(x,ny,z)] = val; // Dirichlet
240 if (y == ny-2 && bc_yp == 1)
241 dev_scalarfield[idx(x,ny,z)] = val; // Neumann
242 if (y == ny-1 && bc_yp == 2)
243 dev_scalarfield[idx(x,-1,z)] = val; // Periodic +y
244
245 // z
246 if (z == 0 && bc_bot == 0)
247 dev_scalarfield[idx(x,y,-1)] = val; // Dirichlet
248 if (z == 1 && bc_bot == 1)
249 dev_scalarfield[idx(x,y,-1)] = val; // Neumann
250 if (z == 0 && bc_bot == 2)
251 dev_scalarfield[idx(x,y,nz)] = val; // Periodic -z
252
253 if (z == nz-1 && bc_top == 0)
254 dev_scalarfield[idx(x,y,nz)] = val; // Dirichlet
255 if (z == nz-2 && bc_top == 1)
256 dev_scalarfield[idx(x,y,nz)] = val; // Neumann
257 if (z == nz-1 && bc_top == 2)
258 dev_scalarfield[idx(x,y,-1)] = val; // Periodic +z
259 }
260 }
261
262 // Update a field in the ghost nodes from their parent cell values. The edge
263 // (diagonal) cells are not written since they are not read. Launch this kernel
264 // for all cells in the grid using
265 // setDarcyGhostNodes<datatype><<<.. , ..>>>( .. );
266 template<typename T>
267 __global__ void setDarcyGhostNodesFlux(
268 T* __restrict__ dev_scalarfield, // out
269 const int bc_bot, // in
270 const int bc_top, // in
271 const Float bc_bot_flux, // in
272 const Float bc_top_flux, // in
273 const Float* __restrict__ dev_darcy_k, // in
274 const Float mu) // in
275 {
276 // 3D thread index
277 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
278 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
279 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
280
281 // Grid dimensions
282 const unsigned int nx = devC_grid.num[0];
283 const unsigned int ny = devC_grid.num[1];
284 const unsigned int nz = devC_grid.num[2];
285
286 // check that we are not outside the fluid grid
287 if (x < nx && y < ny && z < nz && (bc_bot == 4 || bc_top == 4)) {
288
289 const T p = dev_scalarfield[d_idx(x,y,z)];
290 const Float k = dev_darcy_k[d_idx(x,y,z)];
291 const Float dz = devC_grid.L[2]/nz;
292
293 Float q_z = 0.;
294 if (z == 0)
295 q_z = bc_bot_flux;
296 else if (z == nz-1)
297 q_z = bc_top_flux;
298
299 const Float p_ghost = -mu/k*q_z * dz + p;
300
301 // z
302 if (z == 0 && bc_bot == 4)
303 dev_scalarfield[idx(x,y,-1)] = p_ghost;
304
305 if (z == nz-1 && bc_top == 4)
306 dev_scalarfield[idx(x,y,nz)] = p_ghost;
307 }
308 }
309
310 // Return the volume of a sphere with radius r
311 __forceinline__ __device__ Float sphereVolume(const Float r)
312 {
313 return 4.0/3.0*M_PI*pow(r, 3);
314 }
315
316 __device__ Float3 abs(const Float3 v)
317 {
318 return MAKE_FLOAT3(abs(v.x), abs(v.y), abs(v.z));
319 }
320
321
322 // Returns a weighting factor based on particle position and fluid center
323 // position
324 __device__ Float weight(
325 const Float3 x_p, // in: Particle center position
326 const Float3 x_f, // in: Fluid pressure node position
327 const Float dx, // in: Cell spacing, x
328 const Float dy, // in: Cell spacing, y
329 const Float dz) // in: Cell spacing, z
330 {
331 const Float3 dist = abs(x_p - x_f);
332 if (dist.x < dx && dist.y < dy && dist.z < dz)
333 return (1.0 - dist.x/dx)*(1.0 - dist.y/dy)*(1.0 - dist.z/dz);
334 else
335 return 0.0;
336 }
337
338 // Returns a weighting factor based on particle position and fluid center
339 // position
340 __device__ Float weightDist(
341 const Float3 x, // in: Vector between cell and particle center
342 const Float dx, // in: Cell spacing, x
343 const Float dy, // in: Cell spacing, y
344 const Float dz) // in: Cell spacing, z
345 {
346 const Float3 dist = abs(x);
347 if (dist.x < dx && dist.y < dy && dist.z < dz)
348 return (1.0 - dist.x/dx)*(1.0 - dist.y/dy)*(1.0 - dist.z/dz);
349 else
350 return 0.0;
351 }
352
353 // Find the porosity in each cell on the base of a bilinear weighing scheme,
354 // centered at the cell center.
355 __global__ void findDarcyPorositiesLinear(
356 const unsigned int* __restrict__ dev_cellStart, // in
357 const unsigned int* __restrict__ dev_cellEnd, // in
358 const Float4* __restrict__ dev_x_sorted, // in
359 const Float4* __restrict__ dev_vel_sorted, // in
360 const unsigned int iteration, // in
361 const unsigned int ndem, // in
362 const unsigned int np, // in
363 const Float c_phi, // in
364 Float* __restrict__ dev_darcy_phi, // in + out
365 Float* __restrict__ dev_darcy_dphi, // in + out
366 Float* __restrict__ dev_darcy_div_v_p, // out
367 Float3* __restrict__ dev_darcy_vp_avg) // out
368 {
369 // 3D thread index
370 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
371 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
372 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
373
374 // Grid dimensions
375 const unsigned int nx = devC_grid.num[0];
376 const unsigned int ny = devC_grid.num[1];
377 const unsigned int nz = devC_grid.num[2];
378
379 // Cell dimensions
380 const Float dx = devC_grid.L[0]/nx;
381 const Float dy = devC_grid.L[1]/ny;
382 const Float dz = devC_grid.L[2]/nz;
383
384 Float solid_volume = 0.0;
385 Float solid_volume_new = 0.0;
386 Float4 xr; // particle pos. and radius
387
388 // check that we are not outside the fluid grid
389 if (x < nx && y < ny && z < nz) {
390
391 if (np > 0) {
392
393 // Cell center position
394 const Float3 X = MAKE_FLOAT3(
395 x*dx + 0.5*dx,
396 y*dy + 0.5*dy,
397 z*dz + 0.5*dz);
398
399 //Float d, r;
400 Float s, vol_p;
401 Float phi = 1.00;
402 Float4 v;
403 Float3 vp_avg_num = MAKE_FLOAT3(0.0, 0.0, 0.0);
404 Float vp_avg_denum = 0.0;
405
406 // Read old porosity
407 Float phi_0 = dev_darcy_phi[d_idx(x,y,z)];
408
409 // The cell 3d index
410 const int3 gridPos = make_int3((int)x,(int)y,(int)z);
411
412 // The neighbor cell 3d index
413 int3 targetCell;
414
415 // The distance modifier for particles across periodic boundaries
416 Float3 dist, distmod;
417
418 unsigned int cellID, startIdx, endIdx, i;
419
420 // Iterate over 27 neighbor cells, R = cell width
421 for (int z_dim=-1; z_dim<2; ++z_dim) { // z-axis
422 for (int y_dim=-1; y_dim<2; ++y_dim) { // y-axis
423 for (int x_dim=-1; x_dim<2; ++x_dim) { // x-axis
424
425 // Index of neighbor cell this iteration is looking at
426 targetCell = gridPos + make_int3(x_dim, y_dim, z_dim);
427
428 // Do not artifically enhance porosity at lower boundary
429 if (targetCell.z == -1)
430 targetCell.z = 1;
431
432 // Mirror particle grid at frictionless boundaries
433 if (devC_grid.periodic == 2) {
434 if (targetCell.y == -1) {
435 targetCell.y = 1;
436 }
437 if (targetCell.y == devC_grid.num[1]) {
438 targetCell.y = devC_grid.num[1] - 2;
439 }
440 }
441
442 // Get distance modifier for interparticle
443 // vector, if it crosses a periodic boundary
444 distmod = MAKE_FLOAT3(0.0, 0.0, 0.0);
445 if (findDistMod(&targetCell, &distmod) != -1) {
446
447 // Calculate linear cell ID
448 cellID = targetCell.x
449 + targetCell.y * devC_grid.num[0]
450 + (devC_grid.num[0] * devC_grid.num[1])
451 * targetCell.z;
452
453 // Lowest particle index in cell
454 startIdx = dev_cellStart[cellID];
455
456 // Make sure cell is not empty
457 if (startIdx != 0xffffffff) {
458
459 // Highest particle index in cell
460 endIdx = dev_cellEnd[cellID];
461
462 // Iterate over cell particles
463 for (i=startIdx; i<endIdx; ++i) {
464
465 // Read particle position and radius
466 xr = dev_x_sorted[i];
467 v = dev_vel_sorted[i];
468 //x3 = MAKE_FLOAT3(xr.x, xr.y, xr.z);
469 //v3 = MAKE_FLOAT3(v.x, v.y, v.z);
470
471 // Find center distance
472 dist = MAKE_FLOAT3(
473 X.x - xr.x,
474 X.y - xr.y,
475 X.z - xr.z);
476 dist += distmod;
477 s = weightDist(dist, dx, dy, dz);
478 vol_p = sphereVolume(xr.w);
479
480 solid_volume += s*vol_p;
481
482 // Summation procedure for average velocity
483 vp_avg_num +=
484 s*vol_p*MAKE_FLOAT3(v.x, v.y, v.z);
485 vp_avg_denum += s*vol_p;
486
487 // Find projected new void volume
488 // Eulerian update of positions
489 xr += v*devC_dt;
490 dist = MAKE_FLOAT3(
491 X.x - xr.x,
492 X.y - xr.y,
493 X.z - xr.z) + distmod;
494 solid_volume_new +=
495 weightDist(dist, dx, dy, dz)*vol_p;
496 }
497 }
498 }
499 }
500 }
501 }
502
503 Float cell_volume = dx*dy*dz;
504 if (z == nz - 1)
505 cell_volume *= 0.875;
506
507 // Make sure that the porosity is in the interval [0.0;1.0]
508 phi = fmin(0.9, fmax(0.1, 1.0 - solid_volume/cell_volume));
509 Float phi_new = fmin(0.9, fmax(0.1,
510 1.0 - solid_volume_new/cell_volume));
511
512 Float dphi;
513 Float3 vp_avg;
514 if (iteration == 0) {
515 dphi = 0.0;
516 vp_avg = MAKE_FLOAT3(0.0, 0.0, 0.0);
517 } else {
518 dphi = 0.5*(phi_new - phi_0);
519 vp_avg = vp_avg_num/fmax(1.0e-16, vp_avg_denum);
520 }
521
522 // Save porosity and porosity change
523 const unsigned int cellidx = d_idx(x,y,z);
524 dev_darcy_phi[cellidx] = phi*c_phi;
525 dev_darcy_dphi[cellidx] = dphi*c_phi;
526 dev_darcy_vp_avg[cellidx] = vp_avg;
527
528 #ifdef CHECK_FLUID_FINITE
529 (void)checkFiniteFloat("phi", x, y, z, phi);
530 (void)checkFiniteFloat("dphi", x, y, z, dphi);
531 #endif
532 } else {
533
534 const unsigned int cellidx = d_idx(x,y,z);
535
536 dev_darcy_phi[cellidx] = 0.9;
537 dev_darcy_dphi[cellidx] = 0.0;
538 }
539 }
540 }
541
542
543 // Copy the porosity, porosity change, div_v_p and vp_avg values to the grid
544 // edges from the grid interior at the frictionless Y boundaries (grid.periodic
545 // == 2).
546 __global__ void copyDarcyPorositiesToEdges(
547 Float* __restrict__ dev_darcy_phi, // in + out
548 Float* __restrict__ dev_darcy_dphi, // in + out
549 Float* __restrict__ dev_darcy_div_v_p, // in + out
550 Float3* __restrict__ dev_darcy_vp_avg) // in + out
551 {
552 // 3D thread index
553 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
554 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
555 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
556
557 // Grid dimensions
558 const unsigned int nx = devC_grid.num[0];
559 const unsigned int ny = devC_grid.num[1];
560 const unsigned int nz = devC_grid.num[2];
561
562 // check that we are not outside the fluid grid
563 if (devC_grid.periodic == 2 &&
564 x < nx && (y == 0 || y == ny - 1) && z < nz) {
565
566 // Read porosities from this cell
567 int y_read;
568
569 // read values from inside cells
570 if (y == 0)
571 y_read = 1;
572 if (y == ny - 1)
573 y_read = ny - 2;
574
575 const unsigned int readidx = d_idx(x, y_read, z);
576 const unsigned int writeidx = d_idx(x, y, z);
577
578 dev_darcy_phi[writeidx] = dev_darcy_phi[readidx];
579 dev_darcy_dphi[writeidx] = dev_darcy_dphi[readidx];
580 dev_darcy_div_v_p[writeidx] = dev_darcy_div_v_p[readidx];
581 dev_darcy_vp_avg[writeidx] = dev_darcy_vp_avg[readidx];
582 }
583 }
584
585
586 // Copy the porosity, porosity change, div_v_p and vp_avg values to the grid
587 // bottom from the grid interior at the frictionless Y boundaries (grid.periodic
588 // == 2).
589 __global__ void copyDarcyPorositiesToBottom(
590 Float* __restrict__ dev_darcy_phi, // in + out
591 Float* __restrict__ dev_darcy_dphi, // in + out
592 Float* __restrict__ dev_darcy_div_v_p, // in + out
593 Float3* __restrict__ dev_darcy_vp_avg) // in + out
594 {
595 // 3D thread index
596 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
597 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
598 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
599
600 // Grid dimensions
601 const unsigned int nx = devC_grid.num[0];
602 const unsigned int ny = devC_grid.num[1];
603
604 // check that we are not outside the fluid grid
605 if (devC_grid.periodic == 2 &&
606 x < nx && y < ny && z == 0) {
607
608 const unsigned int readidx = d_idx(x, y, 1);
609 const unsigned int writeidx = d_idx(x, y, z);
610
611 dev_darcy_phi[writeidx] = dev_darcy_phi[readidx];
612 dev_darcy_dphi[writeidx] = dev_darcy_dphi[readidx];
613 dev_darcy_div_v_p[writeidx] = dev_darcy_div_v_p[readidx];
614 dev_darcy_vp_avg[writeidx] = dev_darcy_vp_avg[readidx];
615 }
616 }
617
618
619 // Find the porosity in each cell on the base of a sphere, centered at the cell
620 // center.
621 __global__ void findDarcyPorosities(
622 const unsigned int* __restrict__ dev_cellStart, // in
623 const unsigned int* __restrict__ dev_cellEnd, // in
624 const Float4* __restrict__ dev_x_sorted, // in
625 const Float4* __restrict__ dev_vel_sorted, // in
626 const unsigned int iteration, // in
627 const unsigned int ndem, // in
628 const unsigned int np, // in
629 const Float c_phi, // in
630 Float* __restrict__ dev_darcy_phi, // in + out
631 Float* __restrict__ dev_darcy_dphi) // in + out
632 {
633 // 3D thread index
634 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
635 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
636 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
637
638 // Grid dimensions
639 const unsigned int nx = devC_grid.num[0];
640 const unsigned int ny = devC_grid.num[1];
641 const unsigned int nz = devC_grid.num[2];
642
643 // Cell dimensions
644 const Float dx = devC_grid.L[0]/nx;
645 const Float dy = devC_grid.L[1]/ny;
646 const Float dz = devC_grid.L[2]/nz;
647
648 // Cell sphere radius
649 const Float R = fmin(dx, fmin(dy,dz)) * 0.5; // diameter = cell width
650 const Float cell_volume = sphereVolume(R);
651
652 Float void_volume = cell_volume; // current void volume
653 Float void_volume_new = cell_volume; // projected new void volume
654 Float4 xr; // particle pos. and radius
655
656 // check that we are not outside the fluid grid
657 if (x < nx && y < ny && z < nz) {
658
659 if (np > 0) {
660
661 // Cell sphere center position
662 const Float3 X = MAKE_FLOAT3(
663 x*dx + 0.5*dx,
664 y*dy + 0.5*dy,
665 z*dz + 0.5*dz);
666
667 Float d, r;
668 Float phi = 1.00;
669 Float4 v;
670 //unsigned int n = 0;
671
672 // Read old porosity
673 Float phi_0 = dev_darcy_phi[d_idx(x,y,z)];
674
675 // The cell 3d index
676 const int3 gridPos = make_int3((int)x,(int)y,(int)z);
677
678 // The neighbor cell 3d index
679 int3 targetCell;
680
681 // The distance modifier for particles across periodic boundaries
682 Float3 dist, distmod;
683
684 unsigned int cellID, startIdx, endIdx, i;
685
686 // Iterate over 27 neighbor cells, R = cell width
687 for (int z_dim=-1; z_dim<2; ++z_dim) { // z-axis
688 for (int y_dim=-1; y_dim<2; ++y_dim) { // y-axis
689 for (int x_dim=-1; x_dim<2; ++x_dim) { // x-axis
690
691 // Index of neighbor cell this iteration is looking at
692 targetCell = gridPos + make_int3(x_dim, y_dim, z_dim);
693
694 // Get distance modifier for interparticle
695 // vector, if it crosses a periodic boundary
696 distmod = MAKE_FLOAT3(0.0, 0.0, 0.0);
697 if (findDistMod(&targetCell, &distmod) != -1) {
698
699 // Calculate linear cell ID
700 cellID = targetCell.x
701 + targetCell.y * devC_grid.num[0]
702 + (devC_grid.num[0] * devC_grid.num[1])
703 * targetCell.z;
704
705 // Lowest particle index in cell
706 startIdx = dev_cellStart[cellID];
707
708 // Make sure cell is not empty
709 if (startIdx != 0xffffffff) {
710
711 // Highest particle index in cell
712 endIdx = dev_cellEnd[cellID];
713
714 // Iterate over cell particles
715 for (i=startIdx; i<endIdx; ++i) {
716
717 // Read particle position and radius
718 xr = dev_x_sorted[i];
719 v = dev_vel_sorted[i];
720 r = xr.w;
721
722 // Find center distance
723 dist = MAKE_FLOAT3(
724 X.x - xr.x,
725 X.y - xr.y,
726 X.z - xr.z);
727 dist += distmod;
728 d = length(dist);
729
730 // Lens shaped intersection
731 if ((R - r) < d && d < (R + r))
732 void_volume -=
733 1.0/(12.0*d) * (
734 M_PI*(R + r - d)*(R + r - d)
735 *(d*d + 2.0*d*r - 3.0*r*r
736 + 2.0*d*R + 6.0*r*R
737 - 3.0*R*R) );
738
739 // Particle fully contained in cell sphere
740 if (d <= R - r)
741 void_volume -= sphereVolume(r);
742
743 //// Find projected new void volume
744 // Eulerian update of positions
745 xr += v*devC_dt;
746
747 // Find center distance
748 dist = MAKE_FLOAT3(
749 X.x - xr.x,
750 X.y - xr.y,
751 X.z - xr.z);
752 dist += distmod;
753 d = length(dist);
754
755 // Lens shaped intersection
756 if ((R - r) < d && d < (R + r))
757 void_volume_new -=
758 1.0/(12.0*d) * (
759 M_PI*(R + r - d)*(R + r - d)
760 *(d*d + 2.0*d*r - 3.0*r*r
761 + 2.0*d*R + 6.0*r*R
762 - 3.0*R*R) );
763
764 // Particle fully contained in cell sphere
765 if (d <= R - r)
766 void_volume_new -= sphereVolume(r);
767 }
768 }
769 }
770 }
771 }
772 }
773
774 // Make sure that the porosity is in the interval [0.0;1.0]
775 phi = fmin(0.9, fmax(0.1, void_volume/cell_volume));
776 Float phi_new = fmin(0.9, fmax(0.1, void_volume_new/cell_volume));
777
778 // Central difference after first iteration
779 Float dphi;
780 if (iteration == 0)
781 dphi = phi_new - phi;
782 else
783 dphi = 0.5*(phi_new - phi_0);
784
785 // Save porosity and porosity change
786 //phi = 0.5; dphi = 0.0; // disable porosity effects
787 const unsigned int cellidx = d_idx(x,y,z);
788 dev_darcy_phi[cellidx] = phi*c_phi;
789 dev_darcy_dphi[cellidx] = dphi*c_phi;
790
791 /*printf("\n%d,%d,%d: findDarcyPorosities\n"
792 "\tphi = %f\n"
793 "\tdphi = %e\n"
794 "\tdphi_eps= %e\n"
795 "\tX = %e, %e, %e\n"
796 "\txr = %e, %e, %e\n"
797 "\tq = %e\n"
798 "\tdiv_v_p = %e\n"
799 , x,y,z,
800 phi, dphi,
801 dot_epsilon_kk*(1.0 - phi)*devC_dt,
802 X.x, X.y, X.z,
803 xr.x, xr.y, xr.z,
804 q,
805 dot_epsilon_kk);*/
806
807 #ifdef CHECK_FLUID_FINITE
808 (void)checkFiniteFloat("phi", x, y, z, phi);
809 (void)checkFiniteFloat("dphi", x, y, z, dphi);
810 #endif
811 } else {
812 const unsigned int cellidx = d_idx(x,y,z);
813 dev_darcy_phi[cellidx] = 0.999;
814 dev_darcy_dphi[cellidx] = 0.0;
815 }
816 }
817 }
818
819 // Find the particle velocity divergence at the cell center from the average
820 // particle velocities on the cell faces
821 __global__ void findDarcyParticleVelocityDivergence(
822 const Float* __restrict__ dev_darcy_v_p_x, // in
823 const Float* __restrict__ dev_darcy_v_p_y, // in
824 const Float* __restrict__ dev_darcy_v_p_z, // in
825 Float* __restrict__ dev_darcy_div_v_p) // out
826 {
827 // 3D thread index
828 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
829 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
830 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
831
832 // Grid dimensions
833 const unsigned int nx = devC_grid.num[0];
834 const unsigned int ny = devC_grid.num[1];
835 const unsigned int nz = devC_grid.num[2];
836
837 if (x < nx && y < ny && z < nz) {
838
839 // read values
840 Float v_p_xn = dev_darcy_v_p_x[d_vidx(x,y,z)];
841 Float v_p_xp = dev_darcy_v_p_x[d_vidx(x+1,y,z)];
842 Float v_p_yn = dev_darcy_v_p_y[d_vidx(x,y,z)];
843 Float v_p_yp = dev_darcy_v_p_y[d_vidx(x,y+1,z)];
844 Float v_p_zn = dev_darcy_v_p_z[d_vidx(x,y,z)];
845 Float v_p_zp = dev_darcy_v_p_z[d_vidx(x,y,z+1)];
846
847 // cell dimensions
848 const Float dx = devC_grid.L[0]/nx;
849 const Float dy = devC_grid.L[1]/ny;
850 const Float dz = devC_grid.L[2]/nz;
851
852 // calculate the divergence using first order central finite differences
853 const Float div_v_p =
854 (v_p_xp - v_p_xn)/dx +
855 (v_p_yp - v_p_yn)/dy +
856 (v_p_zp - v_p_zn)/dz;
857
858 dev_darcy_div_v_p[d_idx(x,y,z)] = div_v_p;
859
860 #ifdef CHECK_FLUID_FINITE
861 checkFiniteFloat("div_v_p", x, y, z, div_v_p);
862 #endif
863 }
864 }
865
866 // Find the cell-centered pressure gradient using central differences
867 __global__ void findDarcyPressureGradient(
868 const Float* __restrict__ dev_darcy_p, // in
869 Float3* __restrict__ dev_darcy_grad_p) // out
870 {
871 // 3D thread index
872 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
873 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
874 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
875
876 // Grid dimensions
877 const unsigned int nx = devC_grid.num[0];
878 const unsigned int ny = devC_grid.num[1];
879 const unsigned int nz = devC_grid.num[2];
880
881 if (x < nx && y < ny && z < nz) {
882
883 // read values
884 Float p_xn = dev_darcy_p[d_idx(x-1,y,z)];
885 Float p_xp = dev_darcy_p[d_idx(x+1,y,z)];
886 Float p_yn = dev_darcy_p[d_idx(x,y-1,z)];
887 Float p_yp = dev_darcy_p[d_idx(x,y+1,z)];
888 Float p_zn = dev_darcy_p[d_idx(x,y,z-1)];
889 Float p_zp = dev_darcy_p[d_idx(x,y,z+1)];
890
891 // cell dimensions
892 const Float dx = devC_grid.L[0]/nx;
893 const Float dy = devC_grid.L[1]/ny;
894 const Float dz = devC_grid.L[2]/nz;
895
896 // calculate the divergence using first order central finite differences
897 const Float3 grad_p = MAKE_FLOAT3(
898 (p_xp - p_xn)/(dx + dx),
899 (p_yp - p_yn)/(dy + dy),
900 (p_zp - p_zn)/(dz + dz));
901
902 dev_darcy_grad_p[d_idx(x,y,z)] = grad_p;
903
904 //if (grad_p.x != 0.0 || grad_p.y != 0.0 || grad_p.z != 0.0)
905 /*printf("%d,%d,%d findDarcyPressureGradient:\n"
906 "\tp_x = %.2e, %.2e\n"
907 "\tp_y = %.2e, %.2e\n"
908 "\tp_z = %.2e, %.2e\n"
909 "\tgrad_p = %.2e, %.2e, %.2e\n",
910 x, y, z,
911 p_xn, p_xp,
912 p_yn, p_yp,
913 p_zn, p_zp,
914 grad_p.x, grad_p.y, grad_p.z); // */
915 #ifdef CHECK_FLUID_FINITE
916 checkFiniteFloat3("grad_p", x, y, z, grad_p);
917 #endif
918 }
919 }
920
921 // Find particle-fluid interaction force as outlined by Goren et al. 2011, and
922 // originally by Gidaspow 1992. All terms other than the pressure force are
923 // neglected. The buoyancy force is included.
924 __global__ void findDarcyPressureForceLinear(
925 const Float4* __restrict__ dev_x, // in
926 const Float3* __restrict__ dev_darcy_grad_p, // in
927 const Float* __restrict__ dev_darcy_phi, // in
928 const unsigned int wall0_iz, // in
929 const Float rho_f, // in
930 const int bc_top, // in
931 Float4* __restrict__ dev_force, // out
932 Float4* __restrict__ dev_darcy_f_p) // out
933 {
934 unsigned int i = threadIdx.x + blockIdx.x*blockDim.x; // Particle index
935
936 if (i < devC_np) {
937
938 // read particle information
939 const Float4 x = dev_x[i];
940 const Float3 x3 = MAKE_FLOAT3(x.x, x.y, x.z);
941
942 // determine fluid cell containing the particle
943 const unsigned int i_x =
944 floor((x.x - devC_grid.origo[0])/(devC_grid.L[0]/devC_grid.num[0]));
945 const unsigned int i_y =
946 floor((x.y - devC_grid.origo[1])/(devC_grid.L[1]/devC_grid.num[1]));
947 const unsigned int i_z =
948 floor((x.z - devC_grid.origo[2])/(devC_grid.L[2]/devC_grid.num[2]));
949 const unsigned int cellidx = d_idx(i_x, i_y, i_z);
950
951 // determine cell dimensions
952 const unsigned int nx = devC_grid.num[0];
953 const unsigned int ny = devC_grid.num[1];
954 const unsigned int nz = devC_grid.num[2];
955 const Float dx = devC_grid.L[0]/nx;
956 const Float dy = devC_grid.L[1]/ny;
957 const Float dz = devC_grid.L[2]/nz;
958
959 // read fluid information
960 //const Float phi = dev_darcy_phi[cellidx];
961
962 // Cell center position
963 const Float3 X = MAKE_FLOAT3(
964 i_x*dx + 0.5*dx,
965 i_y*dy + 0.5*dy,
966 i_z*dz + 0.5*dz);
967
968 Float3 grad_p = MAKE_FLOAT3(0., 0., 0.);
969 Float3 grad_p_iter, n;
970 int ix_n, iy_n, iz_n; // neighbor indexes
971
972 // Loop over 27 closest cells to find all pressure gradient
973 // contributions
974 for (int d_iz = -1; d_iz<2; d_iz++) {
975 for (int d_iy = -1; d_iy<2; d_iy++) {
976 for (int d_ix = -1; d_ix<2; d_ix++) {
977
978 ix_n = i_x + d_ix;
979 iy_n = i_y + d_iy;
980 iz_n = i_z + d_iz;
981
982 grad_p_iter = dev_darcy_grad_p[d_idx(ix_n, iy_n, iz_n)];
983
984 // make sure edge and corner ghost nodes aren't read
985 if ( // edges passing through (0,0,0)
986 (ix_n == -1 && iy_n == -1) ||
987 (ix_n == -1 && iz_n == -1) ||
988 (iy_n == -1 && iz_n == -1) ||
989
990 // edges passing through (nx,ny,nz)
991 (ix_n == nx && iy_n == ny) ||
992 (ix_n == nx && iz_n == nz) ||
993 (iy_n == ny && iz_n == nz) ||
994
995 (ix_n == nx && iy_n == -1) ||
996 (ix_n == nx && iz_n == -1) ||
997
998 (iy_n == ny && ix_n == -1) ||
999 (iy_n == ny && iz_n == -1) ||
1000
1001 (iz_n == nz && ix_n == -1) ||
1002 (iz_n == nz && iy_n == -1))
1003 grad_p_iter = MAKE_FLOAT3(0., 0., 0.);
1004
1005 // Add Neumann BC at top wall
1006 if (bc_top == 0 && i_z + d_iz >= wall0_iz - 1)
1007 grad_p_iter.z = 0.0;
1008
1009 n = MAKE_FLOAT3(dx*d_ix, dy*d_iy, dz*d_iz);
1010
1011 grad_p += weight(x3, X + n, dx, dy, dz)*grad_p_iter;
1012
1013 //*
1014 Float s = weight(x3, X + n, dx, dy, dz);
1015 /*if (s > 1.0e-12)
1016 printf("[%d+%d, %d+%d, %d+%d] findPF nb\n"
1017 "\tn = %f, %f, %f\n"
1018 "\tgrad_pi= %.2e, %.2e, %.2e\n"
1019 "\ts = %f\n"
1020 "\tgrad_p = %.2e, %.2e, %.2e\n",
1021 i_x, d_ix,
1022 i_y, d_iy,
1023 i_z, d_iz,
1024 n.x, n.y, n.z,
1025 grad_p_iter.x, grad_p_iter.y, grad_p_iter.z,
1026 s,
1027 s*grad_p_iter.x,
1028 s*grad_p_iter.y,
1029 s*grad_p_iter.z); // */
1030 }
1031 }
1032 }
1033
1034 // find particle volume (radius in x.w)
1035 const Float v = sphereVolume(x.w);
1036
1037 // find pressure gradient force plus buoyancy force.
1038 // buoyancy force = weight of displaced fluid
1039 Float3 f_p = -1.0*grad_p*v//(1.0 - phi)
1040 - rho_f*v*MAKE_FLOAT3(
1041 devC_params.g[0],
1042 devC_params.g[1],
1043 devC_params.g[2]);
1044
1045 // Add Neumann BC at top wall
1046 //if (i_z >= wall0_iz - 1)
1047 if (bc_top == 0 && i_z >= wall0_iz)
1048 f_p.z = 0.0;
1049
1050 //if (length(f_p) > 1.0e-12)
1051 /*printf("%d,%d,%d findPressureForceLinear:\n"
1052 "\tphi = %f\n"
1053 "\tx = %f, %f, %f\n"
1054 "\tX = %f, %f, %f\n"
1055 "\tgrad_p = %.2e, %.2e, %.2e\n"
1056 //"\tp_x = %.2e, %.2e\n"
1057 //"\tp_y = %.2e, %.2e\n"
1058 //"\tp_z = %.2e, %.2e\n"
1059 "\tf_p = %.2e, %.2e, %.2e\n",
1060 i_x, i_y, i_z,
1061 phi,
1062 x3.x, x3.y, x3.z,
1063 X.x, X.y, X.z,
1064 grad_p.x, grad_p.y, grad_p.z,
1065 f_p.x, f_p.y, f_p.z); // */
1066
1067 #ifdef CHECK_FLUID_FINITE
1068 checkFiniteFloat3("f_p", i_x, i_y, i_z, f_p);
1069 #endif
1070 // save force
1071 dev_force[i] += MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0);
1072 dev_darcy_f_p[i] = MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0);
1073 }
1074 }
1075
1076 // Find particle-fluid interaction force as outlined by Zhou et al. 2010, and
1077 // originally by Gidaspow 1992. All terms other than the pressure force are
1078 // neglected. The buoyancy force is included.
1079 __global__ void findDarcyPressureForce(
1080 const Float4* __restrict__ dev_x, // in
1081 const Float* __restrict__ dev_darcy_p, // in
1082 //const Float* __restrict__ dev_darcy_phi, // in
1083 const unsigned int wall0_iz, // in
1084 const Float rho_f, // in
1085 Float4* __restrict__ dev_force, // out
1086 Float4* __restrict__ dev_darcy_f_p) // out
1087 {
1088 unsigned int i = threadIdx.x + blockIdx.x*blockDim.x; // Particle index
1089
1090 if (i < devC_np) {
1091
1092 // read particle information
1093 const Float4 x = dev_x[i];
1094
1095 // determine fluid cell containing the particle
1096 const unsigned int i_x =
1097 floor((x.x - devC_grid.origo[0])/(devC_grid.L[0]/devC_grid.num[0]));
1098 const unsigned int i_y =
1099 floor((x.y - devC_grid.origo[1])/(devC_grid.L[1]/devC_grid.num[1]));
1100 const unsigned int i_z =
1101 floor((x.z - devC_grid.origo[2])/(devC_grid.L[2]/devC_grid.num[2]));
1102 const unsigned int cellidx = d_idx(i_x, i_y, i_z);
1103
1104 // determine cell dimensions
1105 const Float dx = devC_grid.L[0]/devC_grid.num[0];
1106 const Float dy = devC_grid.L[1]/devC_grid.num[1];
1107 const Float dz = devC_grid.L[2]/devC_grid.num[2];
1108
1109 // read fluid information
1110 //const Float phi = dev_darcy_phi[cellidx];
1111 const Float p_xn = dev_darcy_p[d_idx(i_x-1,i_y,i_z)];
1112 const Float p = dev_darcy_p[cellidx];
1113 const Float p_xp = dev_darcy_p[d_idx(i_x+1,i_y,i_z)];
1114 const Float p_yn = dev_darcy_p[d_idx(i_x,i_y-1,i_z)];
1115 const Float p_yp = dev_darcy_p[d_idx(i_x,i_y+1,i_z)];
1116 const Float p_zn = dev_darcy_p[d_idx(i_x,i_y,i_z-1)];
1117 Float p_zp = dev_darcy_p[d_idx(i_x,i_y,i_z+1)];
1118
1119 // Add Neumann BC at top wall
1120 if (i_z >= wall0_iz - 1)
1121 p_zp = p;
1122
1123 // find particle volume (radius in x.w)
1124 const Float V = sphereVolume(x.w);
1125
1126 // determine pressure gradient from first order central difference
1127 const Float3 grad_p = MAKE_FLOAT3(
1128 (p_xp - p_xn)/(dx + dx),
1129 (p_yp - p_yn)/(dy + dy),
1130 (p_zp - p_zn)/(dz + dz));
1131
1132 // find pressure gradient force plus buoyancy force.
1133 // buoyancy force = weight of displaced fluid
1134 Float3 f_p = -1.0*grad_p*V
1135 - rho_f*V*MAKE_FLOAT3(
1136 devC_params.g[0],
1137 devC_params.g[1],
1138 devC_params.g[2]);
1139
1140 // Add Neumann BC at top wall
1141 if (i_z >= wall0_iz)
1142 f_p.z = 0.0;
1143
1144 /*printf("%d,%d,%d findPF:\n"
1145 "\tphi = %f\n"
1146 "\tp = %f\n"
1147 "\tgrad_p = % f, % f, % f\n"
1148 "\tf_p = % f, % f, % f\n",
1149 i_x, i_y, i_z,
1150 phi, p,
1151 grad_p.x, grad_p.y, grad_p.z,
1152 f_p.x, f_p.y, f_p.z);*/
1153
1154 #ifdef CHECK_FLUID_FINITE
1155 checkFiniteFloat3("f_p", i_x, i_y, i_z, f_p);
1156 #endif
1157 // save force
1158 dev_force[i] += MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0);
1159 dev_darcy_f_p[i] = MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0);
1160 }
1161 }
1162
1163 // Set the pressure at the top boundary to new_pressure
1164 __global__ void setDarcyTopPressure(
1165 const Float new_pressure,
1166 Float* __restrict__ dev_darcy_p,
1167 const unsigned int wall0_iz)
1168 {
1169 // 3D thread index
1170 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
1171 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
1172 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
1173
1174 // check that the thread is located at the top boundary or at the top wall
1175 if (x < devC_grid.num[0] &&
1176 y < devC_grid.num[1] &&
1177 z == devC_grid.num[2]-1 || z == wall0_iz) {
1178
1179 const unsigned int cellidx = idx(x,y,z);
1180
1181 // Write the new pressure the top boundary cells
1182 dev_darcy_p[cellidx] = new_pressure;
1183 }
1184 }
1185
1186 // Set the pressure at the top wall to new_pressure
1187 __global__ void setDarcyTopWallPressure(
1188 const Float new_pressure,
1189 const unsigned int wall0_iz,
1190 Float* __restrict__ dev_darcy_p)
1191 {
1192 // 3D thread index
1193 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
1194 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
1195 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
1196
1197 // check that the thread is located at the top boundary
1198 if (x < devC_grid.num[0] &&
1199 y < devC_grid.num[1] &&
1200 z == wall0_iz) {
1201
1202 const unsigned int cellidx = idx(x,y,z);
1203
1204 // Write the new pressure the top boundary cells
1205 dev_darcy_p[cellidx] = new_pressure;
1206 }
1207 }
1208
1209 // Enforce fixed-flow BC at top wall
1210 __global__ void setDarcyTopWallFixedFlow(
1211 const unsigned int wall0_iz,
1212 Float* __restrict__ dev_darcy_p)
1213 {
1214 // 3D thread index
1215 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
1216 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
1217 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
1218
1219 // check that the thread is located at the top boundary
1220 if (x < devC_grid.num[0] &&
1221 y < devC_grid.num[1] &&
1222 z == wall0_iz+1) {
1223
1224 // Write the new pressure the top boundary cells
1225 const Float new_pressure = dev_darcy_p[idx(x,y,z-2)];
1226 dev_darcy_p[idx(x,y,z)] = new_pressure;
1227 }
1228 }
1229
1230
1231 // Find the cell permeabilities from the Kozeny-Carman equation
1232 __global__ void findDarcyPermeabilities(
1233 const Float k_c, // in
1234 const Float* __restrict__ dev_darcy_phi, // in
1235 Float* __restrict__ dev_darcy_k) // out
1236 {
1237 // 3D thread index
1238 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
1239 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
1240 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
1241
1242 // Grid dimensions
1243 const unsigned int nx = devC_grid.num[0];
1244 const unsigned int ny = devC_grid.num[1];
1245 const unsigned int nz = devC_grid.num[2];
1246
1247 if (x < nx && y < ny && z < nz) {
1248
1249 // 1D thread index
1250 const unsigned int cellidx = d_idx(x,y,z);
1251
1252 Float phi = dev_darcy_phi[cellidx];
1253
1254 // avoid division by zero
1255 if (phi > 0.9999)
1256 phi = 0.9999;
1257
1258 Float k = k_c*pow(phi,3)/pow(1.0 - phi, 2);
1259
1260 /*printf("%d,%d,%d findK:\n"
1261 "\tphi = %f\n"
1262 "\tk = %e\n",
1263 x, y, z,
1264 phi, k);*/
1265
1266 // limit permeability [m*m]
1267 // K_gravel = 3.0e-2 m/s => k_gravel = 2.7e-9 m*m
1268 //k = fmin(2.7e-9, k);
1269 k = fmin(2.7e-10, k);
1270
1271 dev_darcy_k[cellidx] = k;
1272
1273 #ifdef CHECK_FLUID_FINITE
1274 checkFiniteFloat("k", x, y, z, k);
1275 #endif
1276 }
1277 }
1278
1279 // Find the spatial gradients of the permeability.
1280 __global__ void findDarcyPermeabilityGradients(
1281 const Float* __restrict__ dev_darcy_k, // in
1282 Float3* __restrict__ dev_darcy_grad_k) // out
1283 {
1284 // 3D thread index
1285 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
1286 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
1287 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
1288
1289 // Grid dimensions
1290 const unsigned int nx = devC_grid.num[0];
1291 const unsigned int ny = devC_grid.num[1];
1292 const unsigned int nz = devC_grid.num[2];
1293
1294 // Cell size
1295 const Float dx = devC_grid.L[0]/nx;
1296 const Float dy = devC_grid.L[1]/ny;
1297 const Float dz = devC_grid.L[2]/nz;
1298
1299 if (x < nx && y < ny && z < nz) {
1300
1301 // 1D thread index
1302 const unsigned int cellidx = d_idx(x,y,z);
1303
1304 // read values
1305 const Float k_xn = dev_darcy_k[d_idx(x-1,y,z)];
1306 const Float k_xp = dev_darcy_k[d_idx(x+1,y,z)];
1307 const Float k_yn = dev_darcy_k[d_idx(x,y-1,z)];
1308 const Float k_yp = dev_darcy_k[d_idx(x,y+1,z)];
1309 const Float k_zn = dev_darcy_k[d_idx(x,y,z-1)];
1310 const Float k_zp = dev_darcy_k[d_idx(x,y,z+1)];
1311
1312 // gradient approximated by first-order central difference
1313 const Float3 grad_k = MAKE_FLOAT3(
1314 (k_xp - k_xn)/(dx+dx),
1315 (k_yp - k_yn)/(dy+dy),
1316 (k_zp - k_zn)/(dz+dz));
1317
1318 // write result
1319 dev_darcy_grad_k[cellidx] = grad_k;
1320
1321 /*printf("%d,%d,%d findK:\n"
1322 "\tk_x = %e, %e\n"
1323 "\tk_y = %e, %e\n"
1324 "\tk_z = %e, %e\n"
1325 "\tgrad(k) = %e, %e, %e\n",
1326 x, y, z,
1327 k_xn, k_xp,
1328 k_yn, k_yp,
1329 k_zn, k_zp,
1330 grad_k.x, grad_k.y, grad_k.z);*/
1331
1332 #ifdef CHECK_FLUID_FINITE
1333 checkFiniteFloat3("grad_k", x, y, z, grad_k);
1334 #endif
1335 }
1336 }
1337
1338 // Find the temporal gradient in pressure using the backwards Euler method
1339 __global__ void findDarcyPressureChange(
1340 const Float* __restrict__ dev_darcy_p_old, // in
1341 const Float* __restrict__ dev_darcy_p, // in
1342 const unsigned int iter, // in
1343 const unsigned int ndem, // in
1344 Float* __restrict__ dev_darcy_dpdt) // out
1345 {
1346 // 3D thread index
1347 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
1348 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
1349 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
1350
1351 if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) {
1352
1353 // 1D thread index
1354 const unsigned int cellidx = d_idx(x,y,z);
1355
1356 // read values
1357 const Float p_old = dev_darcy_p_old[cellidx];
1358 const Float p = dev_darcy_p[cellidx];
1359
1360 Float dpdt = (p - p_old)/(ndem*devC_dt);
1361
1362 // Ignore the large initial pressure gradients caused by solver "warm
1363 // up" towards hydrostatic pressure distribution
1364 if (iter < 2)
1365 dpdt = 0.0;
1366
1367 // write result
1368 dev_darcy_dpdt[cellidx] = dpdt;
1369
1370 /*printf("%d,%d,%d\n"
1371 "\tp_old = %e\n"
1372 "\tp = %e\n"
1373 "\tdt = %e\n"
1374 "\tdpdt = %e\n",
1375 x,y,z,
1376 p_old, p,
1377 devC_dt, dpdt);*/
1378
1379 #ifdef CHECK_FLUID_FINITE
1380 checkFiniteFloat("dpdt", x, y, z, dpdt);
1381 #endif
1382 }
1383 }
1384
1385 __global__ void firstDarcySolution(
1386 const Float* __restrict__ dev_darcy_p, // in
1387 const Float* __restrict__ dev_darcy_k, // in
1388 const Float* __restrict__ dev_darcy_phi, // in
1389 const Float* __restrict__ dev_darcy_dphi, // in
1390 const Float* __restrict__ dev_darcy_div_v_p, // in
1391 const Float3* __restrict__ dev_darcy_vp_avg, // in
1392 const Float3* __restrict__ dev_darcy_grad_k, // in
1393 const Float beta_f, // in
1394 const Float mu, // in
1395 const int bc_xn, // in
1396 const int bc_xp, // in
1397 const int bc_yn, // in
1398 const int bc_yp, // in
1399 const int bc_bot, // in
1400 const int bc_top, // in
1401 const unsigned int ndem, // in
1402 const unsigned int wall0_iz, // in
1403 const int* __restrict__ dev_darcy_p_constant, // in
1404 Float* __restrict__ dev_darcy_dp_expl) // out
1405 {
1406 // 3D thread index
1407 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
1408 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
1409 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
1410
1411 // Grid dimensions
1412 const unsigned int nx = devC_grid.num[0];
1413 const unsigned int ny = devC_grid.num[1];
1414 const unsigned int nz = devC_grid.num[2];
1415
1416 // Cell size
1417 const Float dx = devC_grid.L[0]/nx;
1418 const Float dy = devC_grid.L[1]/ny;
1419 const Float dz = devC_grid.L[2]/nz;
1420
1421 if (x < nx && y < ny && z < nz) {
1422
1423 // 1D thread index
1424 const unsigned int cellidx = d_idx(x,y,z);
1425
1426 // read values
1427 const Float phi_xn = dev_darcy_phi[d_idx(x-1,y,z)];
1428 const Float phi = dev_darcy_phi[cellidx];
1429 const Float phi_xp = dev_darcy_phi[d_idx(x+1,y,z)];
1430 const Float phi_yn = dev_darcy_phi[d_idx(x,y-1,z)];
1431 const Float phi_yp = dev_darcy_phi[d_idx(x,y+1,z)];
1432 const Float phi_zn = dev_darcy_phi[d_idx(x,y,z-1)];
1433 const Float phi_zp = dev_darcy_phi[d_idx(x,y,z+1)];
1434 const Float dphi = dev_darcy_dphi[cellidx];
1435 const Float3 vp_avg = dev_darcy_vp_avg[cellidx];
1436 const int p_constant = dev_darcy_p_constant[cellidx];
1437
1438 Float p_xn = dev_darcy_p[d_idx(x-1,y,z)];
1439 const Float p = dev_darcy_p[cellidx];
1440 Float p_xp = dev_darcy_p[d_idx(x+1,y,z)];
1441 Float p_yn = dev_darcy_p[d_idx(x,y-1,z)];
1442 Float p_yp = dev_darcy_p[d_idx(x,y+1,z)];
1443 Float p_zn = dev_darcy_p[d_idx(x,y,z-1)];
1444 Float p_zp = dev_darcy_p[d_idx(x,y,z+1)];
1445
1446 const Float k_xn = dev_darcy_k[d_idx(x-1,y,z)];
1447 const Float k = dev_darcy_k[cellidx];
1448 const Float k_xp = dev_darcy_k[d_idx(x+1,y,z)];
1449 const Float k_yn = dev_darcy_k[d_idx(x,y-1,z)];
1450 const Float k_yp = dev_darcy_k[d_idx(x,y+1,z)];
1451 const Float k_zn = dev_darcy_k[d_idx(x,y,z-1)];
1452 const Float k_zp = dev_darcy_k[d_idx(x,y,z+1)];
1453
1454 // Neumann BCs
1455 if (x == 0 && bc_xn == 1)
1456 p_xn = p;
1457 if (x == nx-1 && bc_xp == 1)
1458 p_xp = p;
1459 if (y == 0 && bc_yn == 1)
1460 p_yn = p;
1461 if (y == ny-1 && bc_yp == 1)
1462 p_yp = p;
1463 if (z == 0 && bc_bot == 1)
1464 p_zn = p;
1465 if (z == nz-1 && bc_top == 1)
1466 p_zp = p;
1467
1468 const Float3 grad_phi_central = MAKE_FLOAT3(
1469 (phi_xp - phi_xn)/(dx + dx),
1470 (phi_yp - phi_yn)/(dy + dy),
1471 (phi_zp - phi_zn)/(dz + dz));
1472
1473 // Solve div(k*grad(p)) as a single term, using harmonic mean for
1474 // permeability. k_HM*grad(p) is found between the pressure nodes.
1475 const Float div_k_grad_p =
1476 (2.*k_xp*k/(k_xp + k) *
1477 (p_xp - p)/dx
1478 -
1479 2.*k_xn*k/(k_xn + k) *
1480 (p - p_xn)/dx)/dx
1481 +
1482 (2.*k_yp*k/(k_yp + k) *
1483 (p_yp - p)/dy
1484 -
1485 2.*k_yn*k/(k_yn + k) *
1486 (p - p_yn)/dy)/dy
1487 +
1488 (2.*k_zp*k/(k_zp + k) *
1489 (p_zp - p)/dz
1490 -
1491 2.*k_zn*k/(k_zn + k) *
1492 (p - p_zn)/dz)/dz;
1493
1494 Float dp_expl =
1495 ndem*devC_dt/(beta_f*phi*mu)*div_k_grad_p
1496 -(ndem*devC_dt/(beta_f*phi*(1.0 - phi)))
1497 *(dphi/(ndem*devC_dt) + dot(vp_avg, grad_phi_central));
1498
1499 // Dirichlet BC at fixed-pressure boundaries and at the dynamic top
1500 // wall. wall0_iz will be larger than the grid if the wall isn't
1501 // dynamic
1502 if ((bc_bot == 0 && z == 0) || (bc_top == 0 && z == nz-1)
1503 || (z >= wall0_iz && bc_top == 0)
1504 || (bc_xn == 0 && x == 0) || (bc_xp == 0 && x == nx-1)
1505 || (bc_yn == 0 && y == 0) || (bc_yp == 0 && y == nx-1)
1506 || p_constant == 1)
1507 dp_expl = 0.0;
1508
1509 #ifdef REPORT_FORCING_TERMS
1510 const Float dp_diff =
1511 ndem*devC_dt/(beta_f*phi*mu)
1512 *div_k_grad_p;
1513 const Float dp_forc =
1514 -(ndem*devC_dt/(beta_f*phi*(1.0 - phi)))
1515 *(dphi/(ndem*devC_dt) + dot(vp_avg, grad_phi));
1516
1517 printf("\n%d,%d,%d firstDarcySolution\n"
1518 "p = %e\n"
1519 "p_x = %e, %e\n"
1520 "p_y = %e, %e\n"
1521 "p_z = %e, %e\n"
1522 "dp_expl = %e\n"
1523 "div_k_grad_p= %e\n"
1524 "dp_diff = %e\n"
1525 "dp_forc = %e\n"
1526 "phi = %e\n"
1527 "dphi = %e\n"
1528 "dphi/dt = %e\n"
1529 "vp_avg = %e, %e, %e\n"
1530 "grad_phi = %e, %e, %e\n"
1531 ,
1532 x,y,z,
1533 p,
1534 p_xn, p_xp,
1535 p_yn, p_yp,
1536 p_zn, p_zp,
1537 dp_expl,
1538 div_k_grad_p,
1539 dp_diff,
1540 dp_forc,
1541 phi,
1542 dphi,
1543 dphi/(ndem*devC_dt),
1544 vp_avg.x, vp_avg.y, vp_avg.z,
1545 grad_phi_central.x, grad_phi_central.y, grad_phi_central.z
1546 );
1547 #endif
1548
1549 // save explicit integrated pressure change
1550 dev_darcy_dp_expl[cellidx] = dp_expl;
1551
1552 #ifdef CHECK_FLUID_FINITE
1553 checkFiniteFloat("dp_expl", x, y, z, dp_expl);
1554 #endif
1555 }
1556 }
1557 // A single jacobi iteration where the pressure values are updated to
1558 // dev_darcy_p_new.
1559 // bc = 0: Dirichlet, 1: Neumann
1560 __global__ void updateDarcySolution(
1561 const Float* __restrict__ dev_darcy_p_old, // in
1562 const Float* __restrict__ dev_darcy_dp_expl, // in
1563 const Float* __restrict__ dev_darcy_p, // in
1564 const Float* __restrict__ dev_darcy_k, // in
1565 const Float* __restrict__ dev_darcy_phi, // in
1566 const Float* __restrict__ dev_darcy_dphi, // in
1567 const Float* __restrict__ dev_darcy_div_v_p, // in
1568 const Float3* __restrict__ dev_darcy_vp_avg, // in
1569 const Float3* __restrict__ dev_darcy_grad_k, // in
1570 const Float beta_f, // in
1571 const Float mu, // in
1572 const int bc_xn, // in
1573 const int bc_xp, // in
1574 const int bc_yn, // in
1575 const int bc_yp, // in
1576 const int bc_bot, // in
1577 const int bc_top, // in
1578 const unsigned int ndem, // in
1579 const unsigned int wall0_iz, // in
1580 const int* __restrict__ dev_darcy_p_constant, // in
1581 Float* __restrict__ dev_darcy_p_new, // out
1582 Float* __restrict__ dev_darcy_norm) // out
1583 {
1584 // 3D thread index
1585 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
1586 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
1587 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
1588
1589 // Grid dimensions
1590 const unsigned int nx = devC_grid.num[0];
1591 const unsigned int ny = devC_grid.num[1];
1592 const unsigned int nz = devC_grid.num[2];
1593
1594 // Cell size
1595 const Float dx = devC_grid.L[0]/nx;
1596 const Float dy = devC_grid.L[1]/ny;
1597 const Float dz = devC_grid.L[2]/nz;
1598
1599 if (x < nx && y < ny && z < nz) {
1600
1601 // 1D thread index
1602 const unsigned int cellidx = d_idx(x,y,z);
1603
1604 // read values
1605 const Float phi_xn = dev_darcy_phi[d_idx(x-1,y,z)];
1606 const Float phi = dev_darcy_phi[cellidx];
1607 const Float phi_xp = dev_darcy_phi[d_idx(x+1,y,z)];
1608 const Float phi_yn = dev_darcy_phi[d_idx(x,y-1,z)];
1609 const Float phi_yp = dev_darcy_phi[d_idx(x,y+1,z)];
1610 const Float phi_zn = dev_darcy_phi[d_idx(x,y,z-1)];
1611 const Float phi_zp = dev_darcy_phi[d_idx(x,y,z+1)];
1612 const Float dphi = dev_darcy_dphi[cellidx];
1613 const Float3 vp_avg = dev_darcy_vp_avg[cellidx];
1614 const int p_constant = dev_darcy_p_constant[cellidx];
1615
1616 const Float p_old = dev_darcy_p_old[cellidx];
1617 const Float dp_expl = dev_darcy_dp_expl[cellidx];
1618
1619 Float p_xn = dev_darcy_p[d_idx(x-1,y,z)];
1620 const Float p = dev_darcy_p[cellidx];
1621 Float p_xp = dev_darcy_p[d_idx(x+1,y,z)];
1622 Float p_yn = dev_darcy_p[d_idx(x,y-1,z)];
1623 Float p_yp = dev_darcy_p[d_idx(x,y+1,z)];
1624 Float p_zn = dev_darcy_p[d_idx(x,y,z-1)];
1625 Float p_zp = dev_darcy_p[d_idx(x,y,z+1)];
1626
1627 const Float k_xn = dev_darcy_k[d_idx(x-1,y,z)];
1628 const Float k = dev_darcy_k[cellidx];
1629 const Float k_xp = dev_darcy_k[d_idx(x+1,y,z)];
1630 const Float k_yn = dev_darcy_k[d_idx(x,y-1,z)];
1631 const Float k_yp = dev_darcy_k[d_idx(x,y+1,z)];
1632 const Float k_zn = dev_darcy_k[d_idx(x,y,z-1)];
1633 const Float k_zp = dev_darcy_k[d_idx(x,y,z+1)];
1634
1635 // Neumann BCs
1636 if (x == 0 && bc_xn == 1)
1637 p_xn = p;
1638 if (x == nx-1 && bc_xp == 1)
1639 p_xp = p;
1640 if (y == 0 && bc_yn == 1)
1641 p_yn = p;
1642 if (y == ny-1 && bc_yp == 1)
1643 p_yp = p;
1644 if (z == 0 && bc_bot == 1)
1645 p_zn = p;
1646 if (z == nz-1 && bc_top == 1)
1647 p_zp = p;
1648
1649 const Float3 grad_phi_central = MAKE_FLOAT3(
1650 (phi_xp - phi_xn)/(dx + dx),
1651 (phi_yp - phi_yn)/(dy + dy),
1652 (phi_zp - phi_zn)/(dz + dz));
1653
1654 // Solve div(k*grad(p)) as a single term, using harmonic mean for
1655 // permeability. k_HM*grad(p) is found between the pressure nodes.
1656 const Float div_k_grad_p =
1657 (2.*k_xp*k/(k_xp + k) *
1658 (p_xp - p)/dx
1659 -
1660 2.*k_xn*k/(k_xn + k) *
1661 (p - p_xn)/dx)/dx
1662 +
1663 (2.*k_yp*k/(k_yp + k) *
1664 (p_yp - p)/dy
1665 -
1666 2.*k_yn*k/(k_yn + k) *
1667 (p - p_yn)/dy)/dy
1668 +
1669 (2.*k_zp*k/(k_zp + k) *
1670 (p_zp - p)/dz
1671 -
1672 2.*k_zn*k/(k_zn + k) *
1673 (p - p_zn)/dz)/dz;
1674
1675 //Float p_new = p_old
1676 Float dp_impl =
1677 ndem*devC_dt/(beta_f*phi*mu)*div_k_grad_p
1678 -(ndem*devC_dt/(beta_f*phi*(1.0 - phi)))
1679 *(dphi/(ndem*devC_dt) + dot(vp_avg, grad_phi_central));
1680
1681 // Dirichlet BC at fixed-pressure boundaries and at the dynamic top
1682 // wall. wall0_iz will be larger than the grid if the wall isn't
1683 // dynamic
1684 if ((bc_bot == 0 && z == 0) || (bc_top == 0 && z == nz-1)
1685 || (z >= wall0_iz && bc_top == 0)
1686 || (bc_xn == 0 && x == 0) || (bc_xp == 0 && x == nx-1)
1687 || (bc_yn == 0 && y == 0) || (bc_yp == 0 && y == nx-1)
1688 || p_constant == 1)
1689 dp_impl = 0.0;
1690
1691 // choose integration method, parameter in [0.0; 1.0]
1692 // epsilon = 0.0: explicit
1693 // epsilon = 0.5: Crank-Nicolson
1694 // epsilon = 1.0: implicit
1695 const Float epsilon = 0.5;
1696 Float p_new = p_old + (1.0 - epsilon)*dp_expl + epsilon*dp_impl;
1697
1698 // add underrelaxation
1699 const Float theta = 0.05;
1700 p_new = p*(1.0 - theta) + p_new*theta;
1701
1702 // normalized residual, avoid division by zero
1703 //const Float res_norm = (p_new - p)*(p_new - p)/(p_new*p_new + 1.0e-16);
1704 const Float res_norm = (p_new - p)/(p + 1.0e-16);
1705
1706 #ifdef REPORT_FORCING_TERMS_JACOBIAN
1707 const Float dp_diff = (ndem*devC_dt)/(beta_f*phi*mu)
1708 *(k*laplace_p + dot(grad_k, grad_p));
1709 const Float dp_forc =
1710 -(ndem*devC_dt/(beta_f*phi*(1.0 - phi)))
1711 *(dphi/(ndem*devC_dt) + dot(vp_avg, grad_phi));
1712 printf("\n%d,%d,%d updateDarcySolution\n"
1713 "p_new = %e\n"
1714 "p = %e\n"
1715 "p_x = %e, %e\n"
1716 "p_y = %e, %e\n"
1717 "p_z = %e, %e\n"
1718 "dp_expl = %e\n"
1719 "p_old = %e\n"
1720 "div_k_grad_p= %e\n"
1721 "dp_diff = %e\n"
1722 "dp_forc = %e\n"
1723 "div_v_p = %e\n"
1724 "res_norm = %e\n"
1725 ,
1726 x,y,z,
1727 p_new, p,
1728 p_xn, p_xp,
1729 p_yn, p_yp,
1730 p_zn, p_zp,
1731 dp_expl,
1732 p_old,
1733 div_k_grad_p,
1734 dp_diff,
1735 dp_forc,
1736 dphi, dphi/(ndem*devC_dt),
1737 res_norm); //
1738 #endif
1739
1740 // save new pressure and the residual
1741 dev_darcy_p_new[cellidx] = p_new;
1742 dev_darcy_norm[cellidx] = res_norm;
1743
1744 /*printf("%d,%d,%d\tp = % f\tp_new = % f\tres_norm = % f\n",
1745 x,y,z,
1746 p,
1747 p_new,
1748 res_norm);*/
1749
1750 #ifdef CHECK_FLUID_FINITE
1751 checkFiniteFloat("p_new", x, y, z, p_new);
1752 checkFiniteFloat("res_norm", x, y, z, res_norm);
1753 #endif
1754 }
1755 }
1756
1757 __global__ void findNewPressure(
1758 const Float* __restrict__ dev_darcy_dp, // in
1759 Float* __restrict__ dev_darcy_p) // in+out
1760 {
1761 // 3D thread index
1762 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
1763 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
1764 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
1765
1766 // Grid dimensions
1767 const unsigned int nx = devC_grid.num[0];
1768 const unsigned int ny = devC_grid.num[1];
1769 const unsigned int nz = devC_grid.num[2];
1770
1771 // Check that we are not outside the fluid grid
1772 if (x < nx && y < ny && z < nz) {
1773
1774 const unsigned int cellidx = d_idx(x,y,z);
1775
1776 const Float dp = dev_darcy_dp[cellidx];
1777
1778 // save new pressure
1779 dev_darcy_p[cellidx] += dp;
1780
1781 /*printf("%d,%d,%d\tp = % f\tp_new = % f\tres_norm = % f\n",
1782 x,y,z,
1783 p,
1784 p_new,
1785 res_norm);*/
1786
1787 #ifdef CHECK_FLUID_FINITE
1788 checkFiniteFloat("dp", x, y, z, dp);
1789 #endif
1790 }
1791 }
1792
1793 // Find cell velocities
1794 __global__ void findDarcyVelocities(
1795 const Float* __restrict__ dev_darcy_p, // in
1796 const Float* __restrict__ dev_darcy_phi, // in
1797 const Float* __restrict__ dev_darcy_k, // in
1798 const Float mu, // in
1799 Float3* __restrict__ dev_darcy_v) // out
1800 {
1801 // 3D thread index
1802 const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
1803 const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
1804 const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
1805
1806 // Grid dimensions
1807 const unsigned int nx = devC_grid.num[0];
1808 const unsigned int ny = devC_grid.num[1];
1809 const unsigned int nz = devC_grid.num[2];
1810
1811 // Cell size
1812 const Float dx = devC_grid.L[0]/nx;
1813 const Float dy = devC_grid.L[1]/ny;
1814 const Float dz = devC_grid.L[2]/nz;
1815
1816 // Check that we are not outside the fluid grid
1817 if (x < nx && y < ny && z < nz) {
1818
1819 const unsigned int cellidx = d_idx(x,y,z);
1820
1821 const Float p_xn = dev_darcy_p[d_idx(x-1,y,z)];
1822 const Float p_xp = dev_darcy_p[d_idx(x+1,y,z)];
1823 const Float p_yn = dev_darcy_p[d_idx(x,y-1,z)];
1824 const Float p_yp = dev_darcy_p[d_idx(x,y+1,z)];
1825 const Float p_zn = dev_darcy_p[d_idx(x,y,z-1)];
1826 const Float p_zp = dev_darcy_p[d_idx(x,y,z+1)];
1827
1828 const Float k = dev_darcy_k[cellidx];
1829 const Float phi = dev_darcy_phi[cellidx];
1830
1831 // approximate pressure gradient with first order central differences
1832 const Float3 grad_p = MAKE_FLOAT3(
1833 (p_xp - p_xn)/(dx + dx),
1834 (p_yp - p_yn)/(dy + dy),
1835 (p_zp - p_zn)/(dz + dz));
1836
1837 // Flux [m/s]: q = -k/nu * dH
1838 // Pore velocity [m/s]: v = q/n
1839
1840 // calculate flux
1841 //const Float3 q = -k/mu*grad_p;
1842
1843 // calculate velocity
1844 //const Float3 v = q/phi;
1845 const Float3 v = (-k/mu * grad_p)/phi;
1846
1847 // Save velocity
1848 dev_darcy_v[cellidx] = v;
1849 }
1850 }
1851
1852 // Print final heads and free memory
1853 void DEM::endDarcyDev()
1854 {
1855 freeDarcyMemDev();
1856 }
1857
1858 // vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4