URI:
       tsphere.h - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
  HTML git clone git://src.adamsgaard.dk/sphere
   DIR Log
   DIR Files
   DIR Refs
   DIR LICENSE
       ---
       tsphere.h (16560B)
       ---
            1 // Make sure the header is only included once
            2 #ifndef SPHERE_H_
            3 #define SPHERE_H_
            4 
            5 #include <string>
            6 #include <vector>
            7 
            8 //#include "eigen-nvcc/Eigen/Core"
            9 
           10 #include "datatypes.h"
           11 
           12 // DEM class
           13 class DEM {
           14 
           15     // Values and functions only accessible from the class internally
           16     private:
           17 
           18         // Input filename (full path)
           19         std::string inputbin;
           20 
           21         // Simulation ID
           22         std::string sid;
           23 
           24         // Output level
           25         int verbose;
           26 
           27         // Number of dimensions
           28         unsigned int nd;
           29 
           30         // Number of particles
           31         unsigned int np;
           32 
           33         // HOST STRUCTURES
           34         // Structure containing individual particle kinematics
           35         Kinematics k;
           36 
           37         // Structure containing energy values
           38         Energies e;
           39 
           40         // Structure of global parameters
           41         Params params;
           42 
           43         // Structure containing spatial parameters
           44         Grid grid;
           45 
           46         // Structure of temporal parameters
           47         Time time;
           48 
           49         // Structure of wall parameters
           50         Walls walls;
           51 
           52         // Image structure (red, green, blue, alpa)
           53         rgba* img;
           54         unsigned int width;
           55         unsigned int height;
           56 
           57         // Device management
           58         int ndevices;     // number of CUDA GPUs
           59         int device;       // primary GPU
           60         int* domain_size; // elements per GPU
           61 
           62 
           63         // DEVICE ARRAYS
           64 
           65         // Particle kinematics arrays
           66         Float4        *dev_x;
           67         Float4        *dev_xyzsum;
           68         Float4        *dev_vel;
           69         Float4        *dev_vel0;
           70         Float4        *dev_acc;
           71         Float4        *dev_force;
           72         Float4        *dev_angpos;
           73         Float4        *dev_angvel;
           74         Float4        *dev_angvel0;
           75         Float4        *dev_angacc;
           76         Float4        *dev_torque;
           77         unsigned int  *dev_contacts;
           78         Float4        *dev_distmod;
           79         Float4        *dev_delta_t;
           80         Float         *dev_es_dot;
           81         Float         *dev_es;
           82         Float         *dev_ev_dot;
           83         Float         *dev_ev;
           84         Float         *dev_p;
           85 
           86         // Sorted kinematics arrays
           87         Float4        *dev_x_sorted;
           88         Float4        *dev_vel_sorted;
           89         Float4        *dev_angvel_sorted;
           90 
           91         // Sorting grid arrays
           92         unsigned int  *dev_gridParticleCellID;
           93         unsigned int  *dev_gridParticleIndex;
           94         unsigned int  *dev_cellStart;
           95         unsigned int  *dev_cellEnd;
           96 
           97         // Wall arrays
           98         int           *dev_walls_wmode;
           99         Float4        *dev_walls_nx;        // normal, pos.
          100         Float4        *dev_walls_mvfd;      // mass, velo., force, dev. stress
          101         Float         *dev_walls_tau_x;     // wall shear stress
          102         Float         *dev_walls_force_partial; // Pre-sum per wall
          103         Float         *dev_walls_force_pp;  // Force per particle per wall
          104         Float         *dev_walls_acc;       // Wall acceleration
          105         Float         *dev_walls_tau_eff_x_pp;      // Shear force per particle
          106         Float         *dev_walls_tau_eff_x_partial; // Pre-sum of shear force
          107 
          108         // Bond arrays
          109         uint2         *dev_bonds;           // Particle bond pairs
          110         Float4        *dev_bonds_delta;     // Particle bond displacement
          111         Float4        *dev_bonds_omega;     // Particle bond rotation
          112 
          113         // Raytracer arrays
          114         unsigned char *dev_img;
          115         float4        *dev_ray_origo;       // Ray data always single precision
          116         float4        *dev_ray_direction;
          117 
          118 
          119         // GPU initialization, must be called before startTime()
          120         void initializeGPU();
          121 
          122         // Copy all constant data to constant device memory
          123         void transferToConstantDeviceMemory();
          124         void rt_transferToConstantDeviceMemory();
          125 
          126         // Check for CUDA errors
          127         void checkForCudaErrors(const char* checkpoint_description,
          128                 const int run_diagnostics = 1);
          129         void checkForCudaErrorsIter(const char* checkpoint_description,
          130                 const unsigned int iteration,
          131                 const int run_diagnostics = 1);
          132 
          133         // Check values stored in constant device memory
          134         void checkConstantMemory();
          135 
          136         // Initialize camera values and transfer to constant device memory
          137         void cameraInit(const float3 eye,
          138                 const float3 lookat, 
          139                 const float imgw,
          140                 const float focalLength);
          141 
          142         // Adjust grid size according to wall placement
          143         void updateGridSize();
          144 
          145         // Allocate global device memory to hold data
          146         void allocateGlobalDeviceMemory();
          147         void rt_allocateGlobalDeviceMemory();
          148 
          149         // Allocate global memory on helper devices
          150         void allocateHelperDeviceMemory();
          151         void freeHelperDeviceMemory();
          152 
          153         // Free dynamically allocated global device memory
          154         void freeGlobalDeviceMemory();
          155         void rt_freeGlobalDeviceMemory();
          156 
          157         // Copy non-constant data to global GPU memory
          158         void transferToGlobalDeviceMemory(int status = 1);
          159 
          160         // Copy non-constant data from global GPU memory to host RAM
          161         void transferFromGlobalDeviceMemory();
          162         void rt_transferFromGlobalDeviceMemory();
          163 
          164         // Find and return the max. radius
          165         Float r_max();
          166 
          167         // Write porosities found in porosity() to text file
          168         void writePorosities(
          169                 const char *target,
          170                 const int z_slices,
          171                 const Float *z_pos,
          172                 const Float *porosity);
          173 
          174         // Lattice-Boltzmann data arrays (D3Q19)
          175         Float  *f;          // Fluid distribution (f0..f18)
          176         Float  *f_new;      // t+deltaT fluid distribution (f0..f18)
          177         Float  *dev_f;      // Device equivalent
          178         Float  *dev_f_new;  // Device equivalent
          179         Float4 *v_rho;      // Fluid velocity v (xyz), and pressure rho (w) 
          180         Float4 *dev_v_rho;  // Device equivalent
          181 
          182         //// Porous flow 
          183         int fluid;      // 0: no, 1: yes
          184         int cfd_solver; // 0: Navier Stokes, 1: Darcy
          185 
          186         // Navier Stokes values, host
          187         NavierStokes ns;
          188 
          189         // Navier Stokes values, device
          190         Float*  dev_ns_p;            // Cell hydraulic pressure
          191         Float3* dev_ns_v;            // Cell fluid velocity
          192         Float*  dev_ns_v_x;          // Cell fluid velocity in staggered grid
          193         Float*  dev_ns_v_y;          // Cell fluid velocity in staggered grid
          194         Float*  dev_ns_v_z;          // Cell fluid velocity in staggered grid
          195         Float3* dev_ns_v_p;          // Averaged predicted cell fluid velocity
          196         Float*  dev_ns_v_p_x;        // Predicted cell fluid velocity in st. gr.
          197         Float*  dev_ns_v_p_y;        // Predicted cell fluid velocity in st. gr.
          198         Float*  dev_ns_v_p_z;        // Predicted cell fluid velocity in st. gr.
          199         Float3* dev_ns_vp_avg;       // Average particle velocity in cell
          200         Float*  dev_ns_d_avg;        // Average particle diameter in cell
          201         Float3* dev_ns_F_pf;         // Interaction force on fluid
          202         //Float*  dev_ns_F_pf_x;       // Interaction force on fluid
          203         //Float*  dev_ns_F_pf_y;       // Interaction force on fluid
          204         //Float*  dev_ns_F_pf_z;       // Interaction force on fluid
          205         Float*  dev_ns_phi;          // Cell porosity
          206         Float*  dev_ns_dphi;         // Cell porosity change
          207         //Float3* dev_ns_div_phi_v_v;  // Divegence used in velocity prediction
          208         Float*  dev_ns_epsilon;      // Pressure difference
          209         Float*  dev_ns_epsilon_new;  // Pressure diff. after Jacobi iteration
          210         Float*  dev_ns_epsilon_old;  // Pressure diff. before Jacobi iteration
          211         Float*  dev_ns_norm;         // Normalized residual of epsilon values
          212         Float*  dev_ns_f;            // Values of forcing function
          213         Float*  dev_ns_f1;           // Constant terms in forcing function
          214         Float3* dev_ns_f2;           // Constant slopes in forcing function
          215         Float*  dev_ns_v_prod;       // Outer product of fluid velocities
          216         //Float*  dev_ns_tau;          // Fluid stress tensor
          217         Float3* dev_ns_div_phi_vi_v; // div(phi*vi*v)
          218         //Float3* dev_ns_div_phi_tau;  // div(phi*tau)
          219         //Float3* dev_ns_div_tau;      // div(tau)
          220         Float*  dev_ns_div_tau_x;    // div(tau) on x-face
          221         Float*  dev_ns_div_tau_y;    // div(tau) on y-face
          222         Float*  dev_ns_div_tau_z;    // div(tau) on z-face
          223         Float3* dev_ns_f_pf;         // Interaction force on particles
          224         Float4* dev_ns_f_d;          // Drag force on particles
          225         Float4* dev_ns_f_p;          // Pressure force on particles
          226         Float4* dev_ns_f_v;          // Viscous force on particles
          227         Float4* dev_ns_f_sum;        // Total force on particles
          228 
          229         // Helper device arrays, input
          230         unsigned int** hdev_gridParticleIndex;
          231         unsigned int** hdev_gridCellStart;
          232         unsigned int** hdev_gridCellEnd;
          233         Float4** hdev_x;
          234         Float4** hdev_x_sorted;
          235         Float4** hdev_vel;
          236         Float4** hdev_vel_sorted;
          237         Float4** hdev_angvel;
          238         Float4** hdev_angvel_sorted;
          239         Float4** hdev_walls_nx;
          240         Float4** hdev_walls_mvfd;
          241         Float4** hdev_distmod;
          242 
          243         // Helper device arrays, output
          244         Float4** hdev_force;
          245         Float4** hdev_torque;
          246         Float4** hdev_delta_t;
          247         Float** hdev_es_dot;
          248         Float** hdev_es;
          249         Float** hdev_ev_dot;
          250         Float** hdev_ev;
          251         Float** hdev_p;
          252         Float** hdev_walls_force_pp;
          253         unsigned int** hdev_contacts;
          254 
          255 
          256         //// Navier Stokes functions
          257 
          258         // Memory allocation
          259         void initNSmem();
          260         void freeNSmem();
          261 
          262         // Returns the number of fluid cells
          263         unsigned int NScells();         // Pressure and other centered nodes
          264         unsigned int NScellsVelocity(); // Inter-cell nodes (velocity)
          265         
          266         // Returns the mean particle radius
          267         Float meanRadius();
          268 
          269         // Get linear (1D) index from 3D coordinate
          270         unsigned int idx(const int x, const int y, const int z); // pres. nodes
          271         unsigned int vidx(const int x, const int y, const int z); // vel. nodes
          272 
          273         // Initialize Navier Stokes values and arrays
          274         void initNS();
          275 
          276         // Clean up Navier Stokes arrays
          277         void endNS();
          278         void endNSdev();
          279 
          280         // Check for stability in the FTCS solution
          281         void checkNSstability();
          282 
          283         // Returns the average value of the normalized residual norm in host mem
          284         double avgNormResNS();
          285 
          286         // Returns the maximum value of the normalized residual norm in host mem
          287         double maxNormResNS();
          288 
          289         // Allocate and free memory for NS arrays on device
          290         void initNSmemDev();
          291         void freeNSmemDev();
          292 
          293         // Transfer array values between GPU and CPU
          294         void transferNStoGlobalDeviceMemory(int statusmsg);
          295         void transferNSfromGlobalDeviceMemory(int statusmsg);
          296         void transferNSnormFromGlobalDeviceMemory();
          297         void transferNSepsilonFromGlobalDeviceMemory();
          298         void transferNSepsilonNewFromGlobalDeviceMemory();
          299 
          300         // Darcy values, host
          301         Darcy darcy;
          302 
          303         // Darcy values, device
          304         Float*  dev_darcy_p_old;     // Previous cell hydraulic pressure
          305         Float*  dev_darcy_dp_expl;   // Explicit integrated pressure change
          306         Float*  dev_darcy_p;         // Cell hydraulic pressure
          307         Float*  dev_darcy_p_new;     // Updated cell hydraulic pressure
          308         Float3* dev_darcy_v;         // Cell fluid velocity
          309         Float*  dev_darcy_phi;       // Cell porosity
          310         Float*  dev_darcy_dphi;      // Cell porosity change
          311         Float*  dev_darcy_div_v_p;   // Cell particle velocity divergence
          312         //Float*  dev_darcy_v_p_x;     // Cell particle velocity
          313         //Float*  dev_darcy_v_p_y;     // Cell particle velocity
          314         //Float*  dev_darcy_v_p_z;     // Cell particle velocity
          315         Float*  dev_darcy_norm;      // Normalized residual of epsilon values
          316         Float4* dev_darcy_f_p;       // Pressure gradient force on particles
          317         Float*  dev_darcy_k;         // Cell hydraulic permeability
          318         Float3* dev_darcy_grad_k;    // Spatial gradient of permeability
          319         Float3* dev_darcy_grad_p;    // Spatial gradient of fluid pressure
          320         Float3* dev_darcy_vp_avg;    // Average particle velocity in cell
          321         int* dev_darcy_p_constant;   // Constant pressure (0: False, 1: True)
          322 
          323         // Darcy functions
          324         void initDarcyMem();
          325         Float largestDarcyPermeability();
          326         Float smallestDarcyPorosity();
          327         Float3 largestDarcyVelocities();
          328         void initDarcyMemDev();
          329         unsigned int darcyCells();
          330         unsigned int darcyCellsVelocity();
          331         void transferDarcyToGlobalDeviceMemory(int statusmsg);
          332         void transferDarcyFromGlobalDeviceMemory(int statusmsg);
          333         void transferDarcyNormFromGlobalDeviceMemory();
          334         void transferDarcyPressuresFromGlobalDeviceMemory();
          335         void freeDarcyMem();
          336         void freeDarcyMemDev();
          337         unsigned int d_idx(const int x, const int y, const int z);
          338         unsigned int d_vidx(const int x, const int y, const int z);
          339         void checkDarcyStability();
          340         void printDarcyArray(FILE* stream, Float* arr);
          341         void printDarcyArray(FILE* stream, Float* arr, std::string desc);
          342         void printDarcyArray(FILE* stream, Float3* arr);
          343         void printDarcyArray(FILE* stream, Float3* arr, std::string desc);
          344         double avgNormResDarcy();
          345         double maxNormResDarcy();
          346         void initDarcy();
          347         void writeDarcyArray(Float* arr, const char* filename);
          348         void writeDarcyArray(Float3* arr, const char* filename);
          349         void endDarcy();
          350         void endDarcyDev();
          351 
          352 
          353     public:
          354         // Values and functions accessible from the outside
          355 
          356         // Constructor, some parameters with default values
          357         DEM(std::string inputbin, 
          358                 const int verbosity = 1,
          359                 const int checkVals = 1,
          360                 const int dry = 0,
          361                 const int initCuda = 1,
          362                 const int transferConstMem = 1,
          363                 const int fluidFlow = 0,
          364                 const int exclusive = 0);
          365 
          366         // Destructor
          367         ~DEM(void);
          368 
          369         // Read binary input file
          370         void readbin(const char *target);
          371 
          372         // Write binary output file
          373         void writebin(const char *target);
          374 
          375         // Check numeric values of selected parameters
          376         void diagnostics();
          377         void checkValues();
          378 
          379         // Report key parameter values to stdout
          380         void reportValues();
          381 
          382         // Iterate through time, using temporal limits
          383         // described in "time" struct.
          384         void startTime();
          385 
          386         // Render particles using raytracing
          387         void render(
          388                 const int method = 1,
          389                 const float maxval = 1.0e3f,
          390                 const float lower_cutoff = 0.0f,
          391                 const float focalLength = 1.0f,
          392                 const unsigned int img_width = 800,
          393                 const unsigned int img_height = 800);
          394 
          395         // Write image data to PPM file
          396         void writePPM(const char *target);
          397 
          398         // Calculate porosity with depth and save as text file
          399         void porosity(const int z_slices = 10);
          400 
          401         // find and return the min. position of any particle in each dimension
          402         Float3 minPos();
          403 
          404         // find and return the max. position of any particle in each dimension
          405         Float3 maxPos();
          406 
          407         // Find particle-particle intersections, saves the indexes
          408         // and the overlap sizes
          409         void findOverlaps(
          410                 std::vector< std::vector<unsigned int> > &ij,
          411                 std::vector< Float > &delta_n_ij);
          412 
          413         // Calculate force chains and save as Gnuplot script
          414         void forcechains(
          415                 const std::string format = "interactive",
          416                 const int threedim = 1,
          417                 const double lower_cutoff = 0.0,
          418                 const double upper_cutoff = 1.0e9);
          419 
          420         // Print all particle-particle contacts to stdout
          421         void printContacts();
          422 
          423 
          424         ///// Porous flow functions
          425 
          426         // Print fluid arrays to file stream
          427         void printNSarray(FILE* stream, Float* arr);
          428         void printNSarray(FILE* stream, Float* arr, std::string desc);
          429         void printNSarray(FILE* stream, Float3* arr);
          430         void printNSarray(FILE* stream, Float3* arr, std::string desc);
          431 
          432         // Write fluid arrays to file
          433         void writeNSarray(Float* array, const char* filename);
          434         void writeNSarray(Float3* array, const char* filename);
          435 };
          436 
          437 #endif
          438 // vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4