mx1.adamsgaard.dk/1/src/pism/file/src/stressbalance/ssa/SSAFD.cc.gph

       tSSAFD.cc - pism - [fork] customized build of PISM, the parallel ice sheet model (tillflux branch)
  HTML git clone git://src.adamsgaard.dk/pism
   DIR Log
   DIR Files
   DIR Refs
   DIR LICENSE
       ---
       tSSAFD.cc (62955B)
       ---
            1 // Copyright (C) 2004--2019 Constantine Khroulev, Ed Bueler and Jed Brown
            2 //
            3 // This file is part of PISM.
            4 //
            5 // PISM is free software; you can redistribute it and/or modify it under the
            6 // terms of the GNU General Public License as published by the Free Software
            7 // Foundation; either version 3 of the License, or (at your option) any later
            8 // version.
            9 //
           10 // PISM is distributed in the hope that it will be useful, but WITHOUT ANY
           11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
           12 // FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
           13 // details.
           14 //
           15 // You should have received a copy of the GNU General Public License
           16 // along with PISM; if not, write to the Free Software
           17 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
           18 
           19 #include <cassert>
           20 #include <stdexcept>
           21 
           22 #include "SSAFD.hh"
           23 #include "SSAFD_diagnostics.hh"
           24 #include "pism/util/Mask.hh"
           25 #include "pism/basalstrength/basal_resistance.hh"
           26 #include "pism/util/pism_options.hh"
           27 #include "pism/rheology/FlowLaw.hh"
           28 #include "pism/util/Vars.hh"
           29 #include "pism/util/IceGrid.hh"
           30 #include "pism/util/Time.hh"
           31 #include "pism/util/IceModelVec2CellType.hh"
           32 #include "pism/stressbalance/StressBalance.hh"
           33 #include "pism/geometry/Geometry.hh"
           34 #include "pism/util/pism_utilities.hh"
           35 
           36 namespace pism {
           37 namespace stressbalance {
           38 
           39 using namespace pism::mask;
           40 
           41 SSAFD::KSPFailure::KSPFailure(const char* reason)
           42   : RuntimeError(ErrorLocation(), std::string("SSAFD KSP (linear solver) failed: ") + reason){
           43   // empty
           44 }
           45 
           46 SSAFD::PicardFailure::PicardFailure(const std::string &message)
           47   : RuntimeError(ErrorLocation(), "SSAFD Picard iterations failed: " + message) {
           48   // empty
           49 }
           50 
           51 SSA* SSAFDFactory(IceGrid::ConstPtr g) {
           52   return new SSAFD(g);
           53 }
           54 
           55 /*!
           56 Because the FD implementation of the SSA uses Picard iteration, a PETSc KSP
           57 and Mat are used directly.  In particular we set up \f$A\f$
           58 (Mat m_A) and a \f$b\f$ (= Vec m_b) and iteratively solve
           59 linear systems
           60   \f[ A x = b \f]
           61 where \f$x\f$ (= Vec SSAX).  A PETSc SNES object is never created.
           62  */
           63 SSAFD::SSAFD(IceGrid::ConstPtr g)
           64   : SSA(g) {
           65   m_b.create(m_grid, "right_hand_side", WITHOUT_GHOSTS);
           66 
           67   m_velocity_old.create(m_grid, "velocity_old", WITH_GHOSTS);
           68   m_velocity_old.set_attrs("internal",
           69                            "old SSA velocity field; used for re-trying with a different epsilon",
           70                            "m s-1", "m s-1", "", 0);
           71 
           72   auto units = pism::printf("Pa s%f", 1.0 / m_flow_law->exponent());
           73   m_hardness.create(m_grid, "hardness", WITHOUT_GHOSTS);
           74   m_hardness.set_attrs("diagnostic",
           75                        "vertically-averaged ice hardness",
           76                        units, units,
           77                        "", 0);
           78 
           79   m_nuH.create(m_grid, "nuH", WITH_GHOSTS);
           80   m_nuH.set_attrs("internal",
           81                   "ice thickness times effective viscosity",
           82                   "Pa s m", "Pa s m", "", 0);
           83 
           84   m_nuH_old.create(m_grid, "nuH_old", WITH_GHOSTS);
           85   m_nuH_old.set_attrs("internal",
           86                       "ice thickness times effective viscosity (before an update)",
           87                       "Pa s m", "Pa s m", "", 0);
           88 
           89   m_work.create(m_grid, "m_work", WITH_GHOSTS,
           90                 2, /* stencil width */
           91                 6  /* dof */);
           92   m_work.set_attrs("internal",
           93                    "temporary storage used to compute nuH",
           94                    "", "", "", 0);
           95 
           96   m_scaling = 1.0e9;  // comparable to typical beta for an ice stream;
           97 
           98   // The nuH viewer:
           99   m_view_nuh = false;
          100   m_nuh_viewer_size = 300;
          101 
          102   // PETSc objects and settings
          103   {
          104     PetscErrorCode ierr;
          105     ierr = DMSetMatType(*m_da, MATAIJ);
          106     PISM_CHK(ierr, "DMSetMatType");
          107 
          108     ierr = DMCreateMatrix(*m_da, m_A.rawptr());
          109     PISM_CHK(ierr, "DMCreateMatrix");
          110 
          111     ierr = KSPCreate(m_grid->com, m_KSP.rawptr());
          112     PISM_CHK(ierr, "KSPCreate");
          113 
          114     ierr = KSPSetOptionsPrefix(m_KSP, "ssafd_");
          115     PISM_CHK(ierr, "KSPSetOptionsPrefix");
          116 
          117     // Use non-zero initial guess (i.e. SSA velocities from the last
          118     // solve() call).
          119     ierr = KSPSetInitialGuessNonzero(m_KSP, PETSC_TRUE);
          120     PISM_CHK(ierr, "KSPSetInitialGuessNonzero");
          121 
          122     // Use the initial residual norm.
          123     ierr = KSPConvergedDefaultSetUIRNorm(m_KSP);
          124     PISM_CHK(ierr, "KSPConvergedDefaultSetUIRNorm");
          125   }
          126 }
          127 
          128 SSAFD::~SSAFD() {
          129   // empty
          130 }
          131 
          132 //! @note Uses `PetscErrorCode` *intentionally*.
          133 void SSAFD::pc_setup_bjacobi() {
          134   PetscErrorCode ierr;
          135   PC pc;
          136 
          137   ierr = KSPSetType(m_KSP, KSPGMRES);
          138   PISM_CHK(ierr, "KSPSetType");
          139 
          140   ierr = KSPSetOperators(m_KSP, m_A, m_A);
          141   PISM_CHK(ierr, "KSPSetOperators");
          142 
          143   // Get the PC from the KSP solver:
          144   ierr = KSPGetPC(m_KSP, &pc);
          145   PISM_CHK(ierr, "KSPGetPC");
          146 
          147   // Set the PC type:
          148   ierr = PCSetType(pc, PCBJACOBI);
          149   PISM_CHK(ierr, "PCSetType");
          150 
          151   // Process options:
          152   ierr = KSPSetFromOptions(m_KSP);
          153   PISM_CHK(ierr, "KSPSetFromOptions");
          154 }
          155 
          156 //! @note Uses `PetscErrorCode` *intentionally*.
          157 void SSAFD::pc_setup_asm() {
          158   PetscErrorCode ierr;
          159   PC pc, sub_pc;
          160 
          161   // Set parameters equivalent to
          162   // -ksp_type gmres -ksp_norm_type unpreconditioned -ksp_pc_side right -pc_type asm -sub_pc_type lu
          163 
          164   ierr = KSPSetType(m_KSP, KSPGMRES);
          165   PISM_CHK(ierr, "KSPSetType");
          166 
          167   ierr = KSPSetOperators(m_KSP, m_A, m_A);
          168   PISM_CHK(ierr, "KSPSetOperators");
          169 
          170   // Switch to using the "unpreconditioned" norm.
          171   ierr = KSPSetNormType(m_KSP, KSP_NORM_UNPRECONDITIONED);
          172   PISM_CHK(ierr, "KSPSetNormType");
          173 
          174   // Switch to "right" preconditioning.
          175   ierr = KSPSetPCSide(m_KSP, PC_RIGHT);
          176   PISM_CHK(ierr, "KSPSetPCSide");
          177 
          178   // Get the PC from the KSP solver:
          179   ierr = KSPGetPC(m_KSP, &pc);
          180   PISM_CHK(ierr, "KSPGetPC");
          181 
          182   // Set the PC type:
          183   ierr = PCSetType(pc, PCASM);
          184   PISM_CHK(ierr, "PCSetType");
          185 
          186   // Set the sub-KSP object to "preonly"
          187   KSP *sub_ksp;
          188   ierr = PCSetUp(pc);
          189   PISM_CHK(ierr, "PCSetUp");
          190 
          191   ierr = PCASMGetSubKSP(pc, NULL, NULL, &sub_ksp);
          192   PISM_CHK(ierr, "PCASMGetSubKSP");
          193 
          194   ierr = KSPSetType(*sub_ksp, KSPPREONLY);
          195   PISM_CHK(ierr, "KSPSetType");
          196 
          197   // Set the PC of the sub-KSP to "LU".
          198   ierr = KSPGetPC(*sub_ksp, &sub_pc);
          199   PISM_CHK(ierr, "KSPGetPC");
          200 
          201   ierr = PCSetType(sub_pc, PCLU);
          202   PISM_CHK(ierr, "PCSetType");
          203 
          204   // Let the user override all this:
          205   // Process options:
          206   ierr = KSPSetFromOptions(m_KSP);
          207   PISM_CHK(ierr, "KSPSetFromOptions");
          208 }
          209 
          210 void SSAFD::init_impl() {
          211   SSA::init_impl();
          212 
          213   m_log->message(2,
          214              "  [using the KSP-based finite difference implementation]\n");
          215 
          216   // options
          217   options::Integer viewer_size("-ssa_nuh_viewer_size", "nuH viewer size",
          218                                m_nuh_viewer_size);
          219   m_nuh_viewer_size = viewer_size;
          220   m_view_nuh = options::Bool("-ssa_view_nuh", "Enable the SSAFD nuH runtime viewer");
          221 
          222   if (m_config->get_flag("stress_balance.calving_front_stress_bc")) {
          223     m_log->message(2,
          224                "  using PISM-PIK calving-front stress boundary condition ...\n");
          225   }
          226 
          227   m_default_pc_failure_count     = 0;
          228   m_default_pc_failure_max_count = 5;
          229 }
          230 
          231 //! \brief Computes the right-hand side ("rhs") of the linear problem for the
          232 //! Picard iteration and finite-difference implementation of the SSA equations.
          233 /*!
          234 The right side of the SSA equations is just the driving stress term
          235    \f[ - \rho g H \nabla h. \f]
          236 The basal stress is put on the left side of the system.  This method builds the
          237 discrete approximation of the right side.  For more about the discretization
          238 of the SSA equations, see comments for assemble_matrix().
          239 
          240 The values of the driving stress on the i,j grid come from a call to
          241 compute_driving_stress().
          242 
          243 In the case of Dirichlet boundary conditions, the entries on the right-hand side
          244 come from known velocity values.  The fields m_bc_values and m_bc_mask are used for
          245 this.
          246  */
          247 void SSAFD::assemble_rhs(const Inputs &inputs) {
          248   const IceModelVec2S
          249     &thickness             = inputs.geometry->ice_thickness,
          250     &bed                   = inputs.geometry->bed_elevation,
          251     &surface               = inputs.geometry->ice_surface_elevation,
          252     &sea_level             = inputs.geometry->sea_level_elevation,
          253     *melange_back_pressure = inputs.melange_back_pressure;
          254 
          255   const double
          256     dx                     = m_grid->dx(),
          257     dy                     = m_grid->dy(),
          258     standard_gravity       = m_config->get_number("constants.standard_gravity"),
          259     rho_ocean              = m_config->get_number("constants.sea_water.density"),
          260     rho_ice                = m_config->get_number("constants.ice.density");
          261 
          262   // This constant is for debugging: simulations should not depend on the choice of
          263   // velocity used in ice-free areas.
          264   const Vector2 ice_free_velocity(0.0, 0.0);
          265 
          266   const bool
          267     use_cfbc          = m_config->get_flag("stress_balance.calving_front_stress_bc"),
          268     is_dry_simulation = m_config->get_flag("ocean.always_grounded");
          269 
          270   // FIXME: bedrock_boundary is a misleading name
          271   bool bedrock_boundary = m_config->get_flag("stress_balance.ssa.dirichlet_bc");
          272 
          273   compute_driving_stress(inputs.geometry->ice_thickness,
          274                          inputs.geometry->ice_surface_elevation,
          275                          m_mask,
          276                          inputs.no_model_mask,
          277                          m_taud);
          278 
          279   IceModelVec::AccessList list{&m_taud, &m_b};
          280 
          281   if (inputs.bc_values and inputs.bc_mask) {
          282     list.add({inputs.bc_values, inputs.bc_mask});
          283   }
          284 
          285   if (use_cfbc) {
          286     list.add({&thickness, &bed, &surface, &m_mask, &sea_level});
          287   }
          288 
          289   if (use_cfbc and melange_back_pressure) {
          290     list.add(*melange_back_pressure);
          291   }
          292 
          293   m_b.set(0.0);
          294 
          295   for (Points p(*m_grid); p; p.next()) {
          296     const int i = p.i(), j = p.j();
          297 
          298     if (inputs.bc_values and inputs.bc_mask->as_int(i, j) == 1) {
          299       m_b(i, j).u = m_scaling * (*inputs.bc_values)(i, j).u;
          300       m_b(i, j).v = m_scaling * (*inputs.bc_values)(i, j).v;
          301       continue;
          302     }
          303 
          304     if (use_cfbc) {
          305       double H_ij = thickness(i,j);
          306 
          307       auto M = m_mask.int_star(i, j);
          308 
          309       // Note: this sets velocities at both ice-free ocean and ice-free
          310       // bedrock to zero. This means that we need to set boundary conditions
          311       // at both ice/ice-free-ocean and ice/ice-free-bedrock interfaces below
          312       // to be consistent.
          313       if (ice_free(M.ij)) {
          314         m_b(i, j) = m_scaling * ice_free_velocity;
          315         continue;
          316       }
          317 
          318       if (is_marginal(i, j, bedrock_boundary)) {
          319         // weights at the west, east, south, and north cell faces
          320         int W = 0, E = 0, S = 0, N = 0;
          321         // direct neighbors
          322         if (bedrock_boundary) {
          323           if (ice_free_ocean(M.e))
          324             E = 1;
          325           if (ice_free_ocean(M.w))
          326             W = 1;
          327           if (ice_free_ocean(M.n))
          328             N = 1;
          329           if (ice_free_ocean(M.s))
          330             S = 1;
          331         } else {
          332           if (ice_free(M.e))
          333             E = 1;
          334           if (ice_free(M.w))
          335             W = 1;
          336           if (ice_free(M.n))
          337             N = 1;
          338           if (ice_free(M.s))
          339             S = 1;
          340         }
          341 
          342         double delta_p = 0.0;
          343         if (not (grid_edge(*m_grid, i, j) and mask::grounded(M.ij))) {
          344           // In regional setups grounded ice may extend to the edge of the domain. This
          345           // condition ensures that at a domain edge the ice behaves as if it extends past
          346           // the edge without a change in geometry.
          347           //
          348           // We don't treat floating ice the same way because doing so would affect an
          349           // existing "flowline" setup.
          350           delta_p = margin_pressure_difference(ocean(M.ij), is_dry_simulation,
          351                                                H_ij, bed(i, j), sea_level(i, j),
          352                                                rho_ice, rho_ocean, standard_gravity);
          353         }
          354 
          355         if (melange_back_pressure) {
          356           double lambda = (*melange_back_pressure)(i, j);
          357 
          358           // adjust the "pressure difference term" using the provided
          359           // "melange back pressure fraction".
          360           delta_p *= (1.0 - lambda);
          361         }
          362 
          363         {
          364           // fjord walls, nunataks, etc
          365           //
          366           // Override weights if we are at the margin and the grid cell on the other side
          367           // of the interface is ice-free and above the level of the ice surface.
          368           //
          369           // This effectively sets the pressure difference at the corresponding interface
          370           // to zero, which is exactly what we need.
          371           auto b = bed.star(i, j);
          372           double h = surface(i, j);
          373 
          374           if (ice_free(M.n) and b.n > h) {
          375             N = 0;
          376           }
          377           if (ice_free(M.e) and b.e > h) {
          378             E = 0;
          379           }
          380           if (ice_free(M.s) and b.s > h) {
          381             S = 0;
          382           }
          383           if (ice_free(M.w) and b.w > h) {
          384             W = 0;
          385           }
          386         }
          387 
          388         // Note that if the current cell is "marginal" but not a CFBC
          389         // location, the following two lines are equaivalent to the "usual
          390         // case" below.
          391         //
          392         // Note: signs below (+E, -W, etc) are explained by directions of outward
          393         // normal vectors at corresponding cell faces.
          394         m_b(i, j).u = m_taud(i,j).u + (E - W) * delta_p / dx;
          395         m_b(i, j).v = m_taud(i,j).v + (N - S) * delta_p / dy;
          396 
          397         continue;
          398       } // end of "if (is_marginal(i, j))"
          399 
          400         // If we reached this point, then CFBC are enabled, but we are in the
          401         // interior of a sheet or shelf. See "usual case" below.
          402 
          403     }   // end of "if (use_cfbc)"
          404 
          405     // usual case: use already computed driving stress
          406     m_b(i, j).u = m_taud(i, j).u;
          407     m_b(i, j).v = m_taud(i, j).v;
          408   }
          409 }
          410 
          411 
          412 //! \brief Assemble the left-hand side matrix for the KSP-based, Picard iteration,
          413 //! and finite difference implementation of the SSA equations.
          414 /*!
          415 Recall the SSA equations are
          416 \f{align*}
          417  - 2 \left[\nu H \left(2 u_x + v_y\right)\right]_x
          418         - \left[\nu H \left(u_y + v_x\right)\right]_y
          419         - \tau_{(b)1}  &= - \rho g H h_x, \\
          420    - \left[\nu H \left(u_y + v_x\right)\right]_x
          421         - 2 \left[\nu H \left(u_x + 2 v_y\right)\right]_y
          422         - \tau_{(b)2}  &= - \rho g H h_y,
          423 \f}
          424 where \f$u\f$ is the \f$x\f$-component of the velocity and \f$v\f$ is the
          425 \f$y\f$-component of the velocity.
          426 
          427 The coefficient \f$\nu\f$ is the vertically-averaged effective viscosity.
          428 (The product \f$\nu H\f$ is computed by compute_nuH_staggered().)
          429 The Picard iteration idea is that, to solve the nonlinear equations in which
          430 the effective viscosity depends on the velocity, we freeze the effective
          431 viscosity using its value at the current estimate of the velocity and we solve
          432 the linear equations which come from this viscosity.  In abstract symbols, the
          433 Picard iteration replaces the above nonlinear SSA equations by a sequence of
          434 linear problems
          435 
          436 \f[ A(U^{(k)}) U^{(k+1)} = b \f]
          437 
          438 where \f$A(U)\f$ is the matrix from discretizing the SSA equations supposing
          439 the viscosity is a function of known velocities \f$U\f$, and where \f$U^{(k)}\f$
          440 denotes the \f$k\f$th iterate in the process.  The current method assembles \f$A(U)\f$.
          441 
          442 For ice shelves \f$\tau_{(b)i} = 0\f$ [\ref MacAyealetal].
          443 For ice streams with a basal till modelled as a plastic material,
          444 \f$\tau_{(b)i} = - \tau_c u_i/|\mathbf{u}|\f$ where
          445 \f$\mathbf{u} = (u,v)\f$, \f$|\mathbf{u}| = \left(u^2 + v^2\right)^{1/2}\f$,
          446 where \f$\tau_c(t,x,y)\f$ is the yield stress of the till [\ref SchoofStream].
          447 More generally, ice streams can be modeled with a pseudo-plastic basal till;
          448 see IceModel::initBasalTillModel() and IceModel::updateYieldStressUsingBasalWater()
          449 and reference [\ref BKAJS].  The pseudo-plastic till model includes all power law
          450 sliding relations and the linearly-viscous model for sliding,
          451 \f$\tau_{(b)i} = - \beta u_i\f$ where \f$\beta\f$ is the basal drag
          452 (friction) parameter [\ref MacAyeal].  In any case, PISM assumes that the basal shear
          453 stress can be factored this way, *even if the coefficient depends on the
          454 velocity*, \f$\beta(u,v)\f$.  Such factoring is possible even in the case of
          455 (regularized) plastic till.  This scalar coefficient \f$\beta\f$ is what is
          456 returned by IceBasalResistancePlasticLaw::drag().
          457 
          458 Note that the basal shear stress appears on the \em left side of the
          459 linear system we actually solve. We believe this is crucial, because
          460 of its effect on the spectrum of the linear approximations of each
          461 stage. The effect on spectrum is clearest in the linearly-viscous till
          462 case but there seems to be an analogous effect in the plastic till case.
          463 
          464 This method assembles the matrix for the left side of the above SSA equations.
          465 The numerical method is finite difference.  Suppose we use difference notation
          466 \f$\delta_{+x}f^{i,j} = f^{i+1,j}-f^{i,j}\f$,
          467 \f$\delta_{-x}f^{i,j} = f^{i,j}-f^{i-1,j}\f$, and
          468 \f$\Delta_{x}f^{i,j} = f^{i+1,j}-f^{i-1,j}\f$, and corresponding notation for
          469 \f$y\f$ differences, and that we write \f$N = \nu H\f$ then the first of the
          470 two "concrete" SSA equations above has this discretization:
          471 \f{align*}
          472 - &2 \frac{N^{i+\frac{1}{2},j}}{\Delta x} \left[2\frac{\delta_{+x}u^{i,j}}{\Delta x} + \frac{\Delta_{y} v^{i+1,j} + \Delta_{y} v^{i,j}}{4 \Delta y}\right] + 2 \frac{N^{i-\frac{1}{2},j}}{\Delta x} \left[2\frac{\delta_{-x}u^{i,j}}{\Delta x} + \frac{\Delta_y v^{i,j} + \Delta_y v^{i-1,j}}{4 \Delta y}\right] \\
          473 &\qquad- \frac{N^{i,j+\frac{1}{2}}}{\Delta y} \left[\frac{\delta_{+y} u^{i,j}}{\Delta y} + \frac{\Delta_x v^{i,j+1} + \Delta_x v^{i,j}}{4 \Delta x}\right] + \frac{N^{i,j-\frac{1}{2}}}{\Delta y} \left[\frac{\delta_{-y}u^{i,j}}{\Delta y} + \frac{\Delta_x v^{i,j} + \Delta_x v^{i,j-1}}{4 \Delta x}\right] - \tau_{(b)1}^{i,j} = - \rho g H^{i,j} \frac{\Delta_x h^{i,j}}{2\Delta x}.
          474 \f}
          475 As a picture, see Figure \ref ssastencil.
          476 
          477 \image html ssastencil.png "\b ssastencil:  Stencil for our finite difference discretization of the first of the two scalar SSA equations.  Triangles show staggered grid points where N = nu * H is evaluated.  Circles and squares show where u and v are approximated, respectively."
          478 \anchor ssastencil
          479 
          480 It follows immediately that the matrix we assemble in the current method has
          481 13 nonzeros entries per row because, for this first SSA equation, there are 5
          482 grid values of \f$u\f$ and 8 grid values of \f$v\f$ used in this scheme.  For
          483 the second equation we also have 13 nonzeros per row.
          484 
          485 FIXME:  document use of DAGetMatrix and MatStencil and MatSetValuesStencil
          486 
          487 */
          488 void SSAFD::assemble_matrix(const Inputs &inputs,
          489                             bool include_basal_shear, Mat A) {
          490   PetscErrorCode ierr = 0;
          491 
          492   // shortcut:
          493   const IceModelVec2V &vel = m_velocity;
          494 
          495   const IceModelVec2S
          496     &thickness         = inputs.geometry->ice_thickness,
          497     &bed               = inputs.geometry->bed_elevation,
          498     &surface           = inputs.geometry->ice_surface_elevation,
          499     &grounded_fraction = inputs.geometry->cell_grounded_fraction,
          500     &tauc              = *inputs.basal_yield_stress;
          501 
          502   const double
          503     dx                    = m_grid->dx(),
          504     dy                    = m_grid->dy(),
          505     beta_lateral_margin   = m_config->get_number("basal_resistance.beta_lateral_margin"),
          506     beta_ice_free_bedrock = m_config->get_number("basal_resistance.beta_ice_free_bedrock");
          507 
          508   const bool use_cfbc     = m_config->get_flag("stress_balance.calving_front_stress_bc");
          509   const bool replace_zero_diagonal_entries =
          510     m_config->get_flag("stress_balance.ssa.fd.replace_zero_diagonal_entries");
          511 
          512   // FIXME: bedrock_boundary is a misleading name
          513   const bool bedrock_boundary = m_config->get_flag("stress_balance.ssa.dirichlet_bc");
          514 
          515   ierr = MatZeroEntries(A);
          516   PISM_CHK(ierr, "MatZeroEntries");
          517 
          518   IceModelVec::AccessList list{&m_nuH, &tauc, &vel, &m_mask, &bed, &surface};
          519 
          520   if (inputs.bc_values && inputs.bc_mask) {
          521     list.add(*inputs.bc_mask);
          522   }
          523 
          524   const bool sub_gl = m_config->get_flag("geometry.grounded_cell_fraction");
          525   if (sub_gl) {
          526     list.add(grounded_fraction);
          527   }
          528 
          529   // handles friction of the ice cell along ice-free bedrock margins when bedrock higher than ice
          530   // surface (in simplified setups)
          531   bool lateral_drag_enabled=m_config->get_flag("stress_balance.ssa.fd.lateral_drag.enabled");
          532   if (lateral_drag_enabled) {
          533     list.add({&thickness, &bed, &surface});
          534   }
          535   double lateral_drag_viscosity=m_config->get_number("stress_balance.ssa.fd.lateral_drag.viscosity");
          536   double HminFrozen=0.0;
          537 
          538   /* matrix assembly loop */
          539   ParallelSection loop(m_grid->com);
          540   try {
          541     for (Points p(*m_grid); p; p.next()) {
          542       const int i = p.i(), j = p.j();
          543 
          544       // Handle the easy case: provided Dirichlet boundary conditions
          545       if (inputs.bc_values && inputs.bc_mask && inputs.bc_mask->as_int(i,j) == 1) {
          546         // set diagonal entry to one (scaled); RHS entry will be known velocity;
          547         set_diagonal_matrix_entry(A, i, j, 0, m_scaling);
          548         set_diagonal_matrix_entry(A, i, j, 1, m_scaling);
          549         continue;
          550       }
          551 
          552       /* Provide shorthand for the following staggered coefficients  nu H:
          553        *      c_n
          554        *  c_w     c_e
          555        *      c_s
          556        */
          557       // const
          558       double c_w = m_nuH(i-1,j,0);
          559       double c_e = m_nuH(i,j,0);
          560       double c_s = m_nuH(i,j-1,1);
          561       double c_n = m_nuH(i,j,1);
          562 
          563       if (lateral_drag_enabled) {
          564         // if option is set, the viscosity at ice-bedrock boundary layer will
          565         // be prescribed and is a temperature-independent free (user determined) parameter
          566 
          567         // direct neighbors
          568         auto M = m_mask.int_star(i, j);
          569         auto H = thickness.star(i, j);
          570         auto b = bed.star(i, j);
          571         double h = surface(i, j);
          572 
          573         if (H.ij > HminFrozen) {
          574           if (b.w > h and ice_free_land(M.w)) {
          575             c_w = lateral_drag_viscosity * 0.5 * (H.ij + H.w);
          576           }
          577           if (b.e > h and ice_free_land(M.e)) {
          578             c_e = lateral_drag_viscosity * 0.5 * (H.ij + H.e);
          579           }
          580           if (b.n > h and ice_free_land(M.n)) {
          581             c_n = lateral_drag_viscosity * 0.5 * (H.ij + H.n);
          582           }
          583           if (b.s > h and ice_free_land(M.s)) {
          584             c_s = lateral_drag_viscosity * 0.5 * (H.ij + H.s);
          585           }
          586         }
          587       }
          588 
          589       // We use DAGetMatrix to obtain the SSA matrix, which means that all 18
          590       // non-zeros get allocated, even though we use only 13 (or 14). The
          591       // remaining 5 (or 4) coefficients are zeros, but we set them anyway,
          592       // because this makes the code easier to understand.
          593       const int n_nonzeros = 18;
          594       MatStencil row, col[n_nonzeros];
          595 
          596       // |-----+-----+---+-----+-----|
          597       // | NW  | NNW | N | NNE | NE  |
          598       // | WNW |     | | |     | ENE |
          599       // | W   |-----|-o-|-----| E   |
          600       // | WSW |     | | |     | ESE |
          601       // | SW  | SSW | S | SSE | SE  |
          602       // |-----+-----+---+-----+-----|
          603       //
          604       // We use compass rose notation for weights corresponding to interfaces between
          605       // cells around the current one (i, j). Here N corresponds to the interface between
          606       // the cell (i, j) and the one to the north of it.
          607       int N = 1, E = 1, S = 1, W = 1;
          608 
          609       // Similarly, we use compass rose notation for weights used to switch between
          610       // centered and one-sided finite differences. Here NNE is the interface between
          611       // cells N and NE, ENE - between E and NE, etc.
          612       int NNW = 1, NNE = 1, SSW = 1, SSE = 1;
          613       int WNW = 1, ENE = 1, WSW = 1, ESE = 1;
          614 
          615       int M_ij = m_mask.as_int(i,j);
          616 
          617       if (use_cfbc) {
          618         auto M = m_mask.int_box(i, j);
          619 
          620         // Note: this sets velocities at both ice-free ocean and ice-free
          621         // bedrock to zero. This means that we need to set boundary conditions
          622         // at both ice/ice-free-ocean and ice/ice-free-bedrock interfaces below
          623         // to be consistent.
          624         if (ice_free(M.ij)) {
          625           set_diagonal_matrix_entry(A, i, j, 0, m_scaling);
          626           set_diagonal_matrix_entry(A, i, j, 1, m_scaling);
          627           continue;
          628         }
          629 
          630         if (is_marginal(i, j, bedrock_boundary)) {
          631           // If at least one of the following four conditions is "true", we're
          632           // at a CFBC location.
          633           if (bedrock_boundary) {
          634 
          635             if (ice_free_ocean(M.e))
          636               E = 0;
          637             if (ice_free_ocean(M.w))
          638               W = 0;
          639             if (ice_free_ocean(M.n))
          640               N = 0;
          641             if (ice_free_ocean(M.s))
          642               S = 0;
          643 
          644             // decide whether to use centered or one-sided differences
          645             if (ice_free_ocean(M.n) || ice_free_ocean(M.ne))
          646               NNE = 0;
          647             if (ice_free_ocean(M.e) || ice_free_ocean(M.ne))
          648               ENE = 0;
          649             if (ice_free_ocean(M.e) || ice_free_ocean(M.se))
          650               ESE = 0;
          651             if (ice_free_ocean(M.s) || ice_free_ocean(M.se))
          652               SSE = 0;
          653             if (ice_free_ocean(M.s) || ice_free_ocean(M.sw))
          654               SSW = 0;
          655             if (ice_free_ocean(M.w) || ice_free_ocean(M.sw))
          656               WSW = 0;
          657             if (ice_free_ocean(M.w) || ice_free_ocean(M.nw))
          658               WNW = 0;
          659             if (ice_free_ocean(M.n) || ice_free_ocean(M.nw))
          660               NNW = 0;
          661 
          662           } else {                // if (not bedrock_boundary)
          663 
          664             if (ice_free(M.e))
          665               E = 0;
          666             if (ice_free(M.w))
          667               W = 0;
          668             if (ice_free(M.n))
          669               N = 0;
          670             if (ice_free(M.s))
          671               S = 0;
          672 
          673             // decide whether to use centered or one-sided differences
          674             if (ice_free(M.n) || ice_free(M.ne))
          675               NNE = 0;
          676             if (ice_free(M.e) || ice_free(M.ne))
          677               ENE = 0;
          678             if (ice_free(M.e) || ice_free(M.se))
          679               ESE = 0;
          680             if (ice_free(M.s) || ice_free(M.se))
          681               SSE = 0;
          682             if (ice_free(M.s) || ice_free(M.sw))
          683               SSW = 0;
          684             if (ice_free(M.w) || ice_free(M.sw))
          685               WSW = 0;
          686             if (ice_free(M.w) || ice_free(M.nw))
          687               WNW = 0;
          688             if (ice_free(M.n) || ice_free(M.nw))
          689               NNW = 0;
          690 
          691           } // end of the else clause following "if (bedrock_boundary)"
          692         }   // end of "if (is_marginal(i, j, bedrock_boundary))"
          693       }     // end of "if (use_cfbc)"
          694 
          695       /* begin Maxima-generated code */
          696       const double dx2 = dx*dx, dy2 = dy*dy, d4 = 4*dx*dy, d2 = 2*dx*dy;
          697 
          698       /* Coefficients of the discretization of the first equation; u first, then v. */
          699       double eq1[] = {
          700         0,  -c_n*N/dy2,  0,
          701         -4*c_w*W/dx2,  (c_n*N+c_s*S)/dy2+(4*c_e*E+4*c_w*W)/dx2,  -4*c_e*E/dx2,
          702         0,  -c_s*S/dy2,  0,
          703         c_w*W*WNW/d2+c_n*NNW*N/d4,  (c_n*NNE*N-c_n*NNW*N)/d4+(c_w*W*N-c_e*E*N)/d2,  -c_e*E*ENE/d2-c_n*NNE*N/d4,
          704         (c_w*W*WSW-c_w*W*WNW)/d2+(c_n*W*N-c_s*W*S)/d4,  (c_n*E*N-c_n*W*N-c_s*E*S+c_s*W*S)/d4+(c_e*E*N-c_w*W*N-c_e*E*S+c_w*W*S)/d2,  (c_e*E*ENE-c_e*E*ESE)/d2+(c_s*E*S-c_n*E*N)/d4,
          705         -c_w*W*WSW/d2-c_s*SSW*S/d4,  (c_s*SSW*S-c_s*SSE*S)/d4+(c_e*E*S-c_w*W*S)/d2,  c_e*E*ESE/d2+c_s*SSE*S/d4,
          706       };
          707 
          708       /* Coefficients of the discretization of the second equation; u first, then v. */
          709       double eq2[] = {
          710         c_w*W*WNW/d4+c_n*NNW*N/d2,  (c_n*NNE*N-c_n*NNW*N)/d2+(c_w*W*N-c_e*E*N)/d4,  -c_e*E*ENE/d4-c_n*NNE*N/d2,
          711         (c_w*W*WSW-c_w*W*WNW)/d4+(c_n*W*N-c_s*W*S)/d2,  (c_n*E*N-c_n*W*N-c_s*E*S+c_s*W*S)/d2+(c_e*E*N-c_w*W*N-c_e*E*S+c_w*W*S)/d4,  (c_e*E*ENE-c_e*E*ESE)/d4+(c_s*E*S-c_n*E*N)/d2,
          712         -c_w*W*WSW/d4-c_s*SSW*S/d2,  (c_s*SSW*S-c_s*SSE*S)/d2+(c_e*E*S-c_w*W*S)/d4,  c_e*E*ESE/d4+c_s*SSE*S/d2,
          713         0,  -4*c_n*N/dy2,  0,
          714         -c_w*W/dx2,  (4*c_n*N+4*c_s*S)/dy2+(c_e*E+c_w*W)/dx2,  -c_e*E/dx2,
          715         0,  -4*c_s*S/dy2,  0,
          716       };
          717 
          718       /* i indices */
          719       const int I[] = {
          720         i-1,  i,  i+1,
          721         i-1,  i,  i+1,
          722         i-1,  i,  i+1,
          723         i-1,  i,  i+1,
          724         i-1,  i,  i+1,
          725         i-1,  i,  i+1,
          726       };
          727 
          728       /* j indices */
          729       const int J[] = {
          730         j+1,  j+1,  j+1,
          731         j,  j,  j,
          732         j-1,  j-1,  j-1,
          733         j+1,  j+1,  j+1,
          734         j,  j,  j,
          735         j-1,  j-1,  j-1,
          736       };
          737 
          738       /* component indices */
          739       const int C[] = {
          740         0,  0,  0,
          741         0,  0,  0,
          742         0,  0,  0,
          743         1,  1,  1,
          744         1,  1,  1,
          745         1,  1,  1,
          746       };
          747       /* end Maxima-generated code */
          748 
          749       /* Dragging ice experiences friction at the bed determined by the
          750        *    IceBasalResistancePlasticLaw::drag() methods.  These may be a plastic,
          751        *    pseudo-plastic, or linear friction law.  Dragging is done implicitly
          752        *    (i.e. on left side of SSA eqns).  */
          753       double beta_u = 0.0, beta_v = 0.0;
          754       if (include_basal_shear) {
          755         double beta = 0.0;
          756         if (grounded_ice(M_ij)) {
          757           beta = m_basal_sliding_law->drag(tauc(i,j), vel(i,j).u, vel(i,j).v);
          758         } else if (ice_free_land(M_ij)) {
          759           // apply drag even in this case, to help with margins; note ice free
          760           // areas already have a strength extension
          761           beta = beta_ice_free_bedrock;
          762         }
          763         if (sub_gl) {
          764           // reduce the basal drag at grid cells that are partially grounded:
          765           if (icy(M_ij)) {
          766             beta = grounded_fraction(i,j) * m_basal_sliding_law->drag(tauc(i,j), vel(i,j).u, vel(i,j).v);
          767           }
          768         }
          769         beta_u = beta;
          770         beta_v = beta;
          771       }
          772 
          773       {
          774         // Set very high basal drag *in the direction along the boundary* at locations
          775         // bordering "fjord walls".
          776 
          777         auto M = m_mask.int_star(i, j);
          778         auto b = bed.star(i, j);
          779         double h = surface(i, j);
          780 
          781         if ((ice_free(M.n) and b.n > h) or (ice_free(M.s) and b.s > h)) {
          782           beta_u += beta_lateral_margin;
          783         }
          784 
          785         if ((ice_free(M.e) and b.e > h) or (ice_free(M.w) and b.w > h)) {
          786           beta_v += beta_lateral_margin;
          787         }
          788       }
          789 
          790       // add beta to diagonal entries
          791       eq1[4]  += beta_u;
          792       eq2[13] += beta_v;
          793 
          794       // check diagonal entries:
          795       const double eps = 1e-16;
          796       if (fabs(eq1[4]) < eps) {
          797         if (replace_zero_diagonal_entries) {
          798           eq1[4] = beta_ice_free_bedrock;
          799         } else {
          800           throw RuntimeError::formatted(PISM_ERROR_LOCATION, "first  (X) equation in the SSAFD system:"
          801                                         " zero diagonal entry at a regular (not Dirichlet B.C.)"
          802                                         " location: i = %d, j = %d\n", i, j);
          803         }
          804       }
          805       if (fabs(eq2[13]) < eps) {
          806         if (replace_zero_diagonal_entries) {
          807           eq2[13] = beta_ice_free_bedrock;
          808         } else {
          809           throw RuntimeError::formatted(PISM_ERROR_LOCATION, "second (Y) equation in the SSAFD system:"
          810                                         " zero diagonal entry at a regular (not Dirichlet B.C.)"
          811                                         " location: i = %d, j = %d\n", i, j);
          812         }
          813       }
          814 
          815       row.i = i;
          816       row.j = j;
          817       for (int m = 0; m < n_nonzeros; m++) {
          818         col[m].i = I[m];
          819         col[m].j = J[m];
          820         col[m].c = C[m];
          821       }
          822 
          823       // set coefficients of the first equation:
          824       row.c = 0;
          825       ierr = MatSetValuesStencil(A, 1, &row, n_nonzeros, col, eq1, INSERT_VALUES);
          826       PISM_CHK(ierr, "MatSetValuesStencil");
          827 
          828       // set coefficients of the second equation:
          829       row.c = 1;
          830       ierr = MatSetValuesStencil(A, 1, &row, n_nonzeros, col, eq2, INSERT_VALUES);
          831       PISM_CHK(ierr, "MatSetValuesStencil");
          832     } // i,j-loop
          833   } catch (...) {
          834     loop.failed();
          835   }
          836   loop.check();
          837 
          838   ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
          839   PISM_CHK(ierr, "MatAssemblyBegin");
          840 
          841   ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
          842   PISM_CHK(ierr, "MatAssemblyEnd");
          843 #if (Pism_DEBUG==1)
          844   ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
          845   PISM_CHK(ierr, "MatSetOption");
          846 #endif
          847 }
          848 
          849 //! \brief Compute the vertically-averaged horizontal velocity from the shallow
          850 //! shelf approximation.
          851 /*!
          852 This is the main procedure in the SSAFD.  It manages the nonlinear solve process
          853 and the Picard iteration.
          854 
          855 The outer loop (over index `k`) is the nonlinear iteration.  In this loop the effective
          856 viscosity is computed by compute_nuH_staggered() and then the linear system is
          857 set up and solved.
          858 
          859 Specifically, we call the PETSc procedure KSPSolve() to solve the linear system.
          860 Solving the linear system is also a loop, an iteration, but it occurs
          861 inside KSPSolve().  The user has full control of the KSP solve through the PETSc
          862 interface.  The default choicess for KSP type `-ksp_type` and preconditioner type
          863 `-pc_type` are GMRES(30) for the former and block Jacobi with ILU on the
          864 blocks for the latter.  The defaults usually work.  These choices are important
          865 but poorly understood.  The eigenvalues of the linearized
          866 SSA vary with ice sheet geometry and temperature in ways that are not well-studied.
          867 Nonetheless these eigenvalues determine the convergence of
          868 this (inner) linear iteration.  A well-chosen preconditioner can put the eigenvalues
          869 in the right place so that the KSP can converge quickly.
          870 
          871 The preconditioner will behave differently on different numbers of
          872 processors.  If the user wants the results of SSA calculations to be
          873 independent of the number of processors, then `-pc_type none` could
          874 be used, but performance will be poor.
          875 
          876 If you want to test different KSP methods, it may be helpful to see how many
          877 iterations were necessary.  Use `-ksp_monitor`.
          878 Initial testing implies that CGS takes roughly half the iterations of
          879 GMRES(30), but is not significantly faster because the iterations are each
          880 roughly twice as slow.  The outputs of PETSc options `-ksp_monitor_singular_value`,
          881 `-ksp_compute_eigenvalues` and `-ksp_plot_eigenvalues -draw_pause N`
          882 (the last holds plots for N seconds) may be useful to diagnose.
          883 
          884 The outer loop terminates when the effective viscosity times thickness
          885 is no longer changing much, according to the tolerance set by the
          886 option `-ssafd_picard_rtol`. The outer loop also terminates when a maximum
          887 number of iterations is exceeded. We save the velocity from the last
          888 time step in order to have a better estimate of the effective
          889 viscosity than the u=v=0 result.
          890 
          891 In truth there is an "outer outer" loop (over index `l`).  This attempts to
          892 over-regularize the effective viscosity if the nonlinear iteration (the "outer"
          893 loop over `k`) is not converging with the default regularization.  The same
          894 over-regularization is attempted if the KSP object reports that it has not
          895 converged.
          896 
          897 (An alternative for recovery in the KSP diverged case, suggested by Jed, is to
          898 revert to a direct linear solve, either for the whole domain (not scalable) or
          899 on the subdomains.  This recovery alternative requires a more nontrivial choice
          900 but it may be worthwhile.  Note the user can already do `-pc_type asm
          901 -sub_pc_type lu` at the command line, forcing subdomain direct solves.)
          902 
          903 FIXME: update this doxygen comment
          904 */
          905 void SSAFD::solve(const Inputs &inputs) {
          906 
          907   // Store away old SSA velocity (it might be needed in case a solver
          908   // fails).
          909   m_velocity_old.copy_from(m_velocity);
          910 
          911   // These computations do not depend on the solution, so they need to
          912   // be done once.
          913   {
          914     assemble_rhs(inputs);
          915     compute_hardav_staggered(inputs);
          916   }
          917 
          918   for (unsigned int k = 0; k < 3; ++k) {
          919     try {
          920       if (k == 0) {
          921         // default strategy
          922         picard_iteration(inputs, m_config->get_number("stress_balance.ssa.epsilon"), 1.0);
          923 
          924         break;
          925       } else if (k == 1) {
          926         // try underrelaxing the iteration
          927         const double underrelax = m_config->get_number("stress_balance.ssa.fd.nuH_iter_failure_underrelaxation");
          928         m_log->message(1,
          929                    "  re-trying with effective viscosity under-relaxation (parameter = %.2f) ...\n",
          930                    underrelax);
          931         picard_iteration(inputs, m_config->get_number("stress_balance.ssa.epsilon"), underrelax);
          932 
          933         break;
          934       } else if (k == 2) {
          935         // try over-regularization
          936         picard_strategy_regularization(inputs);
          937 
          938         break;
          939       } else {
          940         // if we reached this, then all strategies above failed
          941         write_system_petsc("all_strategies_failed");
          942         throw RuntimeError(PISM_ERROR_LOCATION, "all SSAFD strategies failed");
          943       }
          944     } catch (PicardFailure &f) {
          945       // proceed to the next strategy
          946     }
          947   }
          948 
          949   // Post-process velocities if the user asked for it:
          950   if (m_config->get_flag("stress_balance.ssa.fd.brutal_sliding")) {
          951     const double brutal_sliding_scaleFactor = m_config->get_number("stress_balance.ssa.fd.brutal_sliding_scale");
          952     m_velocity.scale(brutal_sliding_scaleFactor);
          953 
          954     m_velocity.update_ghosts();
          955   }
          956 }
          957 
          958 void SSAFD::picard_iteration(const Inputs &inputs,
          959                              double nuH_regularization,
          960                              double nuH_iter_failure_underrelax) {
          961 
          962   if (m_default_pc_failure_count < m_default_pc_failure_max_count) {
          963     // Give BJACOBI another shot if we haven't tried it enough yet
          964 
          965     try {
          966       pc_setup_bjacobi();
          967       picard_manager(inputs, nuH_regularization,
          968                      nuH_iter_failure_underrelax);
          969 
          970     } catch (KSPFailure &f) {
          971 
          972       m_default_pc_failure_count += 1;
          973 
          974       m_log->message(1,
          975                  "  re-trying using the Additive Schwarz preconditioner...\n");
          976 
          977       pc_setup_asm();
          978 
          979       m_velocity.copy_from(m_velocity_old);
          980 
          981       picard_manager(inputs, nuH_regularization,
          982                      nuH_iter_failure_underrelax);
          983     }
          984 
          985   } else {
          986     // otherwise use ASM
          987     pc_setup_asm();
          988 
          989     picard_manager(inputs, nuH_regularization,
          990                    nuH_iter_failure_underrelax);
          991   }
          992 }
          993 
          994 //! \brief Manages the Picard iteration loop.
          995 void SSAFD::picard_manager(const Inputs &inputs,
          996                            double nuH_regularization,
          997                            double nuH_iter_failure_underrelax) {
          998   PetscErrorCode ierr;
          999   double   nuH_norm, nuH_norm_change;
         1000   // ksp_iterations should be a PetscInt because it is used in the
         1001   // KSPGetIterationNumber() call below
         1002   PetscInt    ksp_iterations, ksp_iterations_total = 0, outer_iterations;
         1003   KSPConvergedReason  reason;
         1004 
         1005   unsigned int max_iterations = static_cast<int>(m_config->get_number("stress_balance.ssa.fd.max_iterations"));
         1006   double ssa_relative_tolerance = m_config->get_number("stress_balance.ssa.fd.relative_convergence");
         1007   char tempstr[100] = "";
         1008   bool verbose = m_log->get_threshold() >= 2,
         1009     very_verbose = m_log->get_threshold() > 2;
         1010 
         1011   // set the initial guess:
         1012   m_velocity_global.copy_from(m_velocity);
         1013 
         1014   m_stdout_ssa.clear();
         1015 
         1016   bool use_cfbc = m_config->get_flag("stress_balance.calving_front_stress_bc");
         1017 
         1018   if (use_cfbc == true) {
         1019     compute_nuH_staggered_cfbc(*inputs.geometry, nuH_regularization, m_nuH);
         1020   } else {
         1021     compute_nuH_staggered(*inputs.geometry, nuH_regularization, m_nuH);
         1022   }
         1023   update_nuH_viewers();
         1024 
         1025   // outer loop
         1026   for (unsigned int k = 0; k < max_iterations; ++k) {
         1027 
         1028     if (very_verbose) {
         1029       snprintf(tempstr, 100, "  %2d:", k);
         1030       m_stdout_ssa += tempstr;
         1031     }
         1032 
         1033     // in preparation of measuring change of effective viscosity:
         1034     m_nuH_old.copy_from(m_nuH);
         1035 
         1036     // assemble (or re-assemble) matrix, which depends on updated viscosity
         1037     assemble_matrix(inputs, true, m_A);
         1038 
         1039     if (very_verbose) {
         1040 
         1041       m_stdout_ssa += "A:";
         1042     }
         1043 
         1044     // Call PETSc to solve linear system by iterative method; "inner iteration":
         1045     ierr = KSPSetOperators(m_KSP, m_A, m_A);
         1046     PISM_CHK(ierr, "KSPSetOperator");
         1047 
         1048     ierr = KSPSolve(m_KSP, m_b.vec(), m_velocity_global.vec());
         1049     PISM_CHK(ierr, "KSPSolve");
         1050 
         1051     // Check if diverged; report to standard out about iteration
         1052     ierr = KSPGetConvergedReason(m_KSP, &reason);
         1053     PISM_CHK(ierr, "KSPGetConvergedReason");
         1054 
         1055     if (reason < 0) {
         1056       // KSP diverged
         1057       m_log->message(1,
         1058                  "PISM WARNING:  KSPSolve() reports 'diverged'; reason = %d = '%s'\n",
         1059                  reason, KSPConvergedReasons[reason]);
         1060 
         1061       write_system_petsc("kspdivergederror");
         1062 
         1063       // Tell the caller that we failed. (The caller might try again,
         1064       // though.)
         1065       throw KSPFailure(KSPConvergedReasons[reason]);
         1066     }
         1067 
         1068     // report on KSP success; the "inner" iteration is done
         1069     ierr = KSPGetIterationNumber(m_KSP, &ksp_iterations);
         1070     PISM_CHK(ierr, "KSPGetIterationNumber");
         1071 
         1072     ksp_iterations_total += ksp_iterations;
         1073 
         1074     if (very_verbose) {
         1075       snprintf(tempstr, 100, "S:%d,%d: ", (int)ksp_iterations, reason);
         1076       m_stdout_ssa += tempstr;
         1077     }
         1078 
         1079     // limit ice speed
         1080     {
         1081       auto max_speed = m_config->get_number("stress_balance.ssa.fd.max_speed", "m second-1");
         1082       int high_speed_counter = 0;
         1083 
         1084       IceModelVec::AccessList list{&m_velocity_global};
         1085 
         1086       for (Points p(*m_grid); p; p.next()) {
         1087         const int i = p.i(), j = p.j();
         1088 
         1089         auto speed = m_velocity_global(i, j).magnitude();
         1090 
         1091         if (speed > max_speed) {
         1092           m_velocity_global(i, j) *= max_speed / speed;
         1093           high_speed_counter += 1;
         1094         }
         1095       }
         1096 
         1097       high_speed_counter = GlobalSum(m_grid->com, high_speed_counter);
         1098 
         1099       if (high_speed_counter > 0) {
         1100         m_log->message(2, "  SSA speed was capped at %d locations\n", high_speed_counter);
         1101       }
         1102     }
         1103 
         1104     // Communicate so that we have stencil width for evaluation of effective
         1105     // viscosity on next "outer" iteration (and geometry etc. if done):
         1106     // Note that copy_from() updates ghosts of m_velocity.
         1107     m_velocity.copy_from(m_velocity_global);
         1108 
         1109     // update viscosity and check for viscosity convergence
         1110     if (use_cfbc == true) {
         1111       compute_nuH_staggered_cfbc(*inputs.geometry, nuH_regularization, m_nuH);
         1112     } else {
         1113       compute_nuH_staggered(*inputs.geometry, nuH_regularization, m_nuH);
         1114     }
         1115 
         1116     if (nuH_iter_failure_underrelax != 1.0) {
         1117       m_nuH.scale(nuH_iter_failure_underrelax);
         1118       m_nuH.add(1.0 - nuH_iter_failure_underrelax, m_nuH_old);
         1119     }
         1120     compute_nuH_norm(nuH_norm, nuH_norm_change);
         1121 
         1122     update_nuH_viewers();
         1123 
         1124     if (very_verbose) {
         1125       snprintf(tempstr, 100, "|nu|_2, |Delta nu|_2/|nu|_2 = %10.3e %10.3e\n",
         1126                nuH_norm, nuH_norm_change/nuH_norm);
         1127 
         1128       m_stdout_ssa += tempstr;
         1129 
         1130       // assume that high verbosity shows interest in immediate
         1131       // feedback about SSA iterations
         1132       m_log->message(2, m_stdout_ssa);
         1133 
         1134       m_stdout_ssa.clear();
         1135     }
         1136 
         1137     outer_iterations = k + 1;
         1138 
         1139     if (nuH_norm == 0 || nuH_norm_change / nuH_norm < ssa_relative_tolerance) {
         1140       goto done;
         1141     }
         1142 
         1143   } // outer loop (k)
         1144 
         1145   // If we're here, it means that we exceeded max_iterations and still
         1146   // failed.
         1147 
         1148   throw PicardFailure(pism::printf("effective viscosity not converged after %d iterations\n"
         1149                                    "with nuH_regularization=%8.2e.",
         1150                                    max_iterations, nuH_regularization));
         1151 
         1152  done:
         1153 
         1154   if (very_verbose) {
         1155     snprintf(tempstr, 100, "... =%5d outer iterations, ~%3.1f KSP iterations each\n",
         1156              (int)outer_iterations, ((double) ksp_iterations_total) / outer_iterations);
         1157 
         1158     m_stdout_ssa += tempstr;
         1159   } else if (verbose) {
         1160     // at default verbosity, just record last nuH_norm_change and iterations
         1161     snprintf(tempstr, 100, "%5d outer iterations, ~%3.1f KSP iterations each\n",
         1162              (int)outer_iterations, ((double) ksp_iterations_total) / outer_iterations);
         1163 
         1164     m_stdout_ssa += tempstr;
         1165   }
         1166 
         1167   if (verbose) {
         1168     m_stdout_ssa = "  SSA: " + m_stdout_ssa;
         1169   }
         1170 }
         1171 
         1172 //! Old SSAFD recovery strategy: increase the SSA regularization parameter.
         1173 void SSAFD::picard_strategy_regularization(const Inputs &inputs) {
         1174   // this has no units; epsilon goes up by this ratio when previous value failed
         1175   const double DEFAULT_EPSILON_MULTIPLIER_SSA = 4.0;
         1176   double nuH_regularization = m_config->get_number("stress_balance.ssa.epsilon");
         1177   unsigned int k = 0, max_tries = 5;
         1178 
         1179   if (nuH_regularization <= 0.0) {
         1180     throw PicardFailure("will not attempt over-regularization of nuH\n"
         1181                         "because nuH_regularization == 0.0.");
         1182   }
         1183 
         1184   while (k < max_tries) {
         1185     m_velocity.copy_from(m_velocity_old);
         1186     m_log->message(1,
         1187                "  re-trying with nuH_regularization multiplied by %8.2f...\n",
         1188                DEFAULT_EPSILON_MULTIPLIER_SSA);
         1189 
         1190     nuH_regularization *= DEFAULT_EPSILON_MULTIPLIER_SSA;
         1191 
         1192     try {
         1193       // 1.0 is the under-relaxation parameter
         1194       picard_iteration(inputs, nuH_regularization, 1.0);
         1195       // if this call succeeded, stop over-regularizing
         1196       break;
         1197     }
         1198     catch (PicardFailure &f) {
         1199       k += 1;
         1200 
         1201       if (k == max_tries) {
         1202         throw PicardFailure("exceeded the max. number of nuH over-regularization attempts");
         1203       }
         1204     }
         1205   }
         1206 }
         1207 
         1208 //! \brief Compute the norm of nu H and the change in nu H.
         1209 /*!
         1210 Verification and PST experiments
         1211 suggest that an \f$L^1\f$ criterion for convergence is best.  For verification
         1212 there seems to be little difference, presumably because the solutions are smooth
         1213 and the norms are roughly equivalent on a subspace of smooth functions.  For PST,
         1214 the \f$L^1\f$ criterion gives faster runs with essentially the same results.
         1215 Presumably that is because rapid (temporal and spatial) variation in
         1216 \f$\nu H\f$ occurs at margins, occupying very few horizontal grid cells.
         1217 For the significant (e.g.~in terms of flux) parts of the flow, it is o.k. to ignore
         1218 a bit of bad behavior at these few places, and \f$L^1\f$ ignores it more than
         1219 \f$L^2\f$ (much less \f$L^\infty\f$, which might not work at all).
         1220  */
         1221 void SSAFD::compute_nuH_norm(double &norm, double &norm_change) {
         1222 
         1223   const double area = m_grid->cell_area();
         1224   const NormType MY_NORM = NORM_1;
         1225 
         1226   // Test for change in nu
         1227   m_nuH_old.add(-1, m_nuH);
         1228 
         1229   std::vector<double>
         1230     nuNorm   = m_nuH.norm_all(MY_NORM),
         1231     nuChange = m_nuH_old.norm_all(MY_NORM);
         1232 
         1233   nuChange[0] *= area;
         1234   nuChange[1] *= area;
         1235   nuNorm[0]   *= area;
         1236   nuNorm[1]   *= area;
         1237 
         1238   norm_change = sqrt(PetscSqr(nuChange[0]) + PetscSqr(nuChange[1]));
         1239   norm = sqrt(PetscSqr(nuNorm[0]) + PetscSqr(nuNorm[1]));
         1240 }
         1241 
         1242 //! \brief Computes vertically-averaged ice hardness on the staggered grid.
         1243 void SSAFD::compute_hardav_staggered(const Inputs &inputs) {
         1244   const IceModelVec2S
         1245     &thickness = inputs.geometry->ice_thickness;
         1246 
         1247   const IceModelVec3 &enthalpy = *inputs.enthalpy;
         1248 
         1249   const double
         1250     *E_ij     = NULL,
         1251     *E_offset = NULL;
         1252 
         1253   std::vector<double> E(m_grid->Mz());
         1254 
         1255   IceModelVec::AccessList list{&thickness, &enthalpy, &m_hardness, &m_mask};
         1256 
         1257   ParallelSection loop(m_grid->com);
         1258   try {
         1259     for (Points p(*m_grid); p; p.next()) {
         1260       const int i = p.i(), j = p.j();
         1261 
         1262       E_ij = enthalpy.get_column(i,j);
         1263       for (int o=0; o<2; o++) {
         1264         const int oi = 1-o, oj=o;
         1265         double H;
         1266 
         1267         if (m_mask.icy(i,j) && m_mask.icy(i+oi,j+oj)) {
         1268           H = 0.5 * (thickness(i,j) + thickness(i+oi,j+oj));
         1269         } else if (m_mask.icy(i,j)) {
         1270           H = thickness(i,j);
         1271         }  else {
         1272           H = thickness(i+oi,j+oj);
         1273         }
         1274 
         1275         if (H == 0) {
         1276           m_hardness(i,j,o) = -1e6; // an obviously impossible value
         1277           continue;
         1278         }
         1279 
         1280         E_offset = enthalpy.get_column(i+oi,j+oj);
         1281         // build a column of enthalpy values a the current location:
         1282         for (unsigned int k = 0; k < m_grid->Mz(); ++k) {
         1283           E[k] = 0.5 * (E_ij[k] + E_offset[k]);
         1284         }
         1285 
         1286         m_hardness(i,j,o) = rheology::averaged_hardness(*m_flow_law,
         1287                                                         H, m_grid->kBelowHeight(H),
         1288                                                         &(m_grid->z()[0]), &E[0]);
         1289       } // o
         1290     } // loop over points
         1291   } catch (...) {
         1292     loop.failed();
         1293   }
         1294   loop.check();
         1295 
         1296   fracture_induced_softening(inputs.fracture_density);
         1297 }
         1298 
         1299 
         1300 /*! @brief Correct vertically-averaged hardness using a
         1301     parameterization of the fracture-induced softening.
         1302 
         1303   See T. Albrecht, A. Levermann; Fracture-induced softening for
         1304   large-scale ice dynamics; (2013), The Cryosphere Discussions 7;
         1305   4501-4544; DOI:10.5194/tcd-7-4501-2013
         1306 
         1307   Note that this paper proposes an adjustment of the enhancement factor:
         1308 
         1309   \f[E_{\text{effective}} = E \cdot (1 - (1-\epsilon) \phi)^{-n}.\f]
         1310 
         1311   Let \f$E_{\text{effective}} = E\cdot C\f$, where \f$C\f$ is the
         1312   factor defined by the formula above.
         1313 
         1314   Recall that the effective viscosity is defined by
         1315 
         1316   \f[\nu(D) = \frac12 B D^{(1-n)/(2n)}\f]
         1317 
         1318   and the viscosity form of the flow law is
         1319 
         1320   \f[\sigma'_{ij} = E_{\text{effective}}^{-\frac1n}2\nu(D) D_{ij}.\f]
         1321 
         1322   Then
         1323 
         1324   \f[\sigma'_{ij} = E_{\text{effective}}^{-\frac1n}BD^{(1-n)/(2n)}D_{ij}.\f]
         1325 
         1326   Using the fact that \f$E_{\text{effective}} = E\cdot C\f$, this can be rewritten as
         1327 
         1328   \f[\sigma'_{ij} = E^{-\frac1n} \left(C^{-\frac1n}B\right) D^{(1-n)/(2n)}D_{ij}.\f]
         1329 
         1330   So scaling the enhancement factor by \f$C\f$ is equivalent to scaling
         1331   ice hardness \f$B\f$ by \f$C^{-\frac1n}\f$.
         1332 */
         1333 void SSAFD::fracture_induced_softening(const IceModelVec2S *fracture_density) {
         1334   if (not fracture_density) {
         1335     return;
         1336   }
         1337 
         1338   const double
         1339     epsilon = m_config->get_number("fracture_density.softening_lower_limit"),
         1340     n_glen  = m_flow_law->exponent();
         1341 
         1342   IceModelVec::AccessList list{&m_hardness, fracture_density};
         1343 
         1344   for (Points p(*m_grid); p; p.next()) {
         1345     const int i = p.i(), j = p.j();
         1346 
         1347     for (int o=0; o<2; o++) {
         1348       const int oi = 1-o, oj=o;
         1349 
         1350       const double
         1351         // fracture density on the staggered grid:
         1352         phi       = 0.5 * ((*fracture_density)(i,j) + (*fracture_density)(i+oi,j+oj)),
         1353         // the line below implements equation (6) in the paper
         1354         softening = pow((1.0-(1.0-epsilon)*phi), -n_glen);
         1355 
         1356       m_hardness(i,j,o) *= pow(softening,-1.0/n_glen);
         1357     }
         1358   }
         1359 }
         1360 
         1361 //! \brief Compute the product of ice thickness and effective viscosity (on the
         1362 //! staggered grid).
         1363 /*!
         1364 In PISM the product \f$\nu H\f$ can be
         1365   - constant, or
         1366   - can be computed with a constant ice hardness \f$\bar B\f$ (temperature-independent)
         1367     but with dependence of the viscosity on the strain rates, or
         1368   - it can depend on the strain rates \e and have a vertically-averaged ice
         1369     hardness depending on temperature or enthalpy.
         1370 
         1371 The flow law in ice stream and ice shelf regions must, for now, be a
         1372 (temperature-dependent) Glen law.  This is the only flow law we know how to
         1373 invert to ``viscosity form''.  More general forms like Goldsby-Kohlstedt are
         1374 not yet inverted.
         1375 
         1376 The viscosity form of a Glen law is
         1377    \f[ \nu(T^*,D) = \frac{1}{2} B(T^*) D^{(1/n)-1}\, D_{ij} \f]
         1378 where
         1379    \f[  D_{ij} = \frac{1}{2} \left(\frac{\partial U_i}{\partial x_j} +
         1380                                    \frac{\partial U_j}{\partial x_i}\right) \f]
         1381 is the strain rate tensor and \f$B\f$ is an ice hardness related to
         1382 the ice softness \f$A(T^*)\f$ by
         1383    \f[ B(T^*)=A(T^*)^{-1/n}  \f]
         1384 in the case of a temperature dependent Glen-type law.  (Here \f$T^*\f$ is the
         1385 pressure-adjusted temperature.)
         1386 
         1387 The effective viscosity is then
         1388    \f[ \nu = \frac{\bar B}{2} \left[\left(\frac{\partial u}{\partial x}\right)^2 +
         1389                                \left(\frac{\partial v}{\partial y}\right)^2 +
         1390                                \frac{\partial u}{\partial x} \frac{\partial v}{\partial y} +
         1391                                \frac{1}{4} \left(\frac{\partial u}{\partial y}
         1392                                                  + \frac{\partial v}{\partial x}\right)^2
         1393                                \right]^{(1-n)/(2n)}  \f]
         1394 where in the temperature-dependent case
         1395    \f[ \bar B = \frac{1}{H}\,\int_b^h B(T^*)\,dz\f]
         1396 This integral is approximately computed by the trapezoid rule.
         1397 
         1398 The result of this procedure is \f$\nu H\f$, not just \f$\nu\f$, this it is
         1399 a vertical integral, not average, of viscosity.
         1400 
         1401 The resulting effective viscosity times thickness is regularized by ensuring that
         1402 its minimum is at least \f$\epsilon\f$.  This regularization constant is an argument.
         1403 
         1404 In this implementation we set \f$\nu H\f$ to a constant anywhere the ice is
         1405 thinner than a certain minimum. See SSAStrengthExtension and compare how this
         1406 issue is handled when -cfbc is set.
         1407 */
         1408 void SSAFD::compute_nuH_staggered(const Geometry &geometry,
         1409                                   double nuH_regularization,
         1410                                   IceModelVec2Stag &result) {
         1411 
         1412   const IceModelVec2V &uv = m_velocity; // shortcut
         1413 
         1414   IceModelVec::AccessList list{&result, &uv, &m_hardness, &geometry.ice_thickness};
         1415 
         1416   double ssa_enhancement_factor = m_flow_law->enhancement_factor(),
         1417     n_glen = m_flow_law->exponent(),
         1418     nu_enhancement_scaling = 1.0 / pow(ssa_enhancement_factor, 1.0/n_glen);
         1419 
         1420   const double dx = m_grid->dx(), dy = m_grid->dy();
         1421 
         1422   for (int o=0; o<2; ++o) {
         1423     const int oi = 1 - o, oj=o;
         1424     for (Points p(*m_grid); p; p.next()) {
         1425       const int i = p.i(), j = p.j();
         1426 
         1427       const double H = 0.5 * (geometry.ice_thickness(i,j) + geometry.ice_thickness(i+oi,j+oj));
         1428 
         1429       if (H < strength_extension->get_min_thickness()) {
         1430         result(i,j,o) = strength_extension->get_notional_strength();
         1431         continue;
         1432       }
         1433 
         1434       double u_x, u_y, v_x, v_y;
         1435       // Check the offset to determine how to differentiate velocity
         1436       if (o == 0) {
         1437         u_x = (uv(i+1,j).u - uv(i,j).u) / dx;
         1438         u_y = (uv(i,j+1).u + uv(i+1,j+1).u - uv(i,j-1).u - uv(i+1,j-1).u) / (4*dy);
         1439         v_x = (uv(i+1,j).v - uv(i,j).v) / dx;
         1440         v_y = (uv(i,j+1).v + uv(i+1,j+1).v - uv(i,j-1).v - uv(i+1,j-1).v) / (4*dy);
         1441       } else {
         1442         u_x = (uv(i+1,j).u + uv(i+1,j+1).u - uv(i-1,j).u - uv(i-1,j+1).u) / (4*dx);
         1443         u_y = (uv(i,j+1).u - uv(i,j).u) / dy;
         1444         v_x = (uv(i+1,j).v + uv(i+1,j+1).v - uv(i-1,j).v - uv(i-1,j+1).v) / (4*dx);
         1445         v_y = (uv(i,j+1).v - uv(i,j).v) / dy;
         1446       }
         1447 
         1448       double nu = 0.0;
         1449       m_flow_law->effective_viscosity(m_hardness(i,j,o),
         1450                                       secondInvariant_2D(Vector2(u_x, v_x),
         1451                                                          Vector2(u_y, v_y)),
         1452                                       &nu, NULL);
         1453 
         1454       result(i,j,o) = nu * H;
         1455 
         1456       // include the SSA enhancement factor; in most cases ssa_enhancement_factor is 1
         1457       result(i,j,o) *= nu_enhancement_scaling;
         1458 
         1459       // We ensure that nuH is bounded below by a positive constant.
         1460       result(i,j,o) += nuH_regularization;
         1461 
         1462     } // i,j-loop
         1463   } // o-loop
         1464 
         1465 
         1466   // Some communication
         1467   result.update_ghosts();
         1468 }
         1469 
         1470 /**
         1471  * @brief Compute the product of ice viscosity and thickness on the
         1472  * staggered grid. Used when CFBC is enabled.
         1473  *
         1474  * @param[out] result nu*H product
         1475  * @param[in] nuH_regularization regularization parameter (added to nu*H to keep it away from zero)
         1476  *
         1477  * m_work storage scheme:
         1478  *
         1479  * m_work(i,j,0) - u_x on the i-offset
         1480  * m_work(i,j,1) - v_x on the i-offset
         1481  * m_work(i,j,2) - i-offset weight
         1482  * m_work(i,j,3) - u_y on the j-offset
         1483  * m_work(i,j,4) - v_y on the j-offset
         1484  * m_work(i,j,5) - j-offset weight
         1485  *
         1486  * @return 0 on success
         1487  */
         1488 void SSAFD::compute_nuH_staggered_cfbc(const Geometry &geometry,
         1489                                        double nuH_regularization,
         1490                                        IceModelVec2Stag &result) {
         1491 
         1492   const IceModelVec2S &thickness = geometry.ice_thickness;
         1493 
         1494   const IceModelVec2V &uv = m_velocity; // shortcut
         1495   double ssa_enhancement_factor = m_flow_law->enhancement_factor(),
         1496     n_glen = m_flow_law->exponent(),
         1497     nu_enhancement_scaling = 1.0 / pow(ssa_enhancement_factor, 1.0/n_glen);
         1498 
         1499   const unsigned int U_X = 0, V_X = 1, W_I = 2, U_Y = 3, V_Y = 4, W_J = 5;
         1500 
         1501   const double dx = m_grid->dx(), dy = m_grid->dy();
         1502 
         1503   IceModelVec::AccessList list{&m_mask, &m_work, &m_velocity};
         1504 
         1505   assert(m_velocity.stencil_width() >= 2);
         1506   assert(m_mask.stencil_width()    >= 2);
         1507   assert(m_work.stencil_width()     >= 1);
         1508 
         1509   for (PointsWithGhosts p(*m_grid); p; p.next()) {
         1510     const int i = p.i(), j = p.j();
         1511 
         1512     // x-derivative, i-offset
         1513     {
         1514       if (m_mask.icy(i,j) && m_mask.icy(i+1,j)) {
         1515         m_work(i,j,U_X) = (uv(i+1,j).u - uv(i,j).u) / dx; // u_x
         1516         m_work(i,j,V_X) = (uv(i+1,j).v - uv(i,j).v) / dx; // v_x
         1517         m_work(i,j,W_I) = 1.0;
         1518       } else {
         1519         m_work(i,j,U_X) = 0.0;
         1520         m_work(i,j,V_X) = 0.0;
         1521         m_work(i,j,W_I) = 0.0;
         1522       }
         1523     }
         1524 
         1525     // y-derivative, j-offset
         1526     {
         1527       if (m_mask.icy(i,j) && m_mask.icy(i,j+1)) {
         1528         m_work(i,j,U_Y) = (uv(i,j+1).u - uv(i,j).u) / dy; // u_y
         1529         m_work(i,j,V_Y) = (uv(i,j+1).v - uv(i,j).v) / dy; // v_y
         1530         m_work(i,j,W_J) = 1.0;
         1531       } else {
         1532         m_work(i,j,U_Y) = 0.0;
         1533         m_work(i,j,V_Y) = 0.0;
         1534         m_work(i,j,W_J) = 0.0;
         1535       }
         1536     }
         1537   }
         1538 
         1539   list.add({&result, &m_hardness, &thickness});
         1540 
         1541   for (Points p(*m_grid); p; p.next()) {
         1542     const int i = p.i(), j = p.j();
         1543 
         1544     double u_x, u_y, v_x, v_y, H, nu, W;
         1545     // i-offset
         1546     {
         1547       if (m_mask.icy(i,j) && m_mask.icy(i+1,j)) {
         1548         H = 0.5 * (thickness(i,j) + thickness(i+1,j));
         1549       }
         1550       else if (m_mask.icy(i,j)) {
         1551         H = thickness(i,j);
         1552       } else {
         1553         H = thickness(i+1,j);
         1554       }
         1555 
         1556       if (H >= strength_extension->get_min_thickness()) {
         1557         u_x = m_work(i,j,U_X);
         1558         v_x = m_work(i,j,V_X);
         1559 
         1560         W = m_work(i,j,W_J) + m_work(i,j-1,W_J) + m_work(i+1,j-1,W_J) + m_work(i+1,j,W_J);
         1561         if (W > 0) {
         1562           u_y = 1.0/W * (m_work(i,j,U_Y) + m_work(i,j-1,U_Y) +
         1563                          m_work(i+1,j-1,U_Y) + m_work(i+1,j,U_Y));
         1564           v_y = 1.0/W * (m_work(i,j,V_Y) + m_work(i,j-1,V_Y) +
         1565                          m_work(i+1,j-1,V_Y) + m_work(i+1,j,V_Y));
         1566         } else {
         1567           u_y = 0.0;
         1568           v_y = 0.0;
         1569         }
         1570 
         1571         m_flow_law->effective_viscosity(m_hardness(i,j,0),
         1572                                         secondInvariant_2D(Vector2(u_x, v_x),
         1573                                                            Vector2(u_y, v_y)),
         1574                                         &nu, NULL);
         1575         result(i,j,0) = nu * H;
         1576       } else {
         1577         result(i,j,0) = strength_extension->get_notional_strength();
         1578       }
         1579     }
         1580 
         1581     // j-offset
         1582     {
         1583       if (m_mask.icy(i,j) && m_mask.icy(i,j+1)) {
         1584         H = 0.5 * (thickness(i,j) + thickness(i,j+1));
         1585       } else if (m_mask.icy(i,j)) {
         1586         H = thickness(i,j);
         1587       } else {
         1588         H = thickness(i,j+1);
         1589       }
         1590 
         1591       if (H >= strength_extension->get_min_thickness()) {
         1592         u_y = m_work(i,j,U_Y);
         1593         v_y = m_work(i,j,V_Y);
         1594 
         1595         W = m_work(i,j,W_I) + m_work(i-1,j,W_I) + m_work(i-1,j+1,W_I) + m_work(i,j+1,W_I);
         1596         if (W > 0.0) {
         1597           u_x = 1.0/W * (m_work(i,j,U_X) + m_work(i-1,j,U_X) +
         1598                          m_work(i-1,j+1,U_X) + m_work(i,j+1,U_X));
         1599           v_x = 1.0/W * (m_work(i,j,V_X) + m_work(i-1,j,V_X) +
         1600                          m_work(i-1,j+1,V_X) + m_work(i,j+1,V_X));
         1601         } else {
         1602           u_x = 0.0;
         1603           v_x = 0.0;
         1604         }
         1605 
         1606         m_flow_law->effective_viscosity(m_hardness(i,j,1),
         1607                                         secondInvariant_2D(Vector2(u_x, v_x),
         1608                                                            Vector2(u_y, v_y)),
         1609                                         &nu, NULL);
         1610         result(i,j,1) = nu * H;
         1611       } else {
         1612         result(i,j,1) = strength_extension->get_notional_strength();
         1613       }
         1614     }
         1615 
         1616     // adjustments:
         1617     for (unsigned int o = 0; o < 2; ++o) {
         1618       // include the SSA enhancement factor; in most cases ssa_enhancement_factor is 1
         1619       result(i,j,o) *= nu_enhancement_scaling;
         1620 
         1621       // We ensure that nuH is bounded below by a positive constant.
         1622       result(i,j,o) += nuH_regularization;
         1623     }
         1624   }
         1625 
         1626   // Some communication
         1627   result.update_ghosts();
         1628 }
         1629 
         1630 //! Update the nuH viewer, which shows log10(nu H).
         1631 void SSAFD::update_nuH_viewers() {
         1632 
         1633   if (not m_view_nuh) {
         1634     return;
         1635   }
         1636 
         1637   IceModelVec2S tmp;
         1638   tmp.create(m_grid, "nuH", WITHOUT_GHOSTS);
         1639   tmp.set_attrs("temporary",
         1640                 "log10 of (viscosity * thickness)",
         1641                 "Pa s m", "Pa s m", "", 0);
         1642 
         1643   IceModelVec::AccessList list{&m_nuH, &tmp};
         1644 
         1645   for (Points p(*m_grid); p; p.next()) {
         1646     const int i = p.i(), j = p.j();
         1647 
         1648     double avg_nuH = 0.5 * (m_nuH(i,j,0) + m_nuH(i,j,1));
         1649     if (avg_nuH > 1.0e14) {
         1650       tmp(i,j) = log10(avg_nuH);
         1651     } else {
         1652       tmp(i,j) = 14.0;
         1653     }
         1654   }
         1655 
         1656   if (not m_nuh_viewer) {
         1657     m_nuh_viewer.reset(new petsc::Viewer(m_grid->com, "nuH", m_nuh_viewer_size,
         1658                                          m_grid->Lx(), m_grid->Ly()));
         1659   }
         1660 
         1661   tmp.view(m_nuh_viewer, petsc::Viewer::Ptr());
         1662 }
         1663 
         1664 void SSAFD::set_diagonal_matrix_entry(Mat A, int i, int j, int component,
         1665                                       double value) {
         1666   MatStencil row, col;
         1667 
         1668   row.i = i;
         1669   row.j = j;
         1670   row.c = component;
         1671 
         1672   col.i = i;
         1673   col.j = j;
         1674   col.c = component;
         1675 
         1676   PetscErrorCode ierr = MatSetValuesStencil(A, 1, &row, 1, &col, &value, INSERT_VALUES);
         1677   PISM_CHK(ierr, "MatSetValuesStencil");
         1678 }
         1679 
         1680 //! \brief Checks if a cell is near or at the ice front.
         1681 /*!
         1682  * You need to create IceModelVec::AccessList object and add mask to it.
         1683  *
         1684  * Note that a cell is a CFBC location of one of four direct neighbors is ice-free.
         1685  *
         1686  * If one of the diagonal neighbors is ice-free we don't use the CFBC, but we
         1687  * do need to compute weights used in the SSA discretization (see
         1688  * assemble_matrix()) to avoid differencing across interfaces between icy
         1689  * and ice-free cells.
         1690  *
         1691  * This method ensures that checks in assemble_rhs() and assemble_matrix() are
         1692  * consistent.
         1693  */
         1694 bool SSAFD::is_marginal(int i, int j, bool ssa_dirichlet_bc) {
         1695 
         1696   auto M = m_mask.int_box(i, j);
         1697 
         1698   if (ssa_dirichlet_bc) {
         1699     return icy(M.ij) &&
         1700       (ice_free(M.e) || ice_free(M.w) || ice_free(M.n) || ice_free(M.s) ||
         1701        ice_free(M.ne) || ice_free(M.se) || ice_free(M.nw) || ice_free(M.sw));
         1702   } else {
         1703     return icy(M.ij) &&
         1704       (ice_free_ocean(M.e) || ice_free_ocean(M.w) ||
         1705        ice_free_ocean(M.n) || ice_free_ocean(M.s) ||
         1706        ice_free_ocean(M.ne) || ice_free_ocean(M.se) ||
         1707        ice_free_ocean(M.nw) || ice_free_ocean(M.sw));
         1708   }
         1709 }
         1710 
         1711 void SSAFD::write_system_petsc(const std::string &namepart) {
         1712   PetscErrorCode ierr;
         1713 
         1714   // write a file with a fixed filename; avoid zillions of files
         1715   std::string filename = "SSAFD_" + namepart + ".petsc";
         1716   m_log->message(1,
         1717              "  writing linear system to PETSc binary file %s ...\n", filename.c_str());
         1718 
         1719   petsc::Viewer viewer;       // will be destroyed automatically
         1720   ierr = PetscViewerBinaryOpen(m_grid->com, filename.c_str(), FILE_MODE_WRITE,
         1721                                viewer.rawptr());
         1722   PISM_CHK(ierr, "PetscViewerBinaryOpen");
         1723 
         1724   ierr = MatView(m_A, viewer);
         1725   PISM_CHK(ierr, "MatView");
         1726 
         1727   ierr = VecView(m_b.vec(), viewer);
         1728   PISM_CHK(ierr, "VecView");
         1729 }
         1730 
         1731 SSAFD_nuH::SSAFD_nuH(const SSAFD *m)
         1732   : Diag<SSAFD>(m) {
         1733 
         1734   // set metadata:
         1735   m_vars = {SpatialVariableMetadata(m_sys, "nuH[0]"),
         1736             SpatialVariableMetadata(m_sys, "nuH[1]")};
         1737 
         1738   set_attrs("ice thickness times effective viscosity, i-offset", "",
         1739             "Pa s m", "kPa s m", 0);
         1740   set_attrs("ice thickness times effective viscosity, j-offset", "",
         1741             "Pa s m", "kPa s m", 1);
         1742 }
         1743 
         1744 IceModelVec::Ptr SSAFD_nuH::compute_impl() const {
         1745 
         1746   IceModelVec2Stag::Ptr result(new IceModelVec2Stag);
         1747   result->create(m_grid, "nuH", WITH_GHOSTS);
         1748   result->metadata(0) = m_vars[0];
         1749   result->metadata(1) = m_vars[1];
         1750 
         1751   result->copy_from(model->integrated_viscosity());
         1752 
         1753   return result;
         1754 }
         1755 
         1756 DiagnosticList SSAFD::diagnostics_impl() const {
         1757   DiagnosticList result = SSA::diagnostics_impl();
         1758 
         1759   result["nuH"] = Diagnostic::Ptr(new SSAFD_nuH(this));
         1760 
         1761   return result;
         1762 }
         1763 
         1764 const IceModelVec2Stag & SSAFD::integrated_viscosity() const {
         1765   return m_nuH;
         1766 }
         1767 
         1768 
         1769 } // end of namespace stressbalance
         1770 } // end of namespace pism