ice_dyn_vp: add global_norm, global_dot_product functions

The VP solver uses a linear solver, FGMRES, as part of the non-linear iteration. The FGMRES algorithm involves computing the norm of a distributed vector field, thus performing global sums. These norms are computed by first summing the squared X and Y components of a vector field in subroutine 'calc_L2norm_squared', summing these over the local blocks, and then doing a global (MPI) sum using 'global_sum'. This approach does not lead to reproducible results when the MPI distribution, or the number of local blocks, is changed, for reasons explained in the "Reproducible sums" section of the Developer Guide (mostly, floating point addition is not associative). This was partly pointed out in [1] but I failed to realize it at the time. Introduce a new function, 'global_dot_product', to encapsulate the computation of the dot product of two grid vectors, each split into two arrays (for the X and Y components). Compute the reduction locally as is done in 'calc_L2norm_squared', but throw away the result and use the existing 'global_sum' function when 'bfbflag' is active, passing it the temporary array used to compute the element-by-element product. This approach avoids a performance regression from the added work done in 'global_sum', such that non-bfbflag runs are as fast as before. Note that since 'global_sum' loops on the whole array (and not just ice points as 'global_dot_product'), make sure to zero-initialize the 'prod' local array. Also add a 'global_norm' function implemented using 'global_dot_product'. Both functions will be used in subsequent commits to ensure bit-for-bit reproducibility.
CICE-Consortium · Oct 5, 2022 · b96ea5b · b96ea5b
1 parent fd54d5d
commit b96ea5b
Showing 1 changed file with 118 additions and 0 deletions.
diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -2531,6 +2531,124 @@ end subroutine calc_L2norm_squared
 
 !=======================================================================
 
+! Compute global l^2 norm of a vector field (field_x, field_y)
+
+      function global_norm (nx_block, ny_block, &
+                            icellu  ,           &
+                            indxui  , indxuj  , &
+                            field_x , field_y ) &
+               result(norm)
+
+      use ice_domain, only: distrb_info
+      use ice_domain_size, only: max_blocks
+
+      integer (kind=int_kind), intent(in) :: &
+         nx_block, ny_block    ! block dimensions
+
+      integer (kind=int_kind), dimension (max_blocks), intent(in) :: &
+         icellu                ! total count when iceumask is true
+
+      integer (kind=int_kind), dimension (nx_block*ny_block,max_blocks), intent(in) :: &
+         indxui  , & ! compressed index in i-direction
+         indxuj      ! compressed index in j-direction
+
+      real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(in) :: &
+         field_x     , & ! x-component of vector field
+         field_y         ! y-component of vector field
+
+      real (kind=dbl_kind) :: &
+         norm      ! l^2 norm of vector field
+
+      ! local variables
+
+      integer (kind=int_kind) :: &
+         i, j, ij, iblk
+
+      real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: &
+         squared      ! temporary array for summed squared components
+
+      character(len=*), parameter :: subname = '(global_norm)'
+
+      norm = sqrt(global_dot_product (nx_block  , ny_block , &
+                                      icellu    ,            &
+                                      indxui    , indxuj   , &
+                                      field_x   , field_y  , &
+                                      field_x   , field_y  ))
+
+      end function global_norm
+
+!=======================================================================
+
+! Compute global dot product of two grid vectors, each split into X and Y components
+
+      function global_dot_product (nx_block  , ny_block , &
+                                   icellu    ,            &
+                                   indxui    , indxuj   , &
+                                   vector1_x , vector1_y, &
+                                   vector2_x , vector2_y) &
+               result(dot_product)
+
+      use ice_domain, only: distrb_info
+      use ice_domain_size, only: max_blocks
+      use ice_fileunits, only: bfbflag
+
+      integer (kind=int_kind), intent(in) :: &
+         nx_block, ny_block    ! block dimensions
+
+      integer (kind=int_kind), dimension (max_blocks), intent(in) :: &
+         icellu                ! total count when iceumask is true
+
+      integer (kind=int_kind), dimension (nx_block*ny_block,max_blocks), intent(in) :: &
+         indxui  , & ! compressed index in i-direction
+         indxuj      ! compressed index in j-direction
+
+      real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(in) :: &
+         vector1_x     , & ! x-component of first vector
+         vector1_y     , & ! y-component of first vector
+         vector2_x     , & ! x-component of second vector
+         vector2_y         ! y-component of second vector
+
+      real (kind=dbl_kind) :: &
+         dot_product      ! l^2 norm of vector field
+
+      ! local variables
+
+      integer (kind=int_kind) :: &
+         i, j, ij, iblk
+
+      real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: &
+         prod      ! temporary array
+
+      real (kind=dbl_kind), dimension(max_blocks) :: &
+         dot       ! temporary scalar for accumulating the result
+
+      character(len=*), parameter :: subname = '(global_dot_product)'
+
+      prod = c0
+      dot  = c0
+
+      !$OMP PARALLEL DO PRIVATE(i, j, ij, iblk)
+      do iblk = 1, nblocks
+         do ij = 1, icellu(iblk)
+            i = indxui(ij, iblk)
+            j = indxuj(ij, iblk)
+            prod(i,j,iblk) = vector1_x(i,j,iblk)*vector2_x(i,j,iblk) + vector1_y(i,j,iblk)*vector2_y(i,j,iblk)
+            dot(iblk) = dot(iblk) + prod(i,j,iblk)
+         enddo ! ij
+      enddo
+      !$OMP END PARALLEL DO
+
+      ! Use local summation result unless bfbflag is active
+      if (bfbflag == 'off') then
+         dot_product = global_sum(sum(dot), distrb_info)
+      else
+         dot_product = global_sum(prod, distrb_info, field_loc_NEcorner)
+      endif
+
+      end function global_dot_product
+
+!=======================================================================
+
 ! Convert a grid function (tpu,tpv) to a one dimensional vector
 
       subroutine arrays_to_vec (nx_block, ny_block  , &