Skip to content

Commit

Permalink
ice_dyn_vp: use global_{norm,dot_product} for bit-for-bit output repr…
Browse files Browse the repository at this point in the history
…oducibility

Make the results of the VP solver reproducible if desired by refactoring
the code to use the subroutines 'global_norm' and 'global_dot_product'
added in the previous commit.

The same pattern appears in the FGMRES solver (subroutine 'fgmres'), the
preconditioner 'pgmres' which uses the same algorithm, and the
Classical and Modified Gram-Schmidt algorithms in 'orthogonalize'.

These modifications do not change the number of global sums in the fgmres,
pgmres and the MGS algorithm. For the CGS algorithm, there is a slight
performance impact as 'global_dot_product' is called inside the loop,
whereas previously we called 'global_allreduce_sum' after the loop to
compute all 'initer' sums at the same time.

To keep that optimization, we would have to implement a new interface
'global_allreduce_sum' which would take an array of shape
(nx_block,ny_block,max_blocks,k) and sum over their first three
dimensions before performing the global reduction over the k dimension.

We choose to not go that route for now mostly because anyway the CGS
algorithm is (by default) only used for the PGMRES preconditioner, and
so the cost should be relatively low as 'initer' corresponds to
'dim_pgmres' in the namelist, which should be kept low for efficiency
(default 5).

These changes lead to bit-for-bit reproducibility (the decomp_suite
passes) when using 'precond=ident' and 'precond=diag' along with
'bfbflag=reprosum'. 'precond=pgmres' is still not bit-for-bit because
some halo updates are skipped for efficiency. This will be addressed in
a following commit.

[1] #491 (comment)
  • Loading branch information
phil-blain committed Oct 5, 2022
1 parent b96ea5b commit c37c645
Showing 1 changed file with 38 additions and 100 deletions.
138 changes: 38 additions & 100 deletions cicecore/cicedynB/dynamics/ice_dyn_vp.F90
Original file line number Diff line number Diff line change
Expand Up @@ -2939,18 +2939,10 @@ subroutine fgmres (zetax2 , etax2 , &
arnoldi_basis_y = c0

! solution is zero if RHS is zero
!$OMP PARALLEL DO PRIVATE(iblk)
do iblk = 1, nblocks
call calc_L2norm_squared(nx_block , ny_block , &
icellu (iblk), &
indxui (:,iblk), indxuj(:,iblk), &
bx(:,:,iblk) , &
by(:,:,iblk) , &
norm_squared(iblk))

enddo
!$OMP END PARALLEL DO
norm_rhs = sqrt(global_sum(sum(norm_squared), distrb_info))
norm_rhs = global_norm(nx_block, ny_block, &
icellu , &
indxui , indxuj , &
bx , by )
if (norm_rhs == c0) then
solx = bx
soly = by
Expand Down Expand Up @@ -2988,18 +2980,11 @@ subroutine fgmres (zetax2 , etax2 , &
! Start outer (restarts) loop
do
! Compute norm of initial residual
!$OMP PARALLEL DO PRIVATE(iblk)
do iblk = 1, nblocks
call calc_L2norm_squared(nx_block , ny_block , &
icellu (iblk), &
indxui (:,iblk), indxuj(:,iblk), &
arnoldi_basis_x(:,:,iblk, 1) , &
arnoldi_basis_y(:,:,iblk, 1) , &
norm_squared(iblk))

enddo
!$OMP END PARALLEL DO
norm_residual = sqrt(global_sum(sum(norm_squared), distrb_info))
norm_residual = global_norm(nx_block, ny_block, &
icellu , &
indxui , indxuj , &
arnoldi_basis_x(:,:,:, 1), &
arnoldi_basis_y(:,:,:, 1))

if (my_task == master_task .and. monitor_fgmres) then
write(nu_diag, '(a,i4,a,d26.16)') "monitor_fgmres: iter_fgmres= ", nbiter, &
Expand Down Expand Up @@ -3093,17 +3078,11 @@ subroutine fgmres (zetax2 , etax2 , &
hessenberg)

! Compute norm of new Arnoldi vector and update Hessenberg matrix
!$OMP PARALLEL DO PRIVATE(iblk)
do iblk = 1, nblocks
call calc_L2norm_squared(nx_block , ny_block , &
icellu (iblk), &
indxui (:,iblk), indxuj(:, iblk) , &
arnoldi_basis_x(:,:,iblk, nextit), &
arnoldi_basis_y(:,:,iblk, nextit), &
norm_squared(iblk))
enddo
!$OMP END PARALLEL DO
hessenberg(nextit,initer) = sqrt(global_sum(sum(norm_squared), distrb_info))
hessenberg(nextit,initer) = global_norm(nx_block, ny_block, &
icellu , &
indxui , indxuj , &
arnoldi_basis_x(:,:,:, nextit), &
arnoldi_basis_y(:,:,:, nextit))

! Watch out for happy breakdown
if (.not. almost_zero( hessenberg(nextit,initer) ) ) then
Expand Down Expand Up @@ -3381,18 +3360,11 @@ subroutine pgmres (zetax2 , etax2 , &
! Start outer (restarts) loop
do
! Compute norm of initial residual
!$OMP PARALLEL DO PRIVATE(iblk)
do iblk = 1, nblocks
call calc_L2norm_squared(nx_block , ny_block , &
icellu (iblk), &
indxui (:,iblk), indxuj(:, iblk), &
arnoldi_basis_x(:,:,iblk, 1), &
arnoldi_basis_y(:,:,iblk, 1), &
norm_squared(iblk))

enddo
!$OMP END PARALLEL DO
norm_residual = sqrt(global_sum(sum(norm_squared), distrb_info))
norm_residual = global_norm(nx_block, ny_block, &
icellu , &
indxui , indxuj , &
arnoldi_basis_x(:,:,:, 1), &
arnoldi_basis_y(:,:,:, 1))

if (my_task == master_task .and. monitor_pgmres) then
write(nu_diag, '(a,i4,a,d26.16)') "monitor_pgmres: iter_pgmres= ", nbiter, &
Expand Down Expand Up @@ -3475,17 +3447,11 @@ subroutine pgmres (zetax2 , etax2 , &
hessenberg)

! Compute norm of new Arnoldi vector and update Hessenberg matrix
!$OMP PARALLEL DO PRIVATE(iblk)
do iblk = 1, nblocks
call calc_L2norm_squared(nx_block , ny_block , &
icellu (iblk), &
indxui (:,iblk), indxuj(:, iblk) , &
arnoldi_basis_x(:,:,iblk, nextit), &
arnoldi_basis_y(:,:,iblk, nextit), &
norm_squared(iblk))
enddo
!$OMP END PARALLEL DO
hessenberg(nextit,initer) = sqrt(global_sum(sum(norm_squared), distrb_info))
hessenberg(nextit,initer) = global_norm(nx_block, ny_block, &
icellu , &
indxui , indxuj , &
arnoldi_basis_x(:,:,:, nextit), &
arnoldi_basis_y(:,:,:, nextit))

! Watch out for happy breakdown
if (.not. almost_zero( hessenberg(nextit,initer) ) ) then
Expand Down Expand Up @@ -3764,39 +3730,20 @@ subroutine orthogonalize(ortho_type , initer , &
ij , & ! compressed index
i, j ! grid indices

real (kind=dbl_kind), dimension (max_blocks) :: &
local_dot ! local array value to accumulate dot product of grid function over blocks

real (kind=dbl_kind), dimension(maxinner) :: &
dotprod_local ! local array to accumulate several dot product computations

character(len=*), parameter :: subname = '(orthogonalize)'

if (trim(ortho_type) == 'cgs') then ! Classical Gram-Schmidt
! Classical Gram-Schmidt orthogonalisation process
! First loop of Gram-Schmidt (compute coefficients)
dotprod_local = c0
do it = 1, initer
local_dot = c0

!$OMP PARALLEL DO PRIVATE(iblk,ij,i,j)
do iblk = 1, nblocks
do ij = 1, icellu(iblk)
i = indxui(ij, iblk)
j = indxuj(ij, iblk)

local_dot(iblk) = local_dot(iblk) + &
(arnoldi_basis_x(i, j, iblk, it) * arnoldi_basis_x(i, j, iblk, nextit)) + &
(arnoldi_basis_y(i, j, iblk, it) * arnoldi_basis_y(i, j, iblk, nextit))
enddo ! ij
enddo
!$OMP END PARALLEL DO

dotprod_local(it) = sum(local_dot)
hessenberg(it, initer) = global_dot_product(nx_block, ny_block, &
icellu , &
indxui , indxuj , &
arnoldi_basis_x(:,:,:, it) , &
arnoldi_basis_y(:,:,:, it) , &
arnoldi_basis_x(:,:,:, nextit), &
arnoldi_basis_y(:,:,:, nextit))
end do

hessenberg(1:initer, initer) = global_allreduce_sum(dotprod_local(1:initer), distrb_info)

! Second loop of Gram-Schmidt (orthonormalize)
do it = 1, initer
!$OMP PARALLEL DO PRIVATE(iblk,ij,i,j)
Expand All @@ -3816,22 +3763,13 @@ subroutine orthogonalize(ortho_type , initer , &
elseif (trim(ortho_type) == 'mgs') then ! Modified Gram-Schmidt
! Modified Gram-Schmidt orthogonalisation process
do it = 1, initer
local_dot = c0

!$OMP PARALLEL DO PRIVATE(iblk,ij,i,j)
do iblk = 1, nblocks
do ij = 1, icellu(iblk)
i = indxui(ij, iblk)
j = indxuj(ij, iblk)

local_dot(iblk) = local_dot(iblk) + &
(arnoldi_basis_x(i, j, iblk, it) * arnoldi_basis_x(i, j, iblk, nextit)) + &
(arnoldi_basis_y(i, j, iblk, it) * arnoldi_basis_y(i, j, iblk, nextit))
enddo ! ij
enddo
!$OMP END PARALLEL DO

hessenberg(it,initer) = global_sum(sum(local_dot), distrb_info)
hessenberg(it, initer) = global_dot_product(nx_block, ny_block, &
icellu , &
indxui , indxuj , &
arnoldi_basis_x(:,:,:, it) , &
arnoldi_basis_y(:,:,:, it) , &
arnoldi_basis_x(:,:,:, nextit), &
arnoldi_basis_y(:,:,:, nextit))

!$OMP PARALLEL DO PRIVATE(iblk,ij,i,j)
do iblk = 1, nblocks
Expand Down

0 comments on commit c37c645

Please sign in to comment.