Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
mperego committed Sep 26, 2024
1 parent 54dcd9a commit 29fba51
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ getValues( OutputViewType output,
}
case OPERATOR_CURL: {
const ViewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts, spaceDim);
ptr += card*npts*spaceDim*get_dimension_scalar(work);
ptr += card*npts*spaceDim*get_dimension_scalar(input);
const ViewType workView(Kokkos::view_wrap(ptr, vcprop), card, npts, spaceDim+1);

Impl::Basis_HGRAD_TET_Cn_FEM_ORTH::
Expand Down Expand Up @@ -593,7 +593,7 @@ Basis_HCURL_TET_In_FEM<DT,OT,PT>::getValues(
const int numPoints = inputPoints.extent(0);
using ScalarType = typename ScalarTraits<typename PointViewType::value_type>::scalar_type;
using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits<Kokkos::Unmanaged> >;
ordinal_type scalarSizePerPoint = (operatorType == OPERATOR_VALUE) ? this->basisCardinality_ : 5*this->basisCardinality_;
ordinal_type scalarSizePerPoint = (operatorType == OPERATOR_VALUE) ? this->basisCardinality_ : 7*this->basisCardinality_;
ordinal_type sizePerPoint = scalarSizePerPoint*get_dimension_scalar(inputPoints);
WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size());
using range_type = Kokkos::pair<ordinal_type,ordinal_type>;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ namespace Intrepid2 {
}
case OPERATOR_CURL: {
const ViewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts, spaceDim);
ptr += card*npts*spaceDim*get_dimension_scalar(work);
ptr += card*npts*spaceDim*get_dimension_scalar(input);
const ViewType workView(Kokkos::view_wrap(ptr, vcprop), card, npts, spaceDim+1);

Impl::Basis_HGRAD_TRI_Cn_FEM_ORTH::
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ getValues( OutputViewType output,
}
case OPERATOR_CURL: { // only works in 2d. first component is -d/dy, second is d/dx
const ViewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts, spaceDim);
ptr += card*npts*spaceDim*get_dimension_scalar(work);
ptr += card*npts*spaceDim*get_dimension_scalar(input);
const ViewType workView(Kokkos::view_wrap(ptr, vcprop), card, npts, spaceDim+1);


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,20 @@ namespace Intrepid2 {
int scratch_space_level =1;
const int vectorSize = getVectorSizeForHierarchicalParallelism<PointValueType>();
Kokkos::TeamPolicy<DeviceSpaceType> teamPolicy(ncells, Kokkos::AUTO,vectorSize);
{
// avoid using a team size larger than needed, to reduce allocated scratch space memory
ordinal_type team_size = teamPolicy.team_size_recommended(functor, Kokkos::ParallelForTag());
*outStream << "Max Recommended team size: " << team_size << ", Requested team size: " << numPoints <<std::endl;
team_size = std::min(team_size, numPoints);
teamPolicy = Kokkos::TeamPolicy<typename DeviceType::execution_space>(numCells, team_size,vectorSize);
}

{ //compute values
auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy<DeviceSpaceType>::member_type team_member) {
auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL());
basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level));
};
};

//Get the required size of the scratch space per team and per thread.
int perThreadSpaceSize(0), perTeamSpaceSize(0);
basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,12 @@ namespace Intrepid2 {
int scratch_space_level =1;
const int vectorSize = getVectorSizeForHierarchicalParallelism<PointValueType>();
Kokkos::TeamPolicy<DeviceSpaceType> teamPolicy(ncells, Kokkos::AUTO,vectorSize);
{ // avoid using a team size larger than needed, to reduce allocated scratch space memory
ordinal_type team_size = teamPolicy.team_size_recommended(functor, Kokkos::ParallelForTag());
*outStream << "Max Recommended team size: " << team_size << ", Requested team size: " << numPoints <<std::endl;
team_size = std::min(team_size, numPoints);
teamPolicy = Kokkos::TeamPolicy<typename DeviceType::execution_space>(numCells, team_size,vectorSize);
}

{ //compute values
auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy<DeviceSpaceType>::member_type team_member) {
Expand Down

0 comments on commit 29fba51

Please sign in to comment.