Skip to content

Commit

Permalink
GPU geometry transfer fix and stack limit configuration option (#312)
Browse files Browse the repository at this point in the history
- Fix for the copy of the surface model to GPU, which was being called
from all threads
- Removed the hardcoded device stack limit and added a configuration
option in the macro
- Added the line `SET(CMAKE_EXE_LINKER_FLAGS "-Wl,--disable-new-dtags")`
to `Example1/CMakeLists.txt` which solves an issue where Geant4
libraries would not be properly linked in some systems
  • Loading branch information
JuanGonzalezCaminero committed Sep 16, 2024
1 parent c2a5fe8 commit 5ea9d08
Show file tree
Hide file tree
Showing 12 changed files with 127 additions and 19 deletions.
5 changes: 4 additions & 1 deletion examples/Example1/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: 2023 CERN
# SPDX-License-Identifier: Apache-2.0

SET(CMAKE_EXE_LINKER_FLAGS "-Wl,--disable-new-dtags")

if(NOT TARGET G4HepEm::g4HepEm)
message(STATUS "Disabling example1 (needs G4HepEm)")
return()
Expand Down Expand Up @@ -65,12 +67,13 @@ target_link_libraries(example1
# Install macros and geometry file
SET(GDML ${PROJECT_BINARY_DIR}/cms2018_sd.gdml)
configure_file("macros/example1.mac.in" "${PROJECT_BINARY_DIR}/example1.mac")
configure_file("macros/example1_large_stack.mac.in" "${PROJECT_BINARY_DIR}/example1_large_stack.mac")
configure_file("macros/example1_ttbar.mac.in" "${PROJECT_BINARY_DIR}/example1_ttbar.mac")
configure_file("macros/example1_ttbar_LHCb.mac.in" "${PROJECT_BINARY_DIR}/example1_ttbar_LHCb.mac")
configure_file("macros/example1_ttbar_noadept.mac.in" "${PROJECT_BINARY_DIR}/example1_ttbar_noadept.mac")

# Tests

add_test(NAME example1
COMMAND $<TARGET_FILE:example1> -m ${PROJECT_BINARY_DIR}/example1.mac
COMMAND $<TARGET_FILE:example1> -m ${PROJECT_BINARY_DIR}/example1_large_stack.mac
)
1 change: 1 addition & 0 deletions examples/Example1/macros/example1.mac.in
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
## Total number of GPU track slots (not per thread)
/adept/setMillionsOfTrackSlots 1
/adept/setMillionsOfHitSlots 1
# /adept/setCUDAStackLimit 4096

# If true, particles are transported on the GPU across the whole geometry, GPU regions are ignored
/adept/setTrackInAllRegions true
Expand Down
78 changes: 78 additions & 0 deletions examples/Example1/macros/example1_large_stack.mac.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# SPDX-FileCopyrightText: 2023 CERN
# SPDX-License-Identifier: Apache-2.0
# example23.in
#

## =============================================================================
## Geant4 macro for modelling simplified sampling calorimeters
## =============================================================================
##
/run/numberOfThreads 1
/control/verbose 0
/run/verbose 0
/process/verbose 0
/tracking/verbose 0
/event/verbose 0
##
/adept/setSeed 1

/detector/filename @GDML@
# Temporary workaround since we don't have a G4 to VecGeom converter
/adept/setVecGeomGDML @GDML@
/adept/setVerbosity 0
## Threshold for buffering tracks before sending to GPU
/adept/setTransportBufferThreshold 2000
## Total number of GPU track slots (not per thread)
/adept/setMillionsOfTrackSlots 1
/adept/setMillionsOfHitSlots 1
/adept/setCUDAStackLimit 8192

# If true, particles are transported on the GPU across the whole geometry, GPU regions are ignored
/adept/setTrackInAllRegions true
# In order to do the GPU transport only in specific regions
/adept/addGPURegion EcalRegion
/adept/addGPURegion HcalRegion


## -----------------------------------------------------------------------------
## Optionally, set a constant magnetic filed:
## -----------------------------------------------------------------------------
/detector/setField 0 0 0 tesla
#/detector/setField 0 0 3.8 tesla

## -----------------------------------------------------------------------------
## Set secondary production threshold, init. the run and set primary properties
## -----------------------------------------------------------------------------
/run/setCut 0.7 mm
/run/initialize

## User-defined Event verbosity: 1 = total edep, 2 = energy deposit per placed sensitive volume
/eventAction/verbose 2

/gun/setDefault
/gun/particle e-
/gun/energy 10 GeV
/gun/number 200
/gun/position 0 0 0
/gun/print true

# If false, the following parameters are ignored
/gun/randomizeGun true
# Usage: /gun/addParticle type ["weight" weight] ["energy" energy unit]
/gun/addParticle e- weight 1 energy 10 GeV
/gun/addParticle proton weight 0 energy 10 GeV
/gun/minPhi 0 deg
/gun/maxPhi 360 deg
/gun/minTheta 10 deg
/gun/maxTheta 170 deg

## -----------------------------------------------------------------------------
## Run the simulation with the given number of events and print list of processes
## -----------------------------------------------------------------------------

/adept/setSeed 1

# run events with parametrised simulation
# by default all created models are active
/run/beamOn 1

1 change: 1 addition & 0 deletions examples/Example1/macros/example1_ttbar.mac.in
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
## Total number of GPU track slots (not per thread)
/adept/setMillionsOfTrackSlots 8
/adept/setMillionsOfHitSlots 1
# /adept/setCUDAStackLimit 4096

# If true, particles are transported on the GPU across the whole geometry, GPU regions are ignored
/adept/setTrackInAllRegions true
Expand Down
2 changes: 2 additions & 0 deletions examples/Example1/macros/example1_ttbar_LHCb.mac.in
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
## Total number of GPU track slots (not per thread)
/adept/setMillionsOfTrackSlots 8
/adept/setMillionsOfHitSlots 1
## Device stack limit
# /adept/setCUDAStackLimit 4096

# If true, particles are transported on the GPU across the whole geometry, GPU regions are ignored
/adept/setTrackInAllRegions true
Expand Down
13 changes: 8 additions & 5 deletions include/AdePT/core/AdePTConfiguration.hh
Original file line number Diff line number Diff line change
Expand Up @@ -21,32 +21,35 @@ public:
void SetMillionsOfTrackSlots(double millionSlots) { fMillionsOfTrackSlots = millionSlots; }
void SetMillionsOfHitSlots(double millionSlots) { fMillionsOfHitSlots = millionSlots; }
void SetHitBufferFlushThreshold(float threshold) { fHitBufferFlushThreshold = threshold; }
void SetCUDAStackLimit(int limit) { fCUDAStackLimit = limit; }

// We temporarily load VecGeom geometry from GDML
void SetVecGeomGDML(std::string filename) { fVecGeomGDML = filename; }

bool GetTrackInAllRegions() { return fTrackInAllRegions; }
std::vector<std::string> *GetGPURegionNames() { return &fGPURegionNames; }
bool IsAdePTActivated() { return fAdePTActivated; }
int GetVerbosity() { return fVerbosity; };
int GetTransportBufferThreshold() { return fTransportBufferThreshold; }
int GetCUDAStackLimit() { return fCUDAStackLimit; }
float GetHitBufferFlushThreshold() { return fHitBufferFlushThreshold; }
double GetMillionsOfTrackSlots() { return fMillionsOfTrackSlots; }
double GetMillionsOfHitSlots() { return fMillionsOfHitSlots; }
float GetHitBufferFlushThreshold() { return fHitBufferFlushThreshold; }
std::vector<std::string> *GetGPURegionNames() { return &fGPURegionNames; }

// Temporary
std::string GetVecGeomGDML() { return fVecGeomGDML; }

private:
int fRandomSeed;
bool fTrackInAllRegions{false};
std::vector<std::string> fGPURegionNames{};
bool fAdePTActivated{true};
int fRandomSeed;
int fVerbosity{0};
int fTransportBufferThreshold{200};
int fCUDAStackLimit{0};
float fHitBufferFlushThreshold{0.8};
double fMillionsOfTrackSlots{1};
double fMillionsOfHitSlots{1};
float fHitBufferFlushThreshold{0.8};
std::vector<std::string> fGPURegionNames{};

std::string fVecGeomGDML{""};

Expand Down
21 changes: 11 additions & 10 deletions include/AdePT/core/AdePTTransport.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,9 @@ __global__ void InitTracks(adeptint::TrackData *trackinfo, int ntracks, int star
track.navState.SetBoundaryState(true);
// nextState is initialized as needed.
#ifndef ADEPT_USE_SURF
int lvolID = track.navState.Top()->GetLogicalVolume()->id();
int lvolID = track.navState.Top()->GetLogicalVolume()->id();
#else
int lvolID = track.navState.GetLogicalId();
int lvolID = track.navState.GetLogicalId();
#endif
assert(auxDataArray[lvolID].fGPUregion);
}
Expand Down Expand Up @@ -202,8 +202,6 @@ __global__ void ClearLeakedQueues(LeakedTracks all)

bool InitializeField(double bz)
{
// Try 16384 if debug mode is crashing
COPCORE_CUDA_CHECK(vecgeom::cxx::CudaDeviceSetStackLimit(8192 * 2));
// Initialize field
COPCORE_CUDA_CHECK(cudaMemcpyToSymbol(BzFieldValue, &bz, sizeof(double)));
return true;
Expand All @@ -225,13 +223,9 @@ void PrepareLeakedBuffers(int numLeaked, adeptint::TrackBuffer &buffer, GPUstate
}
}

GPUstate *InitializeGPU(adeptint::TrackBuffer &buffer, int capacity, int maxbatch)
void CopySurfaceModelToGPU()
{
using TrackData = adeptint::TrackData;
auto gpuState_ptr = new GPUstate;
auto &gpuState = *gpuState_ptr;

// Copy surface data to GPU
// Copy surface data to GPU
#ifdef ADEPT_USE_SURF
#ifdef ADEPT_USE_SURF_SINGLE
using SurfData = vgbrep::SurfData<float>;
Expand All @@ -243,6 +237,13 @@ GPUstate *InitializeGPU(adeptint::TrackBuffer &buffer, int capacity, int maxbatc
BrepCudaManager::Instance().TransferSurfData(SurfData::Instance());
printf("== Surface data transferred to GPU\n");
#endif
}

GPUstate *InitializeGPU(adeptint::TrackBuffer &buffer, int capacity, int maxbatch)
{
using TrackData = adeptint::TrackData;
auto gpuState_ptr = new GPUstate;
auto &gpuState = *gpuState_ptr;
// Allocate track managers, streams and synchronization events.
const size_t kQueueSize = MParrayTracks::SizeOfInstance(capacity);
// Create a stream to synchronize kernels of all particle types.
Expand Down
3 changes: 3 additions & 0 deletions include/AdePT/core/AdePTTransport.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ class AdePTTransport {
bool GetTrackInAllRegions() { return fTrackInAllRegions; }
/// @brief Set Geant4 region to which it applies
void SetGPURegionNames(std::vector<std::string> *regionNames) { fGPURegionNames = regionNames; }
/// @brief Set CUDA device stack limit
void SetCUDAStackLimit(int limit) { fCUDAStackLimit = limit; }
std::vector<std::string> *GetGPURegionNames() { return fGPURegionNames; }
/// @brief Create material-cut couple index array
/// @brief Initialize service and copy geometry & physics data on device
Expand All @@ -83,6 +85,7 @@ class AdePTTransport {
int fNumSensitive{0}; ///< Total number of sensitive volumes
int fBufferThreshold{20}; ///< Buffer threshold for flushing AdePT transport buffer
int fDebugLevel{1}; ///< Debug level
int fCUDAStackLimit{0}; ///< CUDA device stack limit
GPUstate *fGPUstate{nullptr}; ///< CUDA state placeholder
AdeptScoring *fScoring{nullptr}; ///< User scoring object
AdeptScoring *fScoring_dev{nullptr}; ///< Device ptr for scoring data
Expand Down
14 changes: 11 additions & 3 deletions include/AdePT/core/AdePTTransport.icc
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ void FreeVolAuxArray(adeptint::VolAuxArray &);
G4HepEmState *InitG4HepEm();
GPUstate *InitializeGPU(TrackBuffer &, int, int);
AdeptScoring *InitializeScoringGPU(AdeptScoring *scoring);
void CopySurfaceModelToGPU();
void FreeGPU(GPUstate &, G4HepEmState *);
template <typename IntegrationLayer>
void ShowerGPU(IntegrationLayer &integration, int event, TrackBuffer &buffer, GPUstate &gpuState, AdeptScoring *scoring,
Expand Down Expand Up @@ -67,11 +68,18 @@ template <typename IntegrationLayer>
bool AdePTTransport<IntegrationLayer>::InitializeGeometry(const vecgeom::cxx::VPlacedVolume *world)
{
auto &cudaManager = vecgeom::cxx::CudaManager::Instance();
bool success = true;
if(fCUDAStackLimit > 0)
{
std::cout << "CUDA Device stack limit: " << fCUDAStackLimit << "\n";
cudaDeviceSetLimit(cudaLimitStackSize, fCUDAStackLimit);
}
bool success = true;
#ifdef ADEPT_USE_SURF
#ifdef ADEPT_USE_SURF_SINGLE
using SurfData = vgbrep::SurfData<float>;
using BrepHelper = vgbrep::BrepHelper<float>;
#else
using SurfData = vgbrep::SurfData<double>;
using BrepHelper = vgbrep::BrepHelper<double>;
#endif
vecgeom::Stopwatch timer;
Expand All @@ -81,15 +89,15 @@ bool AdePTTransport<IntegrationLayer>::InitializeGeometry(const vecgeom::cxx::VP
std::cout << "== Conversion to surface model done in " << timer.Stop() << " [s]\n";
// Upload only navigation table to the GPU
cudaManager.SynchronizeNavigationTable();
adept_impl::CopySurfaceModelToGPU();
#else
// Upload solid geometry to GPU.
cudaDeviceSetLimit(cudaLimitStackSize, 1024 * 8);
cudaManager.LoadGeometry(world);
auto world_dev = cudaManager.Synchronize();
success = world_dev != nullptr;
#endif
// Initialize BVH
InitBVH();
#endif
return success;
}

Expand Down
1 change: 1 addition & 0 deletions include/AdePT/integration/AdePTConfigurationMessenger.hh
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ private:

G4UIdirectory *fDir;
G4UIcmdWithAnInteger *fSetSeedCmd;
G4UIcmdWithAnInteger *fSetCUDAStackLimitCmd;
G4UIcmdWithABool *fSetTrackInAllRegionsCmd;
G4UIcmdWithAString *fAddRegionCmd;
G4UIcmdWithABool *fActivateAdePTCmd;
Expand Down
6 changes: 6 additions & 0 deletions src/AdePTConfigurationMessenger.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ AdePTConfigurationMessenger::AdePTConfigurationMessenger(AdePTConfiguration *ade

fSetGDMLCmd = new G4UIcmdWithAString("/adept/setVecGeomGDML", this);
fSetGDMLCmd->SetGuidance("Temporary method for setting the geometry to use with VecGeom");

fSetCUDAStackLimitCmd = new G4UIcmdWithAnInteger("/adept/setCUDAStackLimit", this);
fSetCUDAStackLimitCmd->SetGuidance("Set the CUDA device stack limit");
}

//....oooOO0OOooo........oooOO0OOooo........oooOO0OOooo........oooOO0OOooo......
Expand All @@ -66,6 +69,7 @@ AdePTConfigurationMessenger::~AdePTConfigurationMessenger()
{
delete fDir;
delete fSetSeedCmd;
delete fSetCUDAStackLimitCmd;
delete fSetTrackInAllRegionsCmd;
delete fAddRegionCmd;
delete fActivateAdePTCmd;
Expand Down Expand Up @@ -101,6 +105,8 @@ void AdePTConfigurationMessenger::SetNewValue(G4UIcommand *command, G4String new
fAdePTConfiguration->SetHitBufferFlushThreshold(fSetHitBufferFlushThresholdCmd->GetNewDoubleValue(newValue));
} else if (command == fSetGDMLCmd) {
fAdePTConfiguration->SetVecGeomGDML(newValue);
} else if (command == fSetCUDAStackLimitCmd) {
fAdePTConfiguration->SetCUDAStackLimit(fSetCUDAStackLimitCmd->GetNewIntValue(newValue));
}
}

Expand Down
1 change: 1 addition & 0 deletions src/AdePTTrackingManager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ void AdePTTrackingManager::InitializeAdePT()
fAdeptTransport->SetMaxBatch(2 * fAdePTConfiguration->GetTransportBufferThreshold());
fAdeptTransport->SetTrackInAllRegions(fAdePTConfiguration->GetTrackInAllRegions());
fAdeptTransport->SetGPURegionNames(fAdePTConfiguration->GetGPURegionNames());
fAdeptTransport->SetCUDAStackLimit(fAdePTConfiguration->GetCUDAStackLimit());

// Check if this is a sequential run
G4RunManager::RMType rmType = G4RunManager::GetRunManager()->GetRunManagerType();
Expand Down

0 comments on commit 5ea9d08

Please sign in to comment.