From 1b7ea0d239a50f49592b4a90b3b68c3c198f220d Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Tue, 27 Jun 2023 09:56:11 +0200 Subject: [PATCH] ITS-GPU: put standalone version at state of the art (#11558) > Fine for me to merge it as is for now. We should just keep the ossibility to run it inside the gpuworkflow operational. Agreed, my actual plan is to move asap to have the GpuWF in a working state and to have the standalone version as a backup. I am therefore merging this for now. * Add skeleton for road finding * Add gpu array keeping all pointers to separate buffers * Add arrays for idxd access to cells and friends * Fix deltaphi calculation on gpu code * Make Road a template class * Improve metrics logging * Add tracker metrics * add cmakelist entry --- .../GPU/ITStrackingGPU/TimeFrameGPU.h | 13 + .../ITS/tracking/GPU/cuda/TimeFrameGPU.cu | 24 ++ .../ITS/tracking/GPU/cuda/TrackerTraitsGPU.cu | 234 +++++++++++++++--- .../ITSMFT/ITS/tracking/GPU/cuda/Utils.cu | 2 - .../tracking/GPU/cuda/VertexerTraitsGPU.cu | 18 +- .../include/ITStracking/Configuration.h | 55 ++-- .../ITS/tracking/include/ITStracking/Road.h | 12 - .../tracking/include/ITStracking/TimeFrame.h | 48 ++++ .../tracking/include/ITStracking/Tracker.h | 2 +- .../include/ITStracking/TrackingConfigParam.h | 3 +- .../ITSMFT/ITS/tracking/src/TrackerTraits.cxx | 27 +- .../ITS/tracking/src/VertexerTraits.cxx | 27 +- 12 files changed, 362 insertions(+), 103 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h index 6c865550423c6..f06029eada764 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h @@ -103,6 +103,8 @@ class GpuTimeFrameChunk int* getDeviceTrackletsLookupTables(const int); Cell* getDeviceCells(const int); int* getDeviceCellsLookupTables(const int); + Road* getDeviceRoads() { return mRoadsDevice; } + int* getDeviceRoadsLookupTables(const int); TimeFrameGPUParameters* getTimeFrameGPUParameters() const { return mTFGPUParams; } int* getDeviceCUBTmpBuffer() { return mCUBTmpBufferDevice; } @@ -110,6 +112,9 @@ class GpuTimeFrameChunk int* getDeviceNFoundCells() { return mNFoundCellsDevice; } int* getDeviceCellNeigboursLookupTables(const int); int* getDeviceCellNeighbours(const int); + Cell** getDeviceArrayCells() const { return mCellsDeviceArray; } + int** getDeviceArrayNeighboursCell() const { return mNeighboursCellDeviceArray; } + int** getDeviceArrayNeighboursCellLUT() const { return mNeighboursCellLookupTablesDeviceArray; } /// Vertexer only int* getDeviceNTrackletCluster(const int combid) { return mNTrackletsPerClusterDevice[combid]; } @@ -133,10 +138,18 @@ class GpuTimeFrameChunk std::array mTrackletsDevice; std::array mTrackletsLookupTablesDevice; std::array mCellsDevice; + Road* mRoadsDevice; std::array mCellsLookupTablesDevice; std::array mNeighboursCellDevice; std::array mNeighboursCellLookupTablesDevice; + std::array mRoadsLookupTablesDevice; + // These are to make them accessible using layer index + Cell** mCellsDeviceArray; + int** mNeighboursCellDeviceArray; + int** mNeighboursCellLookupTablesDeviceArray; + + // Small accessory buffers int* mCUBTmpBufferDevice; int* mFoundTrackletsDevice; int* mNFoundCellsDevice; diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu index 1950bed9138f8..fe38b30e6f9d9 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu @@ -55,6 +55,7 @@ GpuTimeFrameChunk::~GpuTimeFrameChunk() if (i < nLayers - 2) { checkGPUError(cudaFree(mCellsDevice[i])); checkGPUError(cudaFree(mCellsLookupTablesDevice[i])); + checkGPUError(cudaFree(mRoadsLookupTablesDevice[i])); if (i < nLayers - 3) { checkGPUError(cudaFree(mNeighboursCellLookupTablesDevice[i])); checkGPUError(cudaFree(mNeighboursCellDevice[i])); @@ -62,9 +63,13 @@ GpuTimeFrameChunk::~GpuTimeFrameChunk() } } } + checkGPUError(cudaFree(mRoadsDevice)); checkGPUError(cudaFree(mCUBTmpBufferDevice)); checkGPUError(cudaFree(mFoundTrackletsDevice)); checkGPUError(cudaFree(mNFoundCellsDevice)); + checkGPUError(cudaFree(mCellsDeviceArray)); + checkGPUError(cudaFree(mNeighboursCellDeviceArray)); + checkGPUError(cudaFree(mNeighboursCellLookupTablesDeviceArray)); } } @@ -84,6 +89,7 @@ void GpuTimeFrameChunk::allocate(const size_t nrof, Stream& stream) if (i < nLayers - 2) { checkGPUError(cudaMallocAsync(reinterpret_cast(&(mCellsLookupTablesDevice[i])), sizeof(int) * mTFGPUParams->validatedTrackletsCapacity * nrof, stream.get())); checkGPUError(cudaMallocAsync(reinterpret_cast(&(mCellsDevice[i])), sizeof(Cell) * mTFGPUParams->maxNeighboursSize * nrof, stream.get())); + checkGPUError(cudaMallocAsync(reinterpret_cast(&mRoadsLookupTablesDevice[i]), sizeof(int) * mTFGPUParams->maxNeighboursSize * nrof, stream.get())); if (i < nLayers - 3) { checkGPUError(cudaMallocAsync(reinterpret_cast(&(mNeighboursCellLookupTablesDevice[i])), sizeof(int) * mTFGPUParams->maxNeighboursSize * nrof, stream.get())); checkGPUError(cudaMallocAsync(reinterpret_cast(&(mNeighboursCellDevice[i])), sizeof(int) * mTFGPUParams->maxNeighboursSize * nrof, stream.get())); @@ -100,10 +106,19 @@ void GpuTimeFrameChunk::allocate(const size_t nrof, Stream& stream) checkGPUError(cudaMallocAsync(reinterpret_cast(&mNExclusiveFoundLinesDevice), sizeof(int) * mTFGPUParams->clustersPerROfCapacity * nrof + 1, stream.get())); // + 1 for cub::DeviceScan::ExclusiveSum, to cover cases where we have maximum number of clusters per ROF checkGPUError(cudaMallocAsync(reinterpret_cast(&mUsedTrackletsDevice), sizeof(unsigned char) * mTFGPUParams->maxTrackletsPerCluster * mTFGPUParams->clustersPerROfCapacity * nrof, stream.get())); checkGPUError(cudaMallocAsync(reinterpret_cast(&mClusteredLinesDevice), sizeof(int) * mTFGPUParams->clustersPerROfCapacity * mTFGPUParams->maxTrackletsPerCluster * nrof, stream.get())); + checkGPUError(cudaMallocAsync(reinterpret_cast(&mRoadsDevice), sizeof(Road) * mTFGPUParams->maxRoadPerRofSize * nrof, stream.get())); /// Invariant allocations checkGPUError(cudaMallocAsync(reinterpret_cast(&mFoundTrackletsDevice), (nLayers - 1) * sizeof(int) * nrof, stream.get())); // No need to reset, we always read it after writing checkGPUError(cudaMallocAsync(reinterpret_cast(&mNFoundCellsDevice), (nLayers - 2) * sizeof(int) * nrof, stream.get())); + checkGPUError(cudaMallocAsync(reinterpret_cast(&mCellsDeviceArray), (nLayers - 2) * sizeof(Cell*), stream.get())); + checkGPUError(cudaMallocAsync(reinterpret_cast(&mNeighboursCellDeviceArray), (nLayers - 3) * sizeof(int*), stream.get())); + checkGPUError(cudaMallocAsync(reinterpret_cast(&mNeighboursCellLookupTablesDeviceArray), (nLayers - 3) * sizeof(int*), stream.get())); + + /// Copy pointers of allocated memory to regrouping arrays + checkGPUError(cudaMemcpyAsync(mCellsDeviceArray, mCellsDevice.data(), (nLayers - 2) * sizeof(Cell*), cudaMemcpyHostToDevice, stream.get())); + checkGPUError(cudaMemcpyAsync(mNeighboursCellDeviceArray, mNeighboursCellDevice.data(), (nLayers - 3) * sizeof(int*), cudaMemcpyHostToDevice, stream.get())); + checkGPUError(cudaMemcpyAsync(mNeighboursCellLookupTablesDeviceArray, mNeighboursCellLookupTablesDevice.data(), (nLayers - 3) * sizeof(int*), cudaMemcpyHostToDevice, stream.get())); mAllocated = true; } @@ -130,6 +145,7 @@ void GpuTimeFrameChunk::reset(const Task task, Stream& stream) thrust::fill(THRUST_NAMESPACE::par.on(stream.get()), thrustTrackletsBegin, thrustTrackletsEnd, Tracklet{}); if (i < nLayers - 2) { checkGPUError(cudaMemsetAsync(mCellsLookupTablesDevice[i], 0, sizeof(int) * mTFGPUParams->cellsLUTsize * mNRof, stream.get())); + checkGPUError(cudaMemsetAsync(mRoadsLookupTablesDevice[i], 0, sizeof(int) * mTFGPUParams->maxNeighboursSize * mNRof, stream.get())); if (i < nLayers - 3) { checkGPUError(cudaMemsetAsync(mNeighboursCellLookupTablesDevice[i], 0, sizeof(int) * mTFGPUParams->maxNeighboursSize * mNRof, stream.get())); checkGPUError(cudaMemsetAsync(mNeighboursCellDevice[i], 0, sizeof(int) * mTFGPUParams->maxNeighboursSize * mNRof, stream.get())); @@ -157,6 +173,8 @@ size_t GpuTimeFrameChunk::computeScalingSizeBytes(const int nrof, const rofsize += (nLayers - 2) * sizeof(Cell) * config.maxNeighboursSize; // cells rofsize += (nLayers - 3) * sizeof(int) * config.maxNeighboursSize; // cell neighbours lookup tables rofsize += (nLayers - 3) * sizeof(int) * config.maxNeighboursSize; // cell neighbours + rofsize += sizeof(Road) * config.maxRoadPerRofSize; // roads + rofsize += (nLayers - 2) * sizeof(int) * config.maxNeighboursSize; // road LUT rofsize += sizeof(Line) * config.maxTrackletsPerCluster * config.clustersPerROfCapacity; // lines rofsize += sizeof(int) * config.clustersPerROfCapacity; // found lines rofsize += sizeof(int) * config.clustersPerROfCapacity; // found lines exclusive sum @@ -243,6 +261,12 @@ int* GpuTimeFrameChunk::getDeviceCellNeighbours(const int layer) return mNeighboursCellDevice[layer]; } +template +int* GpuTimeFrameChunk::getDeviceRoadsLookupTables(const int layer) +{ + return mRoadsLookupTablesDevice[layer]; +} + // Load data template size_t GpuTimeFrameChunk::loadDataOnDevice(const size_t startRof, const size_t maxRof, const int maxLayers, Stream& stream) diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cu index de24ffc947127..883b3c9c6fa54 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cu @@ -51,18 +51,65 @@ using namespace constants::its2; namespace gpu { -GPUg() void printBufferLayerOnThread(const int layer, const int* v, size_t size, const int len = 150, const unsigned int tId = 0) +class GpuTimer { - if (blockIdx.x * blockDim.x + threadIdx.x == tId) { - for (int i{0}; i < size; ++i) { - if (!(i % len)) { - printf("\n layer %d: ===>%d/%d\t", layer, i, (int)size); - } - printf("%d\t", v[i]); - } - printf("\n"); + public: + GpuTimer() = delete; + GpuTimer(const int offset, cudaStream_t stream = 0) : mOffset(offset) + { + mStream = stream; + cudaEventCreateWithFlags(&mStart, cudaEventBlockingSync); + cudaEventCreateWithFlags(&mStop, cudaEventBlockingSync); + cudaEventCreateWithFlags(&mLifetimeStart, cudaEventBlockingSync); + cudaEventCreateWithFlags(&mLifetimeEnd, cudaEventBlockingSync); + cudaEventRecord(mLifetimeStart, mStream); + } + ~GpuTimer() + { + cudaEventDestroy(mStart); + cudaEventDestroy(mStop); + cudaEventDestroy(mLifetimeStart); + cudaEventDestroy(mLifetimeEnd); + } + void Start(std::string task = "undefined") + { + mTask = task; + cudaEventRecord(mStart, mStream); + } + void Stop() + { + cudaEventRecord(mStop, mStream); + cudaEventSynchronize(mStop); + cudaEventElapsedTime(&mElapsedTimeMS, mStart, mStop); + } + void Print() + { + printf("%s\t%d\t%f\t#?#\n", (mTask + "_" + std::to_string(mOffset)).c_str(), mStream, mElapsedTimeMS); + } + void PrintLifetime(size_t mem = 0) + { + cudaEventRecord(mLifetimeEnd, mStream); + cudaEventSynchronize(mLifetimeEnd); + float lifetime; + cudaEventElapsedTime(&lifetime, mLifetimeStart, mLifetimeEnd); + printf("trLifeTime_%d\t%d\t%f\t%lu\t#?#\n", mOffset, mStream, lifetime, mem); + } + float getElapsedTimeMS() + { + return mElapsedTimeMS; } -} + + private: + std::string mTask; + cudaEvent_t mStart; + cudaEvent_t mStop; + cudaEvent_t mLifetimeStart; + cudaEvent_t mLifetimeEnd; + cudaStream_t mStream; + float mElapsedTimeMS; + float mData; + int mOffset; +}; GPUd() const int4 getBinsRect(const Cluster& currentCluster, const int layerIndex, const o2::its::IndexTableUtils& utils, @@ -85,11 +132,54 @@ GPUd() const int4 getBinsRect(const Cluster& currentCluster, const int layerInde utils.getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMax))}; } +// template +// GPUd() void traverseCellsTreeDevice( +// const int currentCellId, +// const int currentLayerId, +// const int startingCellId, // used to compile LUT +// int& nRoadsStartingCell, +// int* roadsLookUpTable, +// Cell** cells, +// int** cellNeighbours, +// int** cellNeighboursLUT, +// Road* roads) +// { +// Cell& currentCell{cells[currentLayerId][currentCellId]}; +// const int currentCellLevel = currentCell.getLevel(); +// if constexpr (dryRun) { +// ++nRoadsStartingCell; // in dry run I just want to count the total number of roads +// } else { +// // mTimeFrame->getRoads().back().addCell(currentLayerId, currentCellId); +// } +// if (currentLayerId > 0 && currentCellLevel > 1) { +// const int cellNeighboursNum{cellNeighboursLUT[currentLayerId - 1][currentCellId + 1] - cellNeighboursLUT[currentLayerId - 1][currentCellId]}; // careful! +// bool isFirstValidNeighbour{true}; + +// for (int iNeighbourCell{0}; iNeighbourCell < cellNeighboursNum; ++iNeighbourCell) { +// const int neighbourCellId =cellNeighbours[currentLayerId - 1][currentCellId][iNeighbourCell]; +// const Cell& neighbourCell = mTimeFrame->getCells()[currentLayerId - 1][neighbourCellId]; + +// if (currentCellLevel - 1 != neighbourCell.getLevel()) { +// continue; +// } + +// if (isFirstValidNeighbour) { +// isFirstValidNeighbour = false; +// } else { +// // mTimeFrame->getRoads().push_back(mTimeFrame->getRoads().back()); +// } +// traverseCellsTree(neighbourCellId, currentLayerId - 1, cells, cellNeighbours, cellNeighboursLUT, roads); +// ++nRoadsStartingCell; +// } +// } +// } + GPUhd() float Sq(float q) { return q * q; } +// Functors to sort tracklets template struct trackletSortEmptyFunctor : public thrust::binary_function { GPUhd() bool operator()(const T& lhs, const T& rhs) const @@ -106,6 +196,20 @@ struct trackletSortIndexFunctor : public thrust::binary_function { } }; +// Print layer buffer +GPUg() void printBufferLayerOnThread(const int layer, const int* v, size_t size, const int len = 150, const unsigned int tId = 0) +{ + if (blockIdx.x * blockDim.x + threadIdx.x == tId) { + for (int i{0}; i < size; ++i) { + if (!(i % len)) { + printf("\n layer %d: ===> %d/%d\t", layer, i, (int)size); + } + printf("%d\t", v[i]); + } + printf("\n"); + } +} + // Dump vertices GPUg() void printVertices(const Vertex* v, size_t size, const unsigned int tId = 0) { @@ -249,9 +353,9 @@ GPUg() void computeLayerTrackletsKernelSingleRof( } } } - if (storedTracklets > maxTrackletsPerCluster) { - printf("its-gpu-tracklet finder: found more tracklets per clusters (%d) than maximum set (%d), check the configuration!\n", maxTrackletsPerCluster, storedTracklets); - } + // if (storedTracklets > maxTrackletsPerCluster) { + // printf("its-gpu-tracklet finder: found more tracklets per clusters (%d) than maximum set (%d), check the configuration!\n", maxTrackletsPerCluster, storedTracklets); + // } } } @@ -300,14 +404,14 @@ GPUg() void computeLayerTrackletsKernelMultipleRof( const int zBins{utils->getNzBins()}; for (unsigned int iRof{blockIdx.x}; iRof < rofSize; iRof += gridDim.x) { auto rof0 = iRof + startRofId; - auto nClustersCurrentLayerRof = roFrameClustersCurrentLayer[rof0 + 1] - roFrameClustersCurrentLayer[rof0]; + auto nClustersCurrentLayerRof = o2::gpu::GPUCommonMath::Min(roFrameClustersCurrentLayer[rof0 + 1] - roFrameClustersCurrentLayer[rof0], (int)maxClustersPerRof); + // if (nClustersCurrentLayerRof > maxClustersPerRof) { + // printf("its-gpu-tracklet finder: on layer %d found more clusters per ROF (%d) than maximum set (%d), check the configuration!\n", layerIndex, nClustersCurrentLayerRof, maxClustersPerRof); + // } auto* clustersCurrentLayerRof = clustersCurrentLayer + (roFrameClustersCurrentLayer[rof0] - roFrameClustersCurrentLayer[startRofId]); auto nVerticesRof0 = nVertices[rof0 + 1] - nVertices[rof0]; auto trackletsRof0 = tracklets + maxTrackletsPerCluster * maxClustersPerRof * iRof; for (int currentClusterIndex = threadIdx.x; currentClusterIndex < nClustersCurrentLayerRof; currentClusterIndex += blockDim.x) { - if (nClustersCurrentLayerRof > maxClustersPerRof) { - printf("its-gpu-tracklet finder: on layer %d found more clusters per ROF (%d) than maximum set (%d), check the configuration!\n", layerIndex, nClustersCurrentLayerRof, maxClustersPerRof); - } unsigned int storedTracklets{0}; const Cluster& currentCluster{clustersCurrentLayerRof[currentClusterIndex]}; const int currentSortedIndex{roFrameClustersCurrentLayer[rof0] + currentClusterIndex}; @@ -486,6 +590,55 @@ GPUg() void computeLayerCellNeighboursKernel(Cell* cellsCurrentLayer, } } +template +GPUg() void computeLayerRoadsKernel( + const int level, + const int layerIndex, + Cell** cells, + const int* nCells, + int** neighbours, + int** neighboursLUT, + Road* roads, + int* roadsLookupTable) +{ + for (int iCurrentCellIndex = blockIdx.x * blockDim.x + threadIdx.x; iCurrentCellIndex < nCells[layerIndex]; iCurrentCellIndex += blockDim.x * gridDim.x) { + auto& currentCell{cells[layerIndex][iCurrentCellIndex]}; + if (currentCell.getLevel() != level) { + continue; + } + int nRoadsCurrentCell{0}; + if constexpr (dryRun) { + roadsLookupTable[iCurrentCellIndex]++; + } else { + roads[roadsLookupTable[iCurrentCellIndex] + nRoadsCurrentCell++] = Road{layerIndex, iCurrentCellIndex}; + } + if (level == 1) { + continue; + } + // **** check! I'm tired + const auto currentCellNeighOffset{neighboursLUT[layerIndex - 1][iCurrentCellIndex]}; + const int cellNeighboursNum{neighboursLUT[layerIndex - 1][iCurrentCellIndex + 1] - currentCellNeighOffset}; + bool isFirstValidNeighbour{true}; + for (int iNeighbourCell{0}; iNeighbourCell < cellNeighboursNum; ++iNeighbourCell) { + const int neighbourCellId = neighbours[layerIndex - 1][currentCellNeighOffset + iNeighbourCell]; + const Cell& neighbourCell = cells[layerIndex - 1][neighbourCellId]; + if (level - 1 != neighbourCell.getLevel()) { + continue; + } + if (isFirstValidNeighbour) { + isFirstValidNeighbour = false; + } else { + if constexpr (dryRun) { + roadsLookupTable[iCurrentCellIndex]++; // dry run we just count the number of roads + } else { + roads[roadsLookupTable[iCurrentCellIndex] + nRoadsCurrentCell++] = Road{layerIndex, iCurrentCellIndex}; + } + } + // traverseCellsTreeDevice(neighbourCellId, layerIndex - 1, iCurrentCellIndex, nRoadsCurrentCell, roadsLookupTable, cells, roads); + } + } +} + } // namespace gpu template @@ -503,7 +656,7 @@ void TrackerTraitsGPU::computeLayerTracklets(const int iteration) const Vertex diamondVert({mTrkParams[iteration].Diamond[0], mTrkParams[iteration].Diamond[1], mTrkParams[iteration].Diamond[2]}, {25.e-6f, 0.f, 0.f, 25.e-6f, 0.f, 36.f}, 1, 1.f); gsl::span diamondSpan(&diamondVert, 1); std::vector threads(mTimeFrameGPU->getNChunks()); - // std::array, nLayers - 1> totTrackletsChunk{std::array{0, 0, 0}, std::array{0, 0, 0}, std::array{0, 0, 0}, std::array{0, 0, 0}, std::array{0, 0, 0}}; + for (int chunkId{0}; chunkId < mTimeFrameGPU->getNChunks(); ++chunkId) { int maxTracklets{static_cast(mTimeFrameGPU->getChunk(chunkId).getTimeFrameGPUParameters()->clustersPerROfCapacity) * static_cast(mTimeFrameGPU->getChunk(chunkId).getTimeFrameGPUParameters()->maxTrackletsPerCluster)}; @@ -514,7 +667,10 @@ void TrackerTraitsGPU::computeLayerTracklets(const int iteration) auto maxROF = offset + maxRofPerChunk; while (offset < maxROF) { auto rofs = mTimeFrameGPU->loadChunkData(chunkId, offset, maxROF); - RANGE("chunk_gpu_tracking", 1); + //////////////////// + /// Tracklet finding + gpu::GpuTimer timer{offset, mTimeFrameGPU->getStream(chunkId).get()}; + // timer.Start("trTrackletFinder"); for (int iLayer{0}; iLayer < nLayers - 1; ++iLayer) { auto nclus = mTimeFrameGPU->getTotalClustersPerROFrange(offset, rofs, iLayer); const float meanDeltaR{mTrkParams[iteration].LayerRadii[iLayer + 1] - mTrkParams[iteration].LayerRadii[iLayer]}; @@ -601,6 +757,9 @@ void TrackerTraitsGPU::computeLayerTracklets(const int iteration) checkGPUError(cudaHostUnregister(tracklets.data())); } } + + //////////////// + /// Cell finding for (int iLayer{0}; iLayer < nLayers - 2; ++iLayer) { // Compute layer cells. gpu::computeLayerCellsKernel<<<10, 1024, 0, mTimeFrameGPU->getStream(chunkId).get()>>>( @@ -641,7 +800,9 @@ void TrackerTraitsGPU::computeLayerTracklets(const int iteration) (nLayers - 2) * sizeof(int), cudaMemcpyDeviceToHost, mTimeFrameGPU->getStream(chunkId).get())); - // Create cells labels TODO: make it work after fixing the tracklets labels + + // Create cells labels + // TODO: make it work after fixing the tracklets labels if (mTimeFrameGPU->hasMCinformation()) { for (int iLayer{0}; iLayer < nLayers - 2; ++iLayer) { std::vector cells(mTimeFrameGPU->getHostNCells(chunkId)[iLayer]); @@ -655,6 +816,8 @@ void TrackerTraitsGPU::computeLayerTracklets(const int iteration) } } + ///////////////////// + /// Neighbour finding for (int iLayer{0}; iLayer < nLayers - 3; ++iLayer) { gpu::computeLayerCellNeighboursKernel<<<10, 1024, 0, mTimeFrameGPU->getStream(chunkId).get()>>>( mTimeFrameGPU->getChunk(chunkId).getDeviceCells(iLayer), @@ -683,12 +846,28 @@ void TrackerTraitsGPU::computeLayerTracklets(const int iteration) mTimeFrameGPU->getChunk(chunkId).getDeviceCellNeighbours(iLayer), mTimeFrameGPU->getChunk(chunkId).getDeviceNFoundCells(), mTimeFrameGPU->getChunk(chunkId).getTimeFrameGPUParameters()->maxNeighboursSize); + // if (!chunkId) { // gpu::printBufferLayerOnThread<<<1, 1, 0, mTimeFrameGPU->getStream(chunkId).get()>>>(iLayer, // mTimeFrameGPU->getChunk(chunkId).getDeviceCellNeighbours(iLayer), // mTimeFrameGPU->getChunk(chunkId).getTimeFrameGPUParameters()->maxNeighboursSize * rofs); // } } + // Download cells into vectors + + for (int iLevel{nLayers - 2}; iLevel >= mTrkParams[iteration].CellMinimumLevel(); --iLevel) { + const int minimumLevel{iLevel - 1}; + for (int iLayer{nLayers - 3}; iLayer >= minimumLevel; --iLayer) { + // gpu::computeLayerRoadsKernel<<<1, 1, 0, mTimeFrameGPU->getStream(chunkId).get()>>>(iLevel, // const int level, + // iLayer, // const int layerIndex, + // mTimeFrameGPU->getChunk(chunkId).getDeviceArrayCells(), // const Cell** cells, + // mTimeFrameGPU->getChunk(chunkId).getDeviceNFoundCells(), // const int* nCells, + // mTimeFrameGPU->getChunk(chunkId).getDeviceArrayNeighboursCell(), // const int** neighbours, + // mTimeFrameGPU->getChunk(chunkId).getDeviceArrayNeighboursCellLUT(), // const int** neighboursLUT, + // mTimeFrameGPU->getChunk(chunkId).getDeviceRoads(), // Road* roads, + // mTimeFrameGPU->getChunk(chunkId).getDeviceRoadsLookupTables(iLayer)); // int* roadsLookupTable + } + } // End of tracking for this chunk offset += rofs; @@ -708,21 +887,6 @@ void TrackerTraitsGPU::computeLayerCells(const int iteration) { } -// void TrackerTraitsGPU::refitTracks(const std::vector>& tf, std::vector& tracks) -// { -// PrimaryVertexContextNV* pvctx = static_cast(nullptr); //TODO: FIX THIS with Time Frames -// std::array cells; -// for (int iLayer = 0; iLayer < 5; iLayer++) { -// cells[iLayer] = pvctx->getDeviceCells()[iLayer].get(); -// } -// std::array clusters; -// for (int iLayer = 0; iLayer < 7; iLayer++) { -// clusters[iLayer] = pvctx->getDeviceClusters()[iLayer].get(); -// } -// //TODO: restore this -// // mChainRunITSTrackFit(*mChain, mPrimaryVertexContext->getRoads(), clusters, cells, tf, tracks); -// } - template void TrackerTraitsGPU::findCellsNeighbours(const int iteration){}; diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/Utils.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/Utils.cu index 75b17c815b9a4..d49837c6272e1 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/Utils.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/Utils.cu @@ -22,7 +22,6 @@ namespace { - int roundUp(const int numToRound, const int multiple) { if (multiple == 0) { @@ -59,7 +58,6 @@ namespace its using constants::GB; namespace gpu { - GPUh() void gpuThrowOnError() { cudaError_t error = cudaGetLastError(); diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/VertexerTraitsGPU.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/VertexerTraitsGPU.cu index cefad5117ff9c..bffb6b47157f0 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/VertexerTraitsGPU.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/VertexerTraitsGPU.cu @@ -48,8 +48,14 @@ using constants::its::VertexerHistogramVolume; using constants::math::TwoPi; using gpu::utils::checkGPUError; using math_utils::getNormalizedPhi; - using namespace constants::its2; + +GPUd() float smallestAngleDifference(float a, float b) +{ + float diff = fmod(b - a + constants::math::Pi, constants::math::TwoPi) - constants::math::Pi; + return (diff < -constants::math::Pi) ? diff + constants::math::TwoPi : ((diff > constants::math::Pi) ? diff - constants::math::TwoPi : diff); +} + GPUd() const int4 getBinsRect(const Cluster& currentCluster, const int layerIndex, const float z1, float maxdeltaz, float maxdeltaphi) { @@ -270,7 +276,7 @@ GPUg() void trackleterKernelMultipleRof( // loop on clusters next layer for (int iNextLayerClusterIndex{firstRowClusterIndex}; iNextLayerClusterIndex < maxRowClusterIndex && iNextLayerClusterIndex < nClustersNextLayerRof; ++iNextLayerClusterIndex) { const Cluster& nextCluster = clustersNextLayerRof[iNextLayerClusterIndex]; - if (o2::gpu::GPUCommonMath::Abs(currentCluster.phi - nextCluster.phi) < phiCut) { + if (o2::gpu::GPUCommonMath::Abs(smallestAngleDifference(currentCluster.phi, nextCluster.phi)) < phiCut) { if (storedTracklets < maxTrackletsPerCluster) { if constexpr (Mode == TrackletMode::Layer0Layer1) { new (TrackletsRof + stride + storedTracklets) Tracklet{iNextLayerClusterIndex, iCurrentLayerClusterIndex, nextCluster, currentCluster, static_cast(rof), static_cast(rof)}; @@ -314,7 +320,7 @@ GPUg() void trackletSelectionKernelSingleRof( for (int iTracklet12{0}; iTracklet12 < nFoundTracklet12[iCurrentLayerClusterIndex]; ++iTracklet12) { for (int iTracklet01{0}; iTracklet01 < nFoundTracklet01[iCurrentLayerClusterIndex] && validTracklets < maxTrackletsPerCluster; ++iTracklet01) { const float deltaTanLambda{o2::gpu::GPUCommonMath::Abs(tracklets01[stride + iTracklet01].tanLambda - tracklets12[stride + iTracklet12].tanLambda)}; - const float deltaPhi{o2::gpu::GPUCommonMath::Abs(tracklets01[stride + iTracklet01].phi - tracklets12[stride + iTracklet12].phi)}; + const float deltaPhi{o2::gpu::GPUCommonMath::Abs(smallestAngleDifference(tracklets01[stride + iTracklet01].phi, tracklets12[stride + iTracklet12].phi))}; if (!usedTracklets[stride + iTracklet01] && deltaTanLambda < tanLambdaCut && deltaPhi < phiCut && validTracklets != maxTrackletsPerCluster) { usedTracklets[stride + iTracklet01] = true; if constexpr (!initRun) { @@ -618,6 +624,8 @@ void VertexerTraitsGPU::computeTracklets() while (offset < maxROF) { auto rofs = mTimeFrameGPU->loadChunkData(chunkId, offset, maxROF); RANGE("chunk_gpu_vertexing", 1); + // gpu::GpuTimer timer{offset, mTimeFrameGPU->getStream(chunkId).get()}; + // timer.Start("vtTrackletFinder"); gpu::trackleterKernelMultipleRof<<getStream(chunkId).get()>>>( mTimeFrameGPU->getChunk(chunkId).getDeviceClusters(0), // const Cluster* clustersNextLayer, // 0 2 mTimeFrameGPU->getChunk(chunkId).getDeviceClusters(1), // const Cluster* clustersCurrentLayer, // 1 1 @@ -631,6 +639,7 @@ void VertexerTraitsGPU::computeTracklets() rofs, // const unsigned int rofSize, mVrtParams.phiCut, // const float phiCut, mVrtParams.maxTrackletsPerCluster); // const size_t maxTrackletsPerCluster = 1e2 + gpu::trackleterKernelMultipleRof<<getStream(chunkId).get()>>>( mTimeFrameGPU->getChunk(chunkId).getDeviceClusters(2), // const Cluster* clustersNextLayer, // 0 2 mTimeFrameGPU->getChunk(chunkId).getDeviceClusters(1), // const Cluster* clustersCurrentLayer, // 1 1 @@ -645,7 +654,6 @@ void VertexerTraitsGPU::computeTracklets() mVrtParams.phiCut, // const float phiCut, mVrtParams.maxTrackletsPerCluster); // const size_t maxTrackletsPerCluster = 1e2 - // Tracklet selection gpu::trackletSelectionKernelMultipleRof<<getStream(chunkId).get()>>>( mTimeFrameGPU->getChunk(chunkId).getDeviceClusters(0), // const Cluster* clusters0, // Clusters on layer 0 mTimeFrameGPU->getChunk(chunkId).getDeviceClusters(1), // const Cluster* clusters1, // Clusters on layer 1 @@ -734,7 +742,7 @@ void VertexerTraitsGPU::computeTracklets() mTimeFrameGPU->getBeamXY(), mTimeFrameGPU->getVerticesInChunks()[chunkId], mTimeFrameGPU->getNVerticesInChunks()[chunkId], - mTimeFrameGPU->hasMCinformation() ? mTimeFrameGPU : nullptr, + mTimeFrameGPU, mTimeFrameGPU->hasMCinformation() ? &mTimeFrameGPU->getLabelsInChunks()[chunkId] : nullptr); } offset += rofs; diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Configuration.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Configuration.h index b81b8b1705a21..3a42efa6e80d2 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Configuration.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Configuration.h @@ -120,23 +120,24 @@ struct VertexingParameters { struct TimeFrameGPUParameters { TimeFrameGPUParameters() = default; - TimeFrameGPUParameters(size_t cubBufferSize, - size_t maxTrkClu, - size_t cluLayCap, - size_t cluROfCap, - size_t maxTrkCap, - size_t maxVertCap, - size_t maxROFs); + // TimeFrameGPUParameters(size_t cubBufferSize, + // size_t maxTrkClu, + // size_t cluLayCap, + // size_t cluROfCap, + // size_t maxTrkCap, + // size_t maxVertCap, + // size_t maxROFs); size_t tmpCUBBufferSize = 1e5; // In average in pp events there are required 4096 bytes size_t maxTrackletsPerCluster = 1e2; size_t clustersPerLayerCapacity = 2.5e5; size_t clustersPerROfCapacity = 1.5e3; - size_t trackletsCapacity = maxTrackletsPerCluster * clustersPerROfCapacity; + // size_t trackletsCapacity = maxTrackletsPerCluster * clustersPerROfCapacity; size_t validatedTrackletsCapacity = 1e3; size_t cellsLUTsize = validatedTrackletsCapacity; size_t maxNeighboursSize = 1e2; size_t neighboursLUTsize = maxNeighboursSize; + size_t maxRoadPerRofSize = 1e3; // pp! size_t maxLinesCapacity = 1e2; size_t maxVerticesCapacity = 5e4; size_t nMaxROFs = 1e3; @@ -144,22 +145,28 @@ struct TimeFrameGPUParameters { int maxGPUMemoryGB = -1; }; -inline TimeFrameGPUParameters::TimeFrameGPUParameters(size_t cubBufferSize, - size_t maxTrkClu, - size_t cluLayCap, - size_t cluROfCap, - size_t maxTrkCap, - size_t maxVertCap, - size_t maxROFs) : tmpCUBBufferSize{cubBufferSize}, - maxTrackletsPerCluster{maxTrkClu}, - clustersPerLayerCapacity{cluLayCap}, - clustersPerROfCapacity{cluROfCap}, - maxLinesCapacity{maxTrkCap}, - maxVerticesCapacity{maxVertCap}, - nMaxROFs{maxROFs} -{ - trackletsCapacity = maxTrackletsPerCluster * clustersPerLayerCapacity; -} +// inline TimeFrameGPUParameters::TimeFrameGPUParameters(size_t cubBufferSize, +// size_t maxTrkClu, +// size_t cluLayCap, +// size_t cluROfCap, +// size_t maxTrkCap, +// size_t maxVertCap, +// size_t maxROFs, +// size_t validatedTrackletsCapacity, +// size_t cellsLUTsize, +// size_t maxNeighboursSize, +// size_t neighboursLUTsize, +// size_t maxRoadPerRofSize, +// size_t maxLinesCapacity) : tmpCUBBufferSize{cubBufferSize}, +// maxTrackletsPerCluster{maxTrkClu}, +// clustersPerLayerCapacity{cluLayCap}, +// clustersPerROfCapacity{cluROfCap}, +// maxLinesCapacity{maxTrkCap}, +// maxVerticesCapacity{maxVertCap}, +// nMaxROFs{maxROFs} +// { +// trackletsCapacity = maxTrackletsPerCluster * clustersPerLayerCapacity; +// } } // namespace its } // namespace o2 diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Road.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Road.h index 40258c5ddcd15..689ed5f077a49 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Road.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Road.h @@ -82,18 +82,6 @@ inline int Road::getRoadSize() const return mRoadSize; } -// template -// inline int Road::getLabel() const -// { -// return mLabel; -// } - -// template -// inline void Road::setLabel(const int label) -// { -// mLabel = label; -// } - template GPUhdi() int& Road::operator[](const int& i) { diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h index f21bb813af1ba..03997ff5f428e 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h @@ -66,6 +66,52 @@ struct lightVertex { int mTimeStamp; }; +class CpuTimer +{ + public: + CpuTimer() + { + mLifeTimeStart = std::chrono::high_resolution_clock::now(); + } + void Reset(const std::string element = "vt") + { + mElement = element; + mLifeTimeStart = std::chrono::high_resolution_clock::now(); + } + + void Start(const std::string& taskName) + { + mTask = taskName; + mStart = std::chrono::high_resolution_clock::now(); + } + + void Stop() + { + mStop = std::chrono::high_resolution_clock::now(); + } + + void Print() + { + float duration = std::chrono::duration_cast(mStop - mStart).count() / 1000.f; + std::cout << mTask << "\t" << duration << " #?#" << std::endl; + } + + void PrintLifeTime(size_t mem = 0) + { + mLifeTimeStop = std::chrono::high_resolution_clock::now(); + float lifetime = std::chrono::duration_cast(mLifeTimeStop - mLifeTimeStart).count() / 1000.f; + printf("%sLifeTime\t%f\t%lu\t#?#\n", mElement.c_str(), lifetime, mem); + } + + private: + std::string mElement; + std::string mTask; + std::chrono::time_point mStart; + std::chrono::time_point mStop; + std::chrono::time_point mLifeTimeStart; + std::chrono::time_point mLifeTimeStop; +}; + class TimeFrame { public: @@ -231,6 +277,8 @@ class TimeFrame std::vector mROframesPV = {0}; std::vector mPrimaryVertices; + CpuTimer mTimer; + private: float mBz = 5.; int mBeamPosWeight = 0; diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h index e5e830c717521..d8871df32f861 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h @@ -72,6 +72,7 @@ class Tracker bool isMatLUT() const; void setNThreads(int n); int getNThreads() const; + std::uint32_t mTimeFrameCounter = 0; private: void initialiseTimeFrame(int& iteration); @@ -95,7 +96,6 @@ class Tracker TimeFrame* mTimeFrame = nullptr; /// Observer pointer, not owned by this class std::vector mTrkParams; - std::uint32_t mTimeFrameCounter = 0; o2::gpu::GPUChainITS* mRecoChain = nullptr; unsigned int mNumberOfRuns{0}; diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h index 94c629d6c22d3..43c52d5551bb9 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h @@ -83,11 +83,12 @@ struct GpuRecoParamConfig : public o2::conf::ConfigurableParamHelpermTimer.Reset("tr"); + // mTimeFrame->mTimer.Start("trTrackletFinder"); TimeFrame* tf = mTimeFrame; #ifdef OPTIMISATION_OUTPUT @@ -248,11 +250,13 @@ void TrackerTraits::computeLayerTracklets(const int iteration) } } } + // mTimeFrame->mTimer.Stop(); + // mTimeFrame->mTimer.Print(); } void TrackerTraits::computeLayerCells(const int iteration) { - + // mTimeFrame->mTimer.Start("trCellFinder"); #ifdef OPTIMISATION_OUTPUT static int iter{0}; std::ofstream off(fmt::format("cells{}.txt", iter++)); @@ -335,10 +339,13 @@ void TrackerTraits::computeLayerCells(const int iteration) std::cout << "Cells on layer " << iLayer << " " << tf->getCells()[iLayer].size() << std::endl; } } + // mTimeFrame->mTimer.Stop(); + // mTimeFrame->mTimer.Print(); } void TrackerTraits::findCellsNeighbours(const int iteration) { + // mTimeFrame->mTimer.Start("trNeighbourFinder"); #ifdef OPTIMISATION_OUTPUT std::ofstream off(fmt::format("cellneighs{}.txt", iteration)); #endif @@ -381,6 +388,8 @@ void TrackerTraits::findCellsNeighbours(const int iteration) } } } + // mTimeFrame->mTimer.Stop(); + mTimeFrame->mTimer.PrintLifeTime(); } void TrackerTraits::findRoads(const int iteration) @@ -388,19 +397,13 @@ void TrackerTraits::findRoads(const int iteration) for (int iLevel{mTrkParams[iteration].CellsPerRoad()}; iLevel >= mTrkParams[iteration].CellMinimumLevel(); --iLevel) { CA_DEBUGGER(int nRoads = -mTimeFrame->getRoads().size()); const int minimumLevel{iLevel - 1}; - for (int iLayer{mTrkParams[iteration].CellsPerRoad() - 1}; iLayer >= minimumLevel; --iLayer) { - const int levelCellsNum{static_cast(mTimeFrame->getCells()[iLayer].size())}; - for (int iCell{0}; iCell < levelCellsNum; ++iCell) { - Cell& currentCell{mTimeFrame->getCells()[iLayer][iCell]}; - if (currentCell.getLevel() != iLevel) { continue; } - mTimeFrame->getRoads().emplace_back(iLayer, iCell); /// For 3 clusters roads (useful for cascades and hypertriton) we just store the single cell @@ -408,32 +411,22 @@ void TrackerTraits::findRoads(const int iteration) if (iLevel == 1) { continue; } - const int cellNeighboursNum{static_cast( mTimeFrame->getCellsNeighbours()[iLayer - 1][iCell].size())}; bool isFirstValidNeighbour = true; - for (int iNeighbourCell{0}; iNeighbourCell < cellNeighboursNum; ++iNeighbourCell) { - const int neighbourCellId = mTimeFrame->getCellsNeighbours()[iLayer - 1][iCell][iNeighbourCell]; const Cell& neighbourCell = mTimeFrame->getCells()[iLayer - 1][neighbourCellId]; - if (iLevel - 1 != neighbourCell.getLevel()) { continue; } - if (isFirstValidNeighbour) { - isFirstValidNeighbour = false; - } else { - mTimeFrame->getRoads().emplace_back(iLayer, iCell); } - traverseCellsTree(neighbourCellId, iLayer - 1); } - // TODO: crosscheck for short track iterations // currentCell.setLevel(0); } diff --git a/Detectors/ITSMFT/ITS/tracking/src/VertexerTraits.cxx b/Detectors/ITSMFT/ITS/tracking/src/VertexerTraits.cxx index 94164bdc7ae87..a7abad1de7df2 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/VertexerTraits.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/VertexerTraits.cxx @@ -12,6 +12,9 @@ #include #include +#include +#include +#include #include "ITStracking/VertexerTraits.h" #include "ITStracking/ClusterLines.h" @@ -167,7 +170,8 @@ void VertexerTraits::updateVertexingParameters(const VertexingParameters& vrtPar void VertexerTraits::computeTracklets() { - + // mTimeFrame->mTimer.Reset(); + // mTimeFrame->mTimer.Start("vtTrackletFinder"); #pragma omp parallel num_threads(mNThreads) { #pragma omp for schedule(dynamic) @@ -288,10 +292,13 @@ void VertexerTraits::computeTracklets() out01.close(); out12.close(); #endif + // mTimeFrame->mTimer.Stop(); + // mTimeFrame->mTimer.Print(); } // namespace its void VertexerTraits::computeTrackletMatching() { + // mTimeFrame->mTimer.Start("vtTrackletSel"); #pragma omp parallel for num_threads(mNThreads) schedule(dynamic) for (int rofId = 0; rofId < mTimeFrame->getNrof(); ++rofId) { mTimeFrame->getLines(rofId).reserve(mTimeFrame->getNTrackletsCluster(rofId, 0).size()); @@ -308,7 +315,8 @@ void VertexerTraits::computeTrackletMatching() mVrtParams.tanLambdaCut, mVrtParams.phiCut); } - + // mTimeFrame->mTimer.Stop(); + // mTimeFrame->mTimer.Print(); #ifdef VTX_DEBUG TFile* trackletFile = TFile::Open("artefacts_tf.root", "update"); TTree* ln_tre = new TTree("lines", "tf"); @@ -342,6 +350,7 @@ void VertexerTraits::computeTrackletMatching() void VertexerTraits::computeVertices() { + // mTimeFrame->mTimer.Start("vtFinding"); auto nsigmaCut{std::min(mVrtParams.vertNsigmaCut * mVrtParams.vertNsigmaCut * (mVrtParams.vertRadiusSigma * mVrtParams.vertRadiusSigma + mVrtParams.trackletSigma * mVrtParams.trackletSigma), 1.98f)}; std::vector vertices; #ifdef VTX_DEBUG @@ -488,6 +497,9 @@ void VertexerTraits::computeVertices() ln_clus_lines_tree->Write(); dbg_file->Close(); #endif + // mTimeFrame->mTimer.Stop(); + // mTimeFrame->mTimer.Print(); + // mTimeFrame->mTimer.PrintLifeTime(); } void VertexerTraits::setNThreads(int n) @@ -545,8 +557,9 @@ void VertexerTraits::computeVerticesInRof(int rofId, } } } + if (mVrtParams.allowSingleContribClusters) { - auto beamLine = Line{{mTimeFrame->getBeamX(), mTimeFrame->getBeamY(), -50.f}, {mTimeFrame->getBeamX(), mTimeFrame->getBeamY(), 50.f}}; // use beam position as contributor + auto beamLine = Line{{tf->getBeamX(), tf->getBeamY(), -50.f}, {tf->getBeamX(), tf->getBeamY(), 50.f}}; // use beam position as contributor for (size_t iLine{0}; iLine < numTracklets; ++iLine) { if (!usedLines[iLine]) { auto dca = Line::getDCA(lines[iLine], beamLine); @@ -581,13 +594,15 @@ void VertexerTraits::computeVerticesInRof(int rofId, } } } + std::sort(clusterLines.begin(), clusterLines.end(), [](ClusterLines& cluster1, ClusterLines& cluster2) { return cluster1.getSize() > cluster2.getSize(); }); // ensure clusters are ordered by contributors, so that we can cut after the first. bool atLeastOneFound{false}; for (int iCluster{0}; iCluster < nClusters; ++iCluster) { bool lowMultCandidate{false}; - double beamDistance2{(mTimeFrame->getBeamX() - mTimeFrame->getTrackletClusters(rofId)[iCluster].getVertex()[0]) * (mTimeFrame->getBeamX() - mTimeFrame->getTrackletClusters(rofId)[iCluster].getVertex()[0]) + - (mTimeFrame->getBeamY() - mTimeFrame->getTrackletClusters(rofId)[iCluster].getVertex()[1]) * (mTimeFrame->getBeamY() - mTimeFrame->getTrackletClusters(rofId)[iCluster].getVertex()[1])}; + double beamDistance2{(tf->getBeamX() - clusterLines[iCluster].getVertex()[0]) * (tf->getBeamX() - clusterLines[iCluster].getVertex()[0]) + + (tf->getBeamY() - clusterLines[iCluster].getVertex()[1]) * (tf->getBeamY() - clusterLines[iCluster].getVertex()[1])}; + if (atLeastOneFound && (lowMultCandidate = clusterLines[iCluster].getSize() < mVrtParams.clusterContributorsCut)) { // We might have pile up with nContr > cut. lowMultCandidate &= (beamDistance2 < mVrtParams.lowMultBeamDistCut * mVrtParams.lowMultBeamDistCut); if (!lowMultCandidate) { // Not the first cluster and not a low multiplicity candidate, we can remove it @@ -596,7 +611,7 @@ void VertexerTraits::computeVerticesInRof(int rofId, continue; } } - if (beamDistance2 < nsigmaCut && std::abs(mTimeFrame->getTrackletClusters(rofId)[iCluster].getVertex()[2]) < mVrtParams.maxZPositionAllowed) { + if (beamDistance2 < nsigmaCut && std::abs(clusterLines[iCluster].getVertex()[2]) < mVrtParams.maxZPositionAllowed) { atLeastOneFound = true; ++foundVertices; vertices.emplace_back(o2::math_utils::Point3D(clusterLines[iCluster].getVertex()[0],