Skip to content

Commit ceafd01

Browse files
committed
GPU: Add GPUCommonAlignedAlloc.h for aligned buffers, and use it for TPCFastTransformPOD
1 parent 9cb59f1 commit ceafd01

File tree

24 files changed

+154
-101
lines changed

24 files changed

+154
-101
lines changed

Detectors/Align/Workflow/src/BarrelAlignmentSpec.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ class BarrelAlignmentSpec : public Task
118118

119119
o2::tpc::VDriftHelper mTPCVDriftHelper{};
120120

121-
std::vector<char> mCorrMapBuffer; // buffer to hold the raw map data from CCDB, needed to keep the pointer valid in the CorrectionMapsHelper
121+
o2::gpu::aligned_unique_buffer_ptr<o2::gpu::TPCFastTransformPOD> mCorrMapBuffer; // buffer to hold the raw map data from CCDB, needed to keep the pointer valid in the CorrectionMapsHelper
122122
const o2::gpu::TPCFastTransformPOD* mTPCCorrMaps{};
123123

124124
//

Detectors/TPC/calibration/include/TPCCalibration/CalculatedEdx.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ class CalculatedEdx
228228
std::vector<TPCClRefElem>* mTPCTrackClIdxVecInput{nullptr}; ///< input vector with TPC tracks cluster indices
229229
const o2::tpc::ClusterNativeAccess* mClusterIndex{nullptr}; ///< needed to access clusternative with tpctracks
230230
const o2::gpu::TPCFastTransformPOD* mTPCCorrMap{nullptr}; ///< cluster correction maps helper
231-
std::vector<char> mTPCCorrMapBuffer;
231+
o2::gpu::aligned_unique_buffer_ptr<o2::gpu::TPCFastTransformPOD> mTPCCorrMapBuffer;
232232
std::vector<unsigned char> mTPCRefitterShMap; ///< externally set TPC clusters sharing map
233233
std::vector<unsigned int> mTPCRefitterOccMap; ///< externally set TPC clusters occupancy map
234234
std::unique_ptr<o2::gpu::GPUO2InterfaceRefit> mRefit{nullptr}; ///< TPC refitter used for TPC tracks refit during the reconstruction
@@ -247,4 +247,4 @@ class CalculatedEdx
247247

248248
} // namespace o2::tpc
249249

250-
#endif
250+
#endif

Detectors/TPC/calibration/include/TPCCalibration/TrackDump.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ class TrackDump
7777
float gyc(float vertexTime = 0) const;
7878
float zc(float vertexTime = 0) const;
7979

80-
inline static std::vector<char> corrMapBuffer; // buffer for owning the correction map in case of update during runtime
80+
inline static o2::gpu::aligned_unique_buffer_ptr<o2::gpu::TPCFastTransformPOD> corrMapBuffer; // buffer for owning the correction map in case of update during runtime
8181
inline static const o2::gpu::TPCFastTransformPOD* corrMap{nullptr}; // local copy of the correction map for quick access to the transform functions
8282
static void loadCorrMaps(std::string_view corrMapFile, std::string_view corrMapFileRef = "");
8383
ClassDefNV(ClusterNativeAdd, 1);

Detectors/TPC/calibration/src/CalculatedEdx.cxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,10 @@ using namespace o2::tpc;
3232

3333
CalculatedEdx::CalculatedEdx()
3434
{
35-
std::vector<char> buffer;
35+
gpu::aligned_unique_buffer_ptr<gpu::TPCFastTransformPOD> buffer;
3636
gpu::TPCFastTransformPOD::create(buffer, *TPCFastTransformHelperO2::instance()->create(0));
3737
mTPCCorrMapBuffer = std::move(buffer);
38-
mTPCCorrMap = &gpu::TPCFastTransformPOD::get(mTPCCorrMapBuffer.data());
38+
mTPCCorrMap = mTPCCorrMapBuffer.get();
3939
}
4040

4141
void CalculatedEdx::setMembers(std::vector<o2::tpc::TPCClRefElem>* tpcTrackClIdxVecInput, const o2::tpc::ClusterNativeAccess& clIndex, std::vector<o2::tpc::TrackTPC>* vTPCTracksArrayInp)

Detectors/TPC/calibration/src/TrackDump.cxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -236,8 +236,8 @@ float TrackDump::ClusterNativeAdd::zc(float vertexTime) const
236236
void TrackDump::ClusterNativeAdd::loadCorrMaps(std::string_view corrMapFile, std::string_view corrMapFileRef)
237237
{
238238
auto fastTransformTmp = gpu::TPCFastTransform::loadFromFile(corrMapFile.data());
239-
std::vector<char> buffer;
239+
o2::gpu::aligned_unique_buffer_ptr<o2::gpu::TPCFastTransformPOD> buffer;
240240
gpu::TPCFastTransformPOD::create(buffer, *fastTransformTmp);
241241
corrMapBuffer = std::move(buffer);
242-
corrMap = &gpu::TPCFastTransformPOD::get(corrMapBuffer.data());
242+
corrMap = corrMapBuffer.get();
243243
}

Detectors/TPC/reconstruction/test/testGPUCATracking.cxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,9 @@ BOOST_AUTO_TEST_CASE(CATracking_test1)
7474
config.configWorkflow.outputs.set(gpudatatypes::InOutType::TPCMergedTracks);
7575

7676
auto fastTransformTmp = TPCFastTransformHelperO2::instance()->create(0);
77-
std::vector<char> fastTransformBuf;
77+
aligned_unique_buffer_ptr<TPCFastTransformPOD> fastTransformBuf;
7878
TPCFastTransformPOD::create(fastTransformBuf, *fastTransformTmp);
79-
config.configCalib.fastTransform = &TPCFastTransformPOD::get(fastTransformBuf.data());
79+
config.configCalib.fastTransform = fastTransformBuf.get();
8080

8181
auto dEdxCalibContainer = GPUO2InterfaceUtils::getCalibdEdxContainerDefault();
8282
config.configCalib.dEdxCalibContainer = dEdxCalibContainer.get();

Detectors/TPC/workflow/src/TPCScalerSpec.cxx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,8 @@ class TPCScalerSpec : public Task
219219

220220
Output corrMapOutput{header::gDataOriginTPC, "TPCCORRMAP", 0};
221221
auto outputBuffer = o2::pmr::vector<char>(pc.outputs().getMemoryResource(corrMapOutput));
222-
auto* pod = TPCFastTransformPOD::create(outputBuffer, finalMap.getCorrection());
222+
outputBuffer.resize(TPCFastTransformPOD::estimateSize(finalMap.getCorrection()));
223+
auto* pod = TPCFastTransformPOD::create(outputBuffer.data(), outputBuffer.size(), finalMap.getCorrection());
223224
const auto& vd = mTPCVDriftHelper.getVDriftObject();
224225
o2::tpc::TPCFastTransformHelperO2::instance()->updateCalibration(*pod, 0, vd.corrFact, vd.refVDrift, vd.getTimeOffset());
225226
pc.outputs().adoptContainer(corrMapOutput, std::move(outputBuffer));

GPU/Common/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ set(MODULE GPUCommon)
1313

1414
set(HDRS_INSTALL
1515
GPUCommonAlgorithm.h
16+
GPUCommonAlignedAlloc.h
1617
GPUCommonDef.h
1718
GPUCommonDefAPI.h
1819
GPUCommonHelpers.h

GPU/Common/GPUCommonAlignedAlloc.h

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2+
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3+
// All rights not expressly granted are reserved.
4+
//
5+
// This software is distributed under the terms of the GNU General Public
6+
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7+
//
8+
// In applying this license CERN does not waive the privileges and immunities
9+
// granted to it by virtue of its status as an Intergovernmental Organization
10+
// or submit itself to any jurisdiction.
11+
12+
/// \file GPUCommonAlignedAlloc.h
13+
/// \author David Rohr
14+
15+
#ifndef GPUCOMMONAKUGBEDALLOC_H
16+
#define GPUCOMMONAKUGBEDALLOC_H
17+
18+
#include <algorithm>
#include <cstddef>
#include <memory>
#include <new>
19+
20+
namespace o2::gpu
21+
{
22+
23+
/// \brief Deleter that releases raw storage through the over-aligned global operator delete.
///
/// The alignment passed to ::operator delete is max(MIN_ALIGN, alignof(T)); the storage must
/// therefore have been obtained from the aligned ::operator new with that same alignment value.
/// No destructor is run on the pointed-to memory, only the storage itself is returned.
template <typename T, std::size_t MIN_ALIGN = 0>
struct alignedDeleter {
  /// Frees \p ptr. Declared const and noexcept so that the deleter can also be invoked
  /// through a const deleter object (the aligned ::operator delete itself never throws).
  void operator()(void* ptr) const noexcept
  {
    constexpr std::size_t alignment = std::max(MIN_ALIGN, alignof(T));
    ::operator delete(ptr, std::align_val_t(alignment));
  }
};
27+
28+
template <typename T, std::size_t MIN_ALIGN = 0>
29+
struct alignedAllocator {
30+
using value_type = T;
31+
T* allocate(std::size_t n)
32+
{
33+
return (T*)::operator new(n, std::align_val_t(std::max(MIN_ALIGN, alignof(T))));
34+
}
35+
void deallocate(T* ptr, std::size_t)
36+
{
37+
alignedDeleter<T, MIN_ALIGN>()(ptr);
38+
}
39+
};
40+
41+
template <typename T>
42+
struct aligned_unique_buffer_ptr : public std::unique_ptr<char[], alignedDeleter<T>> {
43+
aligned_unique_buffer_ptr() = default;
44+
aligned_unique_buffer_ptr(size_t n) { alloc(n); }
45+
aligned_unique_buffer_ptr(T* ptr) { std::unique_ptr<char[], alignedDeleter<T>>::reset((char*)ptr); }
46+
char* getraw() { return std::unique_ptr<char[], alignedDeleter<T>>::get(); }
47+
const char* getraw() const { return std::unique_ptr<char[], alignedDeleter<T>>::get(); }
48+
T* get() { return (T*)std::unique_ptr<char[], alignedDeleter<T>>::get(); }
49+
const T* get() const { return (T*)std::unique_ptr<char[], alignedDeleter<T>>::get(); }
50+
T* operator->() { return get(); }
51+
const T* operator->() const { return get(); }
52+
T* alloc(std::size_t n)
53+
{
54+
std::unique_ptr<char[], alignedDeleter<T>>::reset((char*)alignedAllocator<T>().allocate(n));
55+
return get();
56+
}
57+
};
58+
59+
} // namespace o2::gpu
60+
61+
#endif // GPUCOMMONAKUGBEDALLOC_H

GPU/GPUTracking/Base/GPUReconstruction.cxx

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -467,7 +467,7 @@ int32_t GPUReconstruction::Exit()
467467
if (mMemoryResources[i].mReuse >= 0) {
468468
continue;
469469
}
470-
operator delete(mMemoryResources[i].mPtrDevice, std::align_val_t(GPUCA_BUFFER_ALIGNMENT));
470+
::operator delete(mMemoryResources[i].mPtrDevice, std::align_val_t(GPUCA_BUFFER_ALIGNMENT));
471471
mMemoryResources[i].mPtr = mMemoryResources[i].mPtrDevice = nullptr;
472472
}
473473
}
@@ -630,7 +630,7 @@ void GPUReconstruction::AllocateRegisteredMemoryInternal(GPUMemoryResource* res,
630630
if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL && (control == nullptr || control->useInternal())) {
631631
if (!(res->mType & GPUMemoryResource::MEMORY_EXTERNAL)) {
632632
if (res->mPtrDevice && res->mReuse < 0) {
633-
operator delete(res->mPtrDevice, std::align_val_t(GPUCA_BUFFER_ALIGNMENT));
633+
::operator delete(res->mPtrDevice, std::align_val_t(GPUCA_BUFFER_ALIGNMENT));
634634
}
635635
res->mSize = std::max((size_t)res->SetPointers((void*)1) - 1, res->mOverrideSize);
636636
if (res->mReuse >= 0) {
@@ -640,7 +640,7 @@ void GPUReconstruction::AllocateRegisteredMemoryInternal(GPUMemoryResource* res,
640640
}
641641
res->mPtrDevice = mMemoryResources[res->mReuse].mPtrDevice;
642642
} else {
643-
res->mPtrDevice = operator new(res->mSize + GPUCA_BUFFER_ALIGNMENT, std::align_val_t(GPUCA_BUFFER_ALIGNMENT));
643+
res->mPtrDevice = ::operator new(res->mSize + GPUCA_BUFFER_ALIGNMENT, std::align_val_t(GPUCA_BUFFER_ALIGNMENT));
644644
}
645645
res->mPtr = GPUProcessor::alignPointer<GPUCA_BUFFER_ALIGNMENT>(res->mPtrDevice);
646646
res->SetPointers(res->mPtr);
@@ -733,9 +733,9 @@ void* GPUReconstruction::AllocateDirectMemory(size_t size, int32_t type)
733733
if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) {
734734
char* retVal = new (std::align_val_t(GPUCA_BUFFER_ALIGNMENT)) char[size];
735735
if ((type & GPUMemoryResource::MEMORY_STACK)) {
736-
mNonPersistentIndividualDirectAllocations.emplace_back(retVal, alignedDeleter());
736+
mNonPersistentIndividualDirectAllocations.emplace_back(retVal, alignedDefaultBufferDeleter());
737737
} else {
738-
mDirectMemoryChunks.emplace_back(retVal, alignedDeleter());
738+
mDirectMemoryChunks.emplace_back(retVal, alignedDefaultBufferDeleter());
739739
}
740740
return retVal;
741741
}
@@ -798,7 +798,7 @@ void* GPUReconstruction::AllocateVolatileMemory(size_t size, bool device)
798798
}
799799
char* retVal = new (std::align_val_t(GPUCA_BUFFER_ALIGNMENT)) char[size];
800800
stdspinlock spinlock(mMemoryMutex);
801-
mVolatileChunks.emplace_back(retVal, alignedDeleter());
801+
mVolatileChunks.emplace_back(retVal, alignedDefaultBufferDeleter());
802802
return retVal;
803803
}
804804

@@ -876,7 +876,7 @@ void GPUReconstruction::FreeRegisteredMemory(GPUMemoryResource* res)
876876
std::cout << "Freeing " << res->mName << ": size " << res->mSize << " (reused " << res->mReuse << ")\n";
877877
}
878878
if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL && res->mReuse < 0) {
879-
operator delete(res->mPtrDevice, std::align_val_t(GPUCA_BUFFER_ALIGNMENT));
879+
::operator delete(res->mPtrDevice, std::align_val_t(GPUCA_BUFFER_ALIGNMENT));
880880
}
881881
res->mPtr = nullptr;
882882
res->mPtrDevice = nullptr;
@@ -916,7 +916,7 @@ void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag, cons
916916
std::cout << "Freeing NonPersistent " << res->mName << ": size " << res->mSize << " (reused " << res->mReuse << ")\n";
917917
}
918918
if (res->mReuse < 0) {
919-
operator delete(res->mPtrDevice, std::align_val_t(GPUCA_BUFFER_ALIGNMENT));
919+
::operator delete(res->mPtrDevice, std::align_val_t(GPUCA_BUFFER_ALIGNMENT));
920920
}
921921
res->mPtr = nullptr;
922922
res->mPtrDevice = nullptr;

0 commit comments

Comments
 (0)