9 #include "checkpointing/CheckpointEntryWeightPvp.hpp" 10 #include "utils/PVAssert.hpp" 11 #include "utils/conversions.h" 20 std::string
const &name,
31 patchSizeX, patchSizeY, patchSizeF, preLoc, postLoc, numArbors, sharedWeights, timestamp);
35 std::shared_ptr<PatchGeometry> geometry,
41 "Weights object \"%s\" has already been initialized.\n",
44 mNumArbors = numArbors;
45 mSharedFlag = sharedWeights;
46 mTimestamp = timestamp;
51 mTimestampGPU = timestamp;
73 auto geometry = std::make_shared<PatchGeometry>(
74 mName.c_str(), patchSizeX, patchSizeY, patchSizeF, preLoc, postLoc);
75 initialize(geometry, numArbors, sharedWeights, timestamp);
79 mGeometry->setMargins(preHalo, postHalo);
87 FatalIf(mGeometry ==
nullptr,
"%s has not been initialized.\n", mName.c_str());
88 mGeometry->allocateDataStructures();
90 int numDataPatches = mNumDataPatchesX * mNumDataPatchesY * mNumDataPatchesF;
91 if (numDataPatches != 0) {
93 mData.resize(mNumArbors);
94 for (
int arbor = 0; arbor < mNumArbors; arbor++) {
95 mData[arbor].resize(numDataPatches * numItemsPerPatch);
99 int const numPatches = mGeometry->getNumPatches();
100 dataIndexLookupTable.resize(numPatches);
101 for (
int p = 0; p < numPatches; p++) {
102 dataIndexLookupTable[p] = calcDataIndexFromPatchIndex(p);
107 allocateCudaBuffers();
109 #endif // PV_USE_CUDA 113 void Weights::allocateCudaBuffers() {
115 mCudaDevice ==
nullptr,
116 "Weights::allocateCudaBuffers() called for weights \"%s\" without having set " 119 pvAssert(mDeviceData ==
nullptr);
121 pvAssert(mCUDNNData ==
nullptr);
122 #endif // PV_USE_CUDNN 123 std::string description(mName);
133 std::vector<int> hostPatchToDataLookupVector(numPatches);
135 for (
int patchIndex = 0; patchIndex < numPatches; patchIndex++) {
136 hostPatchToDataLookupVector[patchIndex] = dataIndexLookupTable[patchIndex];
140 for (
int patchIndex = 0; patchIndex < numPatches; patchIndex++) {
141 hostPatchToDataLookupVector[patchIndex] = patchIndex;
144 size = hostPatchToDataLookupVector.size() *
sizeof(hostPatchToDataLookupVector[0]);
145 mDevicePatchToDataLookup = mCudaDevice->createBuffer(size, &description);
147 mDevicePatchToDataLookup->copyToDevice(hostPatchToDataLookupVector.data());
151 mDeviceData = mCudaDevice->createBuffer(size, &description);
152 pvAssert(mDeviceData);
154 mCUDNNData = mCudaDevice->createBuffer(size, &description);
159 #endif // PV_USE_CUDA 161 void Weights::checkpointWeightPvp(
163 char const *objectName,
164 char const *bufferName,
166 auto checkpointEntry = std::make_shared<CheckpointEntryWeightPvp>(
167 std::string(objectName), bufferName, checkpointer->getMPIBlock(),
this, compressFlag);
168 bool registerSucceeded =
169 checkpointer->registerCheckpointEntry(checkpointEntry, !mWeightsArePlastic);
172 "%s failed to register %s for checkpointing.\n",
177 void Weights::initNumDataPatches() {
180 mGeometry->getNumKernelsX(), mGeometry->getNumKernelsY(), mGeometry->getNumKernelsF());
184 mGeometry->getNumPatchesX(), mGeometry->getNumPatchesY(), mGeometry->getNumPatchesF());
188 void Weights::setNumDataPatches(
int numDataPatchesX,
int numDataPatchesY,
int numDataPatchesF) {
189 mNumDataPatchesX = numDataPatchesX;
190 mNumDataPatchesY = numDataPatchesY;
191 mNumDataPatchesF = numDataPatchesF;
202 return &mData[arbor][dataIndex * numItemsPerPatch];
206 int dataIndex = mSharedFlag ? dataIndexLookupTable[patchIndex] : patchIndex;
210 int Weights::calcDataIndexFromPatchIndex(
int patchIndex)
const {
212 int numPatchesX = mGeometry->getNumPatchesX();
213 int numPatchesY = mGeometry->getNumPatchesY();
214 int numPatchesF = mGeometry->getNumPatchesF();
215 int xIndex = kxPos(patchIndex, numPatchesX, numPatchesY, numPatchesF);
216 xIndex = (xIndex - mGeometry->getPreLoc().halo.lt) % mNumDataPatchesX;
218 xIndex += mNumDataPatchesX;
221 int yIndex = kyPos(patchIndex, numPatchesX, numPatchesY, numPatchesF);
222 yIndex = (yIndex - mGeometry->getPreLoc().halo.up) % mNumDataPatchesY;
224 yIndex += mNumDataPatchesY;
227 int fIndex = featureIndex(patchIndex, numPatchesX, numPatchesY, numPatchesF);
230 kIndex(xIndex, yIndex, fIndex, mNumDataPatchesX, mNumDataPatchesY, mNumDataPatchesF);
239 float minWeight = FLT_MAX;
240 for (
int a = 0; a < mNumArbors; a++) {
242 if (arborMin < minWeight) {
243 minWeight = arborMin;
250 float arborMin = FLT_MAX;
252 for (
auto &w : mData[arbor]) {
264 for (
int y = 0; y < patch.ny; y++) {
265 for (
int k = 0; k < nk; k++) {
278 float maxWeight = -FLT_MAX;
279 for (
int a = 0; a < mNumArbors; a++) {
281 if (arborMax > maxWeight) {
282 maxWeight = arborMax;
289 float arborMax = -FLT_MAX;
291 for (
auto &w : mData[arbor]) {
303 for (
int y = 0; y < patch.ny; y++) {
304 for (
int k = 0; k < nk; k++) {
318 if (!(mUsingGPUFlag and mTimestampGPU < mTimestamp)) {
321 pvAssert(mDeviceData);
323 int const numDataPatches = mNumDataPatchesX * mNumDataPatchesY * mNumDataPatchesF;
324 std::size_t
const arborSize = (std::size_t)numDataPatches * (std::size_t)
getPatchSizeOverall();
325 std::size_t
const numArbors = (std::size_t)mNumArbors;
326 for (std::size_t a = 0; a < numArbors; a++) {
327 mDeviceData->copyToDevice(mData[a].data(), arborSize *
sizeof(mData[a][0]), a * arborSize);
330 mCUDNNData->permuteWeightsPVToCudnn(
331 mDeviceData->getPointer(),
337 #endif // PV_USE_CUDNN 338 mTimestampGPU = mTimestamp;
340 #endif // PV_USE_CUDA bool getSharedFlag() const
void setMargins(PVHalo const &preHalo, PVHalo const &postHalo)
float * getData(int arbor)
int getPatchSizeX() const
std::string const & getName() const
void initialize(std::shared_ptr< PatchGeometry > geometry, int numArbors, bool sharedWeights, double timestamp)
int getPatchSizeOverall() const
int getNumDataPatches() const
Patch const & getPatch(int patchIndex) const
float * getDataFromDataIndex(int arbor, int dataIndex)
int getPatchSizeY() const
std::shared_ptr< PatchGeometry > getGeometry() const
float * getDataFromPatchIndex(int arbor, int patchIndex)
double getTimestamp() const
void allocateDataStructures()
int getPatchSizeF() const
float const * getDataReadOnly(int arbor) const