#include "PoolingDelivery.hpp"
#include "columns/HyPerCol.hpp"
#include "columns/ObjectMapComponent.hpp"
#include "delivery/accumulate_functions.hpp"
#include "utils/MapLookupByType.hpp"

#include <cmath>
#include <cstring>
#include <locale>

PoolingDelivery::PoolingDelivery(char const *name, HyPerCol *hc) { initialize(name, hc); }
PoolingDelivery::PoolingDelivery() {}

PoolingDelivery::~PoolingDelivery() {}
int PoolingDelivery::initialize(char const *name, HyPerCol *hc) {
   return BaseDelivery::initialize(name, hc);
}

void PoolingDelivery::setObjectType() { mObjectType = "PoolingDelivery"; }
void PoolingDelivery::ioParam_pvpatchAccumulateType(enum ParamsIOFlag ioFlag) {
   PVParams *params = parent->parameters();

   params->ioParamStringRequired(
         ioFlag, name, "pvpatchAccumulateType", &mPvpatchAccumulateTypeString);
   if (ioFlag == PARAMS_IO_READ) {
      mAccumulateType = parseAccumulateTypeString(mPvpatchAccumulateTypeString);
      FatalIf(
            mAccumulateType == UNDEFINED,
            "pvpatchAccumulateType \"%s\" is unrecognized.\n"
            "  Allowed values are \"maxpooling\", \"sumpooling\", or \"avgpooling\".\n",
            mPvpatchAccumulateTypeString);
   }
}
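
// Maps the pvpatchAccumulateType string onto the AccumulateType enum.
// Matching is case-insensitive, and a separator after the first three
// characters is tolerated, so "maxpooling", "Max Pooling", and "max_pooling"
// all parse to MAXPOOLING. Returns UNDEFINED for anything unrecognized.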
PoolingDelivery::AccumulateType
PoolingDelivery::parseAccumulateTypeString(char const *poolingTypeString) {
   if (poolingTypeString == nullptr) {
      return UNDEFINED;
   }
   PoolingDelivery::AccumulateType accType;
   std::string str(poolingTypeString);
   // Convert to lowercase so that capitalization doesn't matter.
   for (auto &c : str) {
      c = std::tolower(c, std::locale());
   }
   // Drop a space or underscore in the fourth position, so that "max pooling"
   // and "max_pooling" are treated the same as "maxpooling" (likewise for sum
   // and avg).
   if (str.size() >= 4 && (str[3] == ' ' || str[3] == '_')) {
      str.erase(3, 1);
   }
   if (strcmp(str.c_str(), "maxpooling") == 0) {
      accType = MAXPOOLING;
   }
   else if (strcmp(str.c_str(), "sumpooling") == 0) {
      accType = SUMPOOLING;
   }
   else if (strcmp(str.c_str(), "avgpooling") == 0) {
      accType = AVGPOOLING;
   }
   else {
      accType = UNDEFINED;
   }
   return accType;
}
void PoolingDelivery::ioParam_updateGSynFromPostPerspective(enum ParamsIOFlag ioFlag) {
   auto *params = parent->parameters();
   pvAssert(!params->presentAndNotBeenRead(name, "receiveGpu"));
   if (!mReceiveGpu) {
      params->ioParamValue(
            ioFlag,
            name,
            "updateGSynFromPostPerspective",
            &mUpdateGSynFromPostPerspective,
            mUpdateGSynFromPostPerspective);
   }
   else {
      mUpdateGSynFromPostPerspective = true;
      params->handleUnnecessaryParameter(name, "updateGSynFromPostPerspective", true);
   }
}
void PoolingDelivery::ioParam_needPostIndexLayer(enum ParamsIOFlag ioFlag) {
   parent->parameters()->ioParamValue(
         ioFlag, name, "needPostIndexLayer", &mNeedPostIndexLayer, mNeedPostIndexLayer);
}
void PoolingDelivery::ioParam_postIndexLayerName(enum ParamsIOFlag ioFlag) {
   pvAssert(!parent->parameters()->presentAndNotBeenRead(name, "needPostIndexLayer"));
   if (mNeedPostIndexLayer) {
      parent->parameters()->ioParamStringRequired(
            ioFlag, name, "postIndexLayerName", &mPostIndexLayerName);
   }
}
Response::Status
PoolingDelivery::communicateInitInfo(std::shared_ptr<CommunicateInitInfoMessage const> message) {
   auto status = BaseDelivery::communicateInitInfo(message);
   if (!Response::completed(status)) {
      return status;
   }
   auto &hierarchy = message->mHierarchy;

   mPatchSize = mapLookupByType<PatchSize>(hierarchy, getDescription());
   FatalIf(mPatchSize == nullptr, "%s requires a PatchSize component.\n", getDescription_c());
   if (!mPatchSize->getInitInfoCommunicatedFlag()) {
      return Response::POSTPONE;
   }

   mWeightsPair = mapLookupByType<ImpliedWeightsPair>(hierarchy, getDescription());
   FatalIf(
         mWeightsPair == nullptr,
         "%s requires an ImpliedWeightsPair component.\n",
         getDescription_c());
   if (!mWeightsPair->getInitInfoCommunicatedFlag()) {
      return Response::POSTPONE;
   }
   if (mNeedPostIndexLayer) {
      pvAssert(mPostIndexLayerName);
      auto *objectMapComponent = mapLookupByType<ObjectMapComponent>(hierarchy, getDescription());
      FatalIf(
            objectMapComponent == nullptr,
            "%s requires an ObjectMapComponent.\n",
            getDescription_c());
      mPostIndexLayer =
            objectMapComponent->lookup<PoolingIndexLayer>(std::string(mPostIndexLayerName));
   }

   if (mUpdateGSynFromPostPerspective) {
      mWeightsPair->needPost();
   }
   else {
      mWeightsPair->needPre();
   }

#ifdef PV_USE_CUDA
   if (mReceiveGpu) {
      // Pre datastore, post weights, and post GSyn must be allocated on the device.
      getPreLayer()->setAllocDeviceDatastore();
      getPostLayer()->setAllocDeviceGSyn();
      Weights *weights = mWeightsPair->getPostWeights();
      pvAssert(weights);
      weights->useGPU();

      // If receiving from the pre perspective and the pre layer is sparse,
      // the active indices are needed on the device as well.
      if (!mUpdateGSynFromPostPerspective && getPreLayer()->getSparseFlag()) {
         getPreLayer()->setAllocDeviceActiveIndices();
      }
   }
#endif // PV_USE_CUDA
   return Response::SUCCESS;
}
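
// Forwards the CUDA device assignment to the post weights after the base
// class has handled its own buffers.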
#ifdef PV_USE_CUDA
Response::Status
PoolingDelivery::setCudaDevice(std::shared_ptr<SetCudaDeviceMessage const> message) {
   auto status = BaseDelivery::setCudaDevice(message);
   if (status != Response::SUCCESS) {
      return status;
   }
   Weights *weights = mWeightsPair->getPostWeights();
   pvAssert(weights);
   weights->setCudaDevice(message->mCudaDevice);
   return Response::SUCCESS;
}
#endif // PV_USE_CUDA

Response::Status PoolingDelivery::allocateDataStructures() {
   if (mPostIndexLayer && !mPostIndexLayer->getDataStructuresAllocatedFlag()) {
      if (parent->getCommunicator()->globalCommRank() == 0) {
         InfoLog().printf(
               "%s must wait until postIndexLayer \"%s\" has finished its "
               "allocateDataStructures stage.\n",
               getDescription_c(),
               mPostIndexLayer->getName());
      }
      return Response::POSTPONE;
   }
   auto status = BaseDelivery::allocateDataStructures();
   if (!Response::completed(status)) {
      return status;
   }
#ifdef PV_USE_CUDA
   if (mReceiveGpu) {
      if (!mPreLayer->getDataStructuresAllocatedFlag()) {
         return Response::POSTPONE;
      }
      if (!mPostLayer->getDataStructuresAllocatedFlag()) {
         return Response::POSTPONE;
      }
      if (!mWeightsPair->getDataStructuresAllocatedFlag()) {
         return Response::POSTPONE;
      }
      initializeDeliverKernelArgs();
   }
#endif // PV_USE_CUDA
   allocateThreadGSyn();
   return Response::SUCCESS;
}

#ifdef PV_USE_CUDA
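// Sets up the cuDNN pooling kernel. Max pooling maps directly onto
// CUDNN_POOLING_MAX. cuDNN has no sum-pooling mode, so sum pooling uses the
// average-pooling mode with a multiplier of nxpPost * nypPost to undo the
// division; avg pooling uses the average mode with the multiplier left at 1.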
void PoolingDelivery::initializeDeliverKernelArgs() {
   PVCuda::CudaBuffer *d_preDatastore = getPreLayer()->getDeviceDatastore();
   PVCuda::CudaBuffer *d_postGSyn     = getPostLayer()->getDeviceGSyn();
   Weights *weights                   = mWeightsPair->getPostWeights();
   pvAssert(weights);
   int const nxpPost = weights->getPatchSizeX();
   int const nypPost = weights->getPatchSizeY();
   cudnnPoolingMode_t poolingMode;
   int multiplier = 1;
   switch (mAccumulateType) {
      case MAXPOOLING: poolingMode = CUDNN_POOLING_MAX; break;
      case SUMPOOLING:
         poolingMode = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
         multiplier  = nxpPost * nypPost;
         break;
      case AVGPOOLING: poolingMode = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING; break;
      default: pvAssert(0); break;
   }
   mRecvKernel = new PVCuda::CudaPoolingDeliverKernel(parent->getDevice());
   mRecvKernel->setArgs(
         getPreLayer()->getLayerLoc(),
         getPostLayer()->getLayerLoc(),
         nxpPost,
         nypPost,
         poolingMode,
         multiplier,
         d_preDatastore,
         d_postGSyn,
         (int)getChannelCode());
}
#endif // PV_USE_CUDA
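
// When running with OpenMP, multiple threads accumulate into GSyn
// concurrently; to avoid write collisions each thread gets its own working
// copy of the post layer's GSyn (and of the gate-index buffer), allocated
// here and reduced back into the shared buffers after delivery.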
void PoolingDelivery::allocateThreadGSyn() {
   int const numThreads = parent->getNumThreads();
   if (numThreads > 1) {
      mThreadGSyn.resize(numThreads);
      mThreadGateIdxBuffer.resize(numThreads);
      // Each buffer covers one batch element; batch elements are processed serially.
      for (int th = 0; th < numThreads; th++) {
         mThreadGSyn[th].resize(mPostLayer->getNumNeurons());
         mThreadGateIdxBuffer[th].resize(mPostLayer->getNumNeurons());
      }
   }
}
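
// Entry point called each timestep. Dispatches to the GPU kernel or to the
// pre-/post-perspective CPU paths, depending on receiveGpu and
// updateGSynFromPostPerspective.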
void PoolingDelivery::deliver() {
   // Check if we need to update based on connection's channel
   if (getChannelCode() == CHANNEL_NOUPDATE) {
      return;
   }

#ifdef PV_USE_CUDA
   if (mReceiveGpu) {
      deliverGPU();
   }
   else
#endif // PV_USE_CUDA
   {
      if (mUpdateGSynFromPostPerspective) {
         deliverPostsynapticPerspective();
      }
      else {
         deliverPresynapticPerspective();
      }
   }
#ifdef PV_USE_CUDA
   mPostLayer->setUpdatedDeviceGSynFlag(!mReceiveGpu);
#endif // PV_USE_CUDA
}

void PoolingDelivery::deliverPostsynapticPerspective() {
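   // Post-perspective delivery: each postsynaptic neuron walks its receptive
   // field in the presynaptic layer and accumulates (max, sum, or scaled sum)
   // into its own GSyn entry. Because each output element is written by
   // exactly one iteration, the loop over post neurons is safe to parallelize.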
   PVLayerLoc const *sourceLoc = mPreLayer->getLayerLoc();
   PVLayerLoc const *targetLoc = mPostLayer->getLayerLoc();
   Weights *postWeights        = mWeightsPair->getPostWeights();

   void (*accumulateFunctionPointer)(
         int kPreRes, int nk, float *v, float *a, float *w, void *auxPtr, int sf) = nullptr;
   switch (mAccumulateType) {
      case MAXPOOLING: accumulateFunctionPointer = pvpatch_max_pooling_from_post; break;
      case SUMPOOLING: accumulateFunctionPointer = pvpatch_sum_pooling_from_post; break;
      case AVGPOOLING:
         // Average pooling uses the sum-pooling accumulator; the division by
         // the patch size is folded into the weight w below.
         accumulateFunctionPointer = pvpatch_sum_pooling_from_post;
         break;
      default: pvAssert(0); break;
   }

   float w = 1.0f;
   if (mAccumulateType == AVGPOOLING) {
      float relative_XScale = pow(2, (getPostLayer()->getXScale() - getPreLayer()->getXScale()));
      float relative_YScale = pow(2, (getPostLayer()->getYScale() - getPreLayer()->getYScale()));
      float nxp             = (float)mPatchSize->getPatchSizeX();
      float nyp             = (float)mPatchSize->getPatchSizeY();
      w = 1.0f / (nxp * relative_XScale * nyp * relative_YScale);
   }
   PVLayerCube activityCube = mPreLayer->getPublisher()->createCube(0 /*delay*/);

   float *gSyn = getPostLayer()->getChannel(getChannelCode());
   pvAssert(gSyn);

   // Number of restricted neurons in the target layer
   int const numPostRestricted = mPostLayer->getNumNeurons();

   int const sourceNx = sourceLoc->nx;
   int const sourceNy = sourceLoc->ny;
   int const sourceNf = sourceLoc->nf;
   int const targetNx = targetLoc->nx;
   int const targetNy = targetLoc->ny;
   int const targetNf = targetLoc->nf;

   const PVHalo *sourceHalo = &sourceLoc->halo;
   const PVHalo *targetHalo = &targetLoc->halo;

   // Extended-layer y stride in the source layer
   int sy = (sourceNx + sourceHalo->lt + sourceHalo->rt) * sourceNf;
   clearGateIdxBuffer();
   float *gatePatchHead = nullptr;
   if (mNeedPostIndexLayer) {
      gatePatchHead = mPostIndexLayer->getChannel(CHANNEL_EXC);
   }

   float resetVal = 0.0f;
   if (mAccumulateType == MAXPOOLING) {
      resetVal = -INFINITY;
   }

   for (int b = 0; b < parent->getNBatch(); b++) {
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for
#endif
      for (int kTargetRes = 0; kTargetRes < numPostRestricted; kTargetRes++) {
         float *activityBatch = activityCube.data
                                + b * (sourceNx + sourceHalo->rt + sourceHalo->lt)
                                          * (sourceNy + sourceHalo->up + sourceHalo->dn)
                                          * sourceNf;
         float *gSynBatchHead = gSyn + b * targetNx * targetNy * targetNf;
         // Change restricted to extended post neuron
         int kTargetExt = kIndexExtended(
               kTargetRes,
               targetNx,
               targetNy,
               targetNf,
               targetHalo->lt,
               targetHalo->rt,
               targetHalo->dn,
               targetHalo->up);

         long startSourceExt = postWeights->getGeometry()->getUnshrunkenStart(kTargetExt);

         float *gSynPatchPos = gSynBatchHead + kTargetRes;
         // Initialize to the identity of the pooling operation (0, or -inf for max)
         *gSynPatchPos = resetVal;

         float *gatePatchPos = nullptr;
         if (mNeedPostIndexLayer) {
            gatePatchPos = gatePatchHead + b * mPostIndexLayer->getNumNeurons() + kTargetRes;
            // Initialize the gate index to -1 (no winner yet)
            *gatePatchPos = (float)-1;
         }

         float *activityStartBuf = &(activityBatch[startSourceExt]);

         int sf           = postWeights->getPatchSizeF();
         int yPatchSize   = postWeights->getPatchSizeY();
         int numPerStride = postWeights->getPatchSizeX() * postWeights->getPatchSizeF();
         const PVLayerLoc *postLoc = mPostLayer->getLayerLoc();
         int const kfPost          = featureIndex(
               kTargetExt,
               postLoc->nx + postLoc->halo.lt + postLoc->halo.rt,
               postLoc->ny + postLoc->halo.dn + postLoc->halo.up,
               postLoc->nf);
         int offset = kfPost;

         for (int ky = 0; ky < yPatchSize; ky++) {
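            // Translate the local extended presynaptic index to a global
            // extended index; the accumulate functions record it as the gate
            // index when max pooling selects a winner.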
            int kPreExt = startSourceExt + ky * sy + offset;
            int const kxPreExt =
                  kxPos(kPreExt,
                        sourceLoc->nx + sourceLoc->halo.lt + sourceLoc->halo.rt,
                        sourceLoc->ny + sourceLoc->halo.dn + sourceLoc->halo.up,
                        sourceLoc->nf);
            int const kyPreExt =
                  kyPos(kPreExt,
                        sourceLoc->nx + sourceLoc->halo.lt + sourceLoc->halo.rt,
                        sourceLoc->ny + sourceLoc->halo.dn + sourceLoc->halo.up,
                        sourceLoc->nf);
            int const kfPre = featureIndex(
                  kPreExt,
                  sourceLoc->nx + sourceLoc->halo.lt + sourceLoc->halo.rt,
                  sourceLoc->ny + sourceLoc->halo.dn + sourceLoc->halo.up,
                  sourceLoc->nf);
            int const kxPreGlobalExt = kxPreExt + sourceLoc->kx0;
            int const kyPreGlobalExt = kyPreExt + sourceLoc->ky0;
            int const kPreGlobalExt  = kIndex(
                  kxPreGlobalExt,
                  kyPreGlobalExt,
                  kfPre,
                  sourceLoc->nxGlobal + sourceLoc->halo.lt + sourceLoc->halo.rt,
                  sourceLoc->nyGlobal + sourceLoc->halo.up + sourceLoc->halo.dn,
                  sourceLoc->nf);
            float *activityY = &(activityStartBuf[ky * sy + offset]);

            (accumulateFunctionPointer)(
                  kPreGlobalExt, numPerStride, gSynPatchPos, activityY, &w, gatePatchPos, sf);
         }
      }
   }
}
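
// Pre-perspective delivery: loop over presynaptic activity (only the active
// indices when the layer is sparse) and scatter each value into the GSyn
// patch it projects to. With OpenMP, threads scatter into private buffers
// that are reduced afterward.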
void PoolingDelivery::deliverPresynapticPerspective() {
   PVLayerLoc const *preLoc  = getPreLayer()->getLayerLoc();
   PVLayerLoc const *postLoc = getPostLayer()->getLayerLoc();
   Weights *preWeights       = mWeightsPair->getPreWeights();

   void (*accumulateFunctionPointer)(
         int kPreRes, int nk, float *v, float a, float *w, void *auxPtr, int sf) = nullptr;
   switch (mAccumulateType) {
      case MAXPOOLING: accumulateFunctionPointer = pvpatch_max_pooling; break;
      case SUMPOOLING: accumulateFunctionPointer = pvpatch_sum_pooling; break;
      case AVGPOOLING:
         // As in the post-perspective path, average pooling is sum pooling
         // with the normalization folded into w.
         accumulateFunctionPointer = pvpatch_sum_pooling;
         break;
      default: pvAssert(0); break;
   }

   float w = 1.0f;
   if (mAccumulateType == AVGPOOLING) {
      float relative_XScale = pow(2, (getPostLayer()->getXScale() - getPreLayer()->getXScale()));
      float relative_YScale = pow(2, (getPostLayer()->getYScale() - getPreLayer()->getYScale()));
      float nxp             = (float)mPatchSize->getPatchSizeX();
      float nyp             = (float)mPatchSize->getPatchSizeY();
      w = 1.0f / (nxp * relative_XScale * nyp * relative_YScale);
   }
   PVLayerCube activityCube = mPreLayer->getPublisher()->createCube(0 /*delay*/);

   float *gSyn = getPostLayer()->getChannel(getChannelCode());
   pvAssert(gSyn);

   float resetVal = 0.0f;
   if (mAccumulateType == MAXPOOLING) {
      resetVal = -INFINITY;
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for
#endif
      for (int i = 0; i < getPostLayer()->getNumNeuronsAllBatches(); i++) {
         gSyn[i] = resetVal;
      }
   }
   clearGateIdxBuffer();

   for (int b = 0; b < mPreLayer->getLayerLoc()->nbatch; b++) {
      float *activityBatch = activityCube.data
                             + b * (preLoc->nx + preLoc->halo.rt + preLoc->halo.lt)
                                       * (preLoc->ny + preLoc->halo.up + preLoc->halo.dn)
                                       * preLoc->nf;
      float *gSynPatchHeadBatch = gSyn + b * postLoc->nx * postLoc->ny * postLoc->nf;
      float *gatePatchHeadBatch = NULL;
      if (mNeedPostIndexLayer) {
         gatePatchHeadBatch =
               mPostIndexLayer->getChannel(CHANNEL_EXC) + b * mPostIndexLayer->getNumNeurons();
      }

      SparseList<float>::Entry const *activeIndicesBatch = NULL;
      if (activityCube.isSparse) {
         activeIndicesBatch = (SparseList<float>::Entry *)activityCube.activeIndices
                              + b * (preLoc->nx + preLoc->halo.rt + preLoc->halo.lt)
                                        * (preLoc->ny + preLoc->halo.up + preLoc->halo.dn)
                                        * preLoc->nf;
      }
      int numLoop = activityCube.isSparse ? activityCube.numActive[b] : mPreLayer->getNumExtended();

      if (!mThreadGateIdxBuffer.empty()) {
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for
#endif
         for (int i = 0; i < parent->getNumThreads() * getPostLayer()->getNumNeurons(); i++) {
            int ti = i / getPostLayer()->getNumNeurons();
            int ni = i % getPostLayer()->getNumNeurons();
            mThreadGateIdxBuffer[ti][ni] = -1;
         }
      }
#ifdef PV_USE_OPENMP_THREADS
      // Clear all thread GSyn buffers
      if (!mThreadGSyn.empty()) {
         int numNeurons = getPostLayer()->getNumNeurons();
#pragma omp parallel for
         for (int i = 0; i < parent->getNumThreads() * numNeurons; i++) {
            int ti = i / numNeurons;
            int ni = i % numNeurons;
            mThreadGSyn[ti][ni] = resetVal;
         }
      }
#endif // PV_USE_OPENMP_THREADS
      std::size_t const *gSynPatchStart = preWeights->getGeometry()->getGSynPatchStart().data();

#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
      for (int loopIndex = 0; loopIndex < numLoop; loopIndex++) {
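         // For sparse activity, loopIndex walks the active list; otherwise it
         // walks every extended presynaptic neuron.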
         int kPreExt;
         float a;
         if (activityCube.isSparse) {
            kPreExt = activeIndicesBatch[loopIndex].index;
            a       = activeIndicesBatch[loopIndex].value;
         }
         else {
            kPreExt = loopIndex;
            a       = activityBatch[kPreExt];
         }
         float *gSynPatchHead;
         float *gatePatchHead = NULL;
#ifdef PV_USE_OPENMP_THREADS
         if (!mThreadGSyn.empty()) {
            int ti        = omp_get_thread_num();
            gSynPatchHead = mThreadGSyn[ti].data();
         }
         else {
            gSynPatchHead = gSynPatchHeadBatch;
         }
         if (mNeedPostIndexLayer) {
            if (!mThreadGateIdxBuffer.empty()) {
               int ti        = omp_get_thread_num();
               gatePatchHead = mThreadGateIdxBuffer[ti].data();
            }
            else {
               gatePatchHead = gatePatchHeadBatch;
            }
         }
#else // PV_USE_OPENMP_THREADS
         gSynPatchHead = gSynPatchHeadBatch;
         if (mNeedPostIndexLayer) {
            gatePatchHead = gatePatchHeadBatch;
         }
#endif // PV_USE_OPENMP_THREADS

         Patch const *patch = &preWeights->getPatch(kPreExt);
         int const nk       = patch->nx * preWeights->getPatchSizeF();
         int const ny       = patch->ny;
         int const sy              = postLoc->nx * postLoc->nf; // stride in restricted post layer
         float *postPatchStart     = &gSynPatchHead[gSynPatchStart[kPreExt]];
         float *postGatePatchStart = &gatePatchHead[gSynPatchStart[kPreExt]];
         int const kxPreExt =
               kxPos(kPreExt,
                     preLoc->nx + preLoc->halo.lt + preLoc->halo.rt,
                     preLoc->ny + preLoc->halo.dn + preLoc->halo.up,
                     preLoc->nf);
         int const kyPreExt =
               kyPos(kPreExt,
                     preLoc->nx + preLoc->halo.lt + preLoc->halo.rt,
                     preLoc->ny + preLoc->halo.dn + preLoc->halo.up,
                     preLoc->nf);
         int const kfPre = featureIndex(
               kPreExt,
               preLoc->nx + preLoc->halo.lt + preLoc->halo.rt,
               preLoc->ny + preLoc->halo.dn + preLoc->halo.up,
               preLoc->nf);

         int const kxPreGlobalExt = kxPreExt + preLoc->kx0;
         int const kyPreGlobalExt = kyPreExt + preLoc->ky0;
         int const kPreGlobalExt  = kIndex(
               kxPreGlobalExt,
               kyPreGlobalExt,
               kfPre,
               preLoc->nxGlobal + preLoc->halo.lt + preLoc->halo.rt,
               preLoc->nyGlobal + preLoc->halo.up + preLoc->halo.dn,
               preLoc->nf);

         int offset = kfPre;
         int sf     = preWeights->getPatchSizeF();
         void *auxPtr = nullptr;
         for (int y = 0; y < ny; y++) {
            if (mNeedPostIndexLayer) {
               auxPtr = &postGatePatchStart[y * sy + offset];
            }
            (accumulateFunctionPointer)(
                  kPreGlobalExt, nk, postPatchStart + y * sy + offset, a, &w, auxPtr, sf);
         }
      }
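
      // Reduce the per-thread buffers back into the shared GSyn: take the
      // elementwise max (carrying the winning gate index along) for max
      // pooling, or the sum for sum/avg pooling.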
#ifdef PV_USE_OPENMP_THREADS
      if (!mThreadGSyn.empty()) {
         float *gSynPatchHead = gSynPatchHeadBatch;
         float *gateIdxBuffer = nullptr;
         if (mNeedPostIndexLayer && !mThreadGateIdxBuffer.empty()) {
            gateIdxBuffer = gatePatchHeadBatch;
         }
         int numNeurons = getPostLayer()->getNumNeurons();
         // Looping over neurons first to be thread safe
#pragma omp parallel for
         for (int ni = 0; ni < numNeurons; ni++) {
            if (mAccumulateType == MAXPOOLING) {
               for (int ti = 0; ti < parent->getNumThreads(); ti++) {
                  if (gSynPatchHead[ni] < mThreadGSyn[ti][ni]) {
                     gSynPatchHead[ni] = mThreadGSyn[ti][ni];
                     if (mNeedPostIndexLayer && !mThreadGateIdxBuffer.empty()) {
                        gateIdxBuffer[ni] = mThreadGateIdxBuffer[ti][ni];
                     }
                  }
               }
            }
            else {
               for (int ti = 0; ti < parent->getNumThreads(); ti++) {
                  gSynPatchHead[ni] += mThreadGSyn[ti][ni];
               }
            }
         }
      }
#endif // PV_USE_OPENMP_THREADS
   }
   if (activityCube.isSparse) {
      // With sparse activity and max pooling, post neurons whose receptive
      // field had no active input still hold -INFINITY; reset them to zero.
      for (int k = 0; k < getPostLayer()->getNumNeuronsAllBatches(); k++) {
         if (gSyn[k] == -INFINITY) {
            gSyn[k] = 0.0f;
         }
      }
   }
}
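
// Resets the postIndexLayer's gate buffer before a delivery pass.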
void PoolingDelivery::clearGateIdxBuffer() {
   if (mNeedPostIndexLayer) {
      resetGSynBuffers_PoolingIndexLayer(
            mPostIndexLayer->getLayerLoc()->nbatch,
            mPostIndexLayer->getNumNeurons(),
            mPostIndexLayer->getNumChannels(),
            mPostIndexLayer->getChannel(CHANNEL_EXC));
   }
}
bool PoolingDelivery::isAllInputReady() {
   bool isAllReady = true;
   if (getChannelCode() != CHANNEL_NOUPDATE) {
      isAllReady = getPreLayer()->isExchangeFinished(0 /*delay*/);
   }
   return isAllReady;
}

#ifdef PV_USE_CUDA
void PoolingDelivery::deliverGPU() {
   pvAssert(getChannelCode() != CHANNEL_NOUPDATE); // Only deliver if there is a channel
   pvAssert(mPostLayer->getChannel(getChannelCode()));

   if (mPreLayer->getUpdatedDeviceDatastoreFlag()) {
      PVLayerCube activityCube = mPreLayer->getPublisher()->createCube(0 /*delay*/);
      float *h_preDatastore    = activityCube.data;
      PVCuda::CudaBuffer *d_preDatastore = mPreLayer->getDeviceDatastore();
      pvAssert(d_preDatastore);
      d_preDatastore->copyToDevice(h_preDatastore);
      // The device now has the updated datastore
      mPreLayer->setUpdatedDeviceDatastoreFlag(false);
   }

   mRecvKernel->run();
}
#endif // PV_USE_CUDA