#include "PoolingDelivery.hpp"
#include "columns/HyPerCol.hpp"
#include "columns/ObjectMapComponent.hpp"
#include "delivery/accumulate_functions.hpp"
#include "utils/MapLookupByType.hpp"

#include <cmath>
#include <cstring>
#include <locale>
#include <string>

#ifdef PV_USE_OPENMP_THREADS
#include <omp.h>
#endif

namespace PV {

PoolingDelivery::PoolingDelivery(char const *name, HyPerCol *hc) { initialize(name, hc); }
PoolingDelivery::PoolingDelivery() {}

PoolingDelivery::~PoolingDelivery() {}
int PoolingDelivery::initialize(char const *name, HyPerCol *hc) {
   return BaseDelivery::initialize(name, hc);
}

void PoolingDelivery::setObjectType() { mObjectType = "PoolingDelivery"; }
int PoolingDelivery::ioParamsFillGroup(enum ParamsIOFlag ioFlag) {
   int status = BaseDelivery::ioParamsFillGroup(ioFlag);
   ioParam_pvpatchAccumulateType(ioFlag);
   ioParam_updateGSynFromPostPerspective(ioFlag);
   ioParam_needPostIndexLayer(ioFlag);
   ioParam_postIndexLayerName(ioFlag);
   return status;
}

// pvpatchAccumulateType: Specifies the method to accumulate synaptic input.
void PoolingDelivery::ioParam_pvpatchAccumulateType(enum ParamsIOFlag ioFlag) {
   PVParams *params = parent->parameters();
   params->ioParamStringRequired(
         ioFlag, name, "pvpatchAccumulateType", &mPvpatchAccumulateTypeString);
   if (ioFlag == PARAMS_IO_READ) {
      mAccumulateType = parseAccumulateTypeString(mPvpatchAccumulateTypeString);
      FatalIf(
            mAccumulateType == UNDEFINED,
            "pvpatchAccumulateType \"%s\" is unrecognized.\n"
            "  Allowed values are \"maxpooling\", \"sumpooling\", or \"avgpooling\".\n",
            mPvpatchAccumulateTypeString);
   }
}
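// For illustration, a hypothetical params-file entry that this reader would accept
// (the group name and type are made up; only pvpatchAccumulateType is read here):
//
//    PoolingConn "V1ToV1MaxPool" = {
//       pvpatchAccumulateType = "maxpooling"; // or "sumpooling" / "avgpooling"
//       ...
//    };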
PoolingDelivery::AccumulateType
PoolingDelivery::parseAccumulateTypeString(char const *poolingTypeString) {
   if (poolingTypeString == nullptr) {
      return UNDEFINED;
   }
   PoolingDelivery::AccumulateType accType;
   std::string str(poolingTypeString);
   // Convert to lowercase so that capitalization doesn't matter.
   for (auto &c : str) {
      c = std::tolower(c, std::locale());
   }
   // "max pooling", "max_pooling", and "maxpooling" are equivalent
   // (and likewise for sum and avg).
   if (str.size() >= 4 && (str[3] == ' ' || str[3] == '_')) {
      str.erase(3, 1);
   }
   if (strcmp(str.c_str(), "maxpooling") == 0) {
      accType = MAXPOOLING;
   }
   else if (strcmp(str.c_str(), "sumpooling") == 0) {
      accType = SUMPOOLING;
   }
   else if (strcmp(str.c_str(), "avgpooling") == 0) {
      accType = AVGPOOLING;
   }
   else {
      accType = UNDEFINED;
   }
   return accType;
}
// updateGSynFromPostPerspective: Specifies if the connection should push from
// pre (false) or pull from post (true). When receiving on the GPU, the post
// perspective is required, so the flag is forced to true.
void PoolingDelivery::ioParam_updateGSynFromPostPerspective(enum ParamsIOFlag ioFlag) {
   auto *params = parent->parameters();
   pvAssert(!params->presentAndNotBeenRead(name, "receiveGpu"));
   if (!mReceiveGpu) {
      params->ioParamValue(
            ioFlag,
            name,
            "updateGSynFromPostPerspective",
            &mUpdateGSynFromPostPerspective,
            mUpdateGSynFromPostPerspective);
   }
   else {
      mUpdateGSynFromPostPerspective = true;
      params->handleUnnecessaryParameter(name, "updateGSynFromPostPerspective", true);
   }
}
void PoolingDelivery::ioParam_needPostIndexLayer(enum ParamsIOFlag ioFlag) {
   parent->parameters()->ioParamValue(
         ioFlag, name, "needPostIndexLayer", &mNeedPostIndexLayer, mNeedPostIndexLayer);
}

void PoolingDelivery::ioParam_postIndexLayerName(enum ParamsIOFlag ioFlag) {
   pvAssert(!parent->parameters()->presentAndNotBeenRead(name, "needPostIndexLayer"));
   if (mNeedPostIndexLayer) {
      parent->parameters()->ioParamStringRequired(
            ioFlag, name, "postIndexLayerName", &mPostIndexLayerName);
   }
}
Response::Status
PoolingDelivery::communicateInitInfo(std::shared_ptr<CommunicateInitInfoMessage const> message) {
   auto status = BaseDelivery::communicateInitInfo(message);
   if (!Response::completed(status)) {
      return status;
   }

   auto &hierarchy = message->mHierarchy;

   mPatchSize = mapLookupByType<PatchSize>(hierarchy, getDescription());
   FatalIf(mPatchSize == nullptr, "%s requires a PatchSize component.\n", getDescription_c());
   if (!mPatchSize->getInitInfoCommunicatedFlag()) {
      return Response::POSTPONE;
   }

   mWeightsPair = mapLookupByType<ImpliedWeightsPair>(hierarchy, getDescription());
   FatalIf(
         mWeightsPair == nullptr,
         "%s requires an ImpliedWeightsPair component.\n",
         getDescription_c());
   if (!mWeightsPair->getInitInfoCommunicatedFlag()) {
      return Response::POSTPONE;
   }

   if (mNeedPostIndexLayer) {
      pvAssert(mPostIndexLayerName);
      auto *objectMapComponent = mapLookupByType<ObjectMapComponent>(hierarchy, getDescription());
      FatalIf(
            objectMapComponent == nullptr,
            "%s requires an ObjectMapComponent.\n",
            getDescription_c());
      mPostIndexLayer =
            objectMapComponent->lookup<PoolingIndexLayer>(std::string(mPostIndexLayerName));
   }

   // Create the pre- or post-perspective weights geometry, depending on which
   // deliver method will be used.
   if (mUpdateGSynFromPostPerspective) {
      mWeightsPair->needPost();
   }
   else {
      mWeightsPair->needPre();
   }

#ifdef PV_USE_CUDA
   if (mReceiveGpu) {
      // Pre datastore, post GSyn, and the weights are needed on the GPU.
      getPreLayer()->setAllocDeviceDatastore();
      getPostLayer()->setAllocDeviceGSyn();
      Weights *weights = mWeightsPair->getPostWeights();
      pvAssert(weights);
      weights->useGPU();

      // When receiving from the pre perspective with a sparse pre layer, the
      // active-indices buffer is needed on the device as well.
      if (!mUpdateGSynFromPostPerspective && getPreLayer()->getSparseFlag()) {
         getPreLayer()->setAllocDeviceActiveIndices();
      }
   }
#endif // PV_USE_CUDA
   return Response::SUCCESS;
}
#ifdef PV_USE_CUDA
Response::Status
PoolingDelivery::setCudaDevice(std::shared_ptr<SetCudaDeviceMessage const> message) {
   if (mUsingGPUFlag) {
      auto status = BaseDelivery::setCudaDevice(message);
      if (status != Response::SUCCESS) {
         return status;
      }
      Weights *weights = mWeightsPair->getPostWeights();
      pvAssert(weights);
      weights->setCudaDevice(message->mCudaDevice);
   }
   return Response::SUCCESS;
}
#endif // PV_USE_CUDA

Response::Status PoolingDelivery::allocateDataStructures() {
   if (mPostIndexLayer && !mPostIndexLayer->getDataStructuresAllocatedFlag()) {
      if (parent->getCommunicator()->globalCommRank() == 0) {
         InfoLog().printf(
               "%s must wait until postIndexLayer \"%s\" has finished its "
               "allocateDataStructures stage.\n",
               getDescription_c(),
               mPostIndexLayer->getName());
      }
      return Response::POSTPONE;
   }
   auto status = BaseDelivery::allocateDataStructures();
   if (!Response::completed(status)) {
      return status;
   }
#ifdef PV_USE_CUDA
   if (mReceiveGpu) {
      if (!mPreLayer->getDataStructuresAllocatedFlag()) {
         return Response::POSTPONE;
      }
      if (!mPostLayer->getDataStructuresAllocatedFlag()) {
         return Response::POSTPONE;
      }
      if (!mWeightsPair->getDataStructuresAllocatedFlag()) {
         return Response::POSTPONE;
      }
      initializeDeliverKernelArgs();
   }
#endif // PV_USE_CUDA
   allocateThreadGSyn();
   return Response::SUCCESS;
}
#ifdef PV_USE_CUDA
void PoolingDelivery::initializeDeliverKernelArgs() {
   PVCuda::CudaBuffer *d_preDatastore = getPreLayer()->getDeviceDatastore();
   PVCuda::CudaBuffer *d_postGSyn     = getPostLayer()->getDeviceGSyn();
   Weights *weights                   = mWeightsPair->getPostWeights();
   pvAssert(weights);
   int const nxpPost = weights->getPatchSizeX();
   int const nypPost = weights->getPatchSizeY();

   cudnnPoolingMode_t poolingMode;
   int multiplier = 1;
   switch (mAccumulateType) {
      case MAXPOOLING: poolingMode = CUDNN_POOLING_MAX; break;
      case SUMPOOLING:
         // cuDNN has no sum-pooling mode; use average pooling and scale the
         // result by the number of elements in the pooling window.
         poolingMode = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
         multiplier  = nxpPost * nypPost;
         break;
      case AVGPOOLING: poolingMode = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING; break;
      default: pvAssert(0); break;
   }

   mRecvKernel = new PVCuda::CudaPoolingDeliverKernel(parent->getDevice());
   mRecvKernel->setArgs(
         getPreLayer()->getLayerLoc(),
         getPostLayer()->getLayerLoc(),
         nxpPost,
         nypPost,
         poolingMode,
         multiplier,
         d_preDatastore,
         d_postGSyn,
         (int)getChannelCode());
}
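// For a concrete example: with a hypothetical 2x2 post-weights patch, sum pooling
// runs cuDNN average pooling (each output is the mean of 4 inputs) with
// multiplier = 2 * 2 = 4, so the delivered value equals the sum of the 4 inputs.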
#endif // PV_USE_CUDA

void PoolingDelivery::allocateThreadGSyn() {
   // If multithreaded, allocate a GSyn buffer for each thread, to avoid collisions.
   int const numThreads = parent->getNumThreads();
   if (numThreads > 1) {
      mThreadGSyn.resize(numThreads);
      mThreadGateIdxBuffer.resize(numThreads);
      // mThreadGateIdxBuffer is only used when mNeedPostIndexLayer is true, but
      // it is simpler to allocate it in all cases than to check.
      for (int th = 0; th < numThreads; th++) {
         mThreadGSyn[th].resize(mPostLayer->getNumNeurons());
         mThreadGateIdxBuffer[th].resize(mPostLayer->getNumNeurons());
      }
   }
}
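// Rationale: several pre neurons may pool onto the same post neuron, so letting
// each OpenMP thread scatter into a private buffer and reducing afterward avoids
// write conflicts on the shared GSyn buffer.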
void PoolingDelivery::deliver() {
   // Check if we need to update based on the connection's channel
   if (getChannelCode() == CHANNEL_NOUPDATE) {
      return;
   }

#ifdef PV_USE_CUDA
   if (mReceiveGpu) {
      deliverGPU();
   }
   else
#endif // PV_USE_CUDA
   {
      if (mUpdateGSynFromPostPerspective) {
         deliverPostsynapticPerspective();
      }
      else {
         deliverPresynapticPerspective();
      }
   }
#ifdef PV_USE_CUDA
   mPostLayer->setUpdatedDeviceGSynFlag(!mReceiveGpu);
#endif // PV_USE_CUDA
}

void PoolingDelivery::deliverPostsynapticPerspective() {
   PVLayerLoc const *sourceLoc = mPreLayer->getLayerLoc();
   PVLayerLoc const *targetLoc = mPostLayer->getLayerLoc();
   Weights *postWeights        = mWeightsPair->getPostWeights();

   // Selecting the function pointer on each call is slightly inefficient; the
   // real cost is calling it inside a tight loop.
   void (*accumulateFunctionPointer)(
         int kPreRes, int nk, float *v, float *a, float *w, void *auxPtr, int sf) = nullptr;
   switch (mAccumulateType) {
      case MAXPOOLING: accumulateFunctionPointer = pvpatch_max_pooling_from_post; break;
      case SUMPOOLING: accumulateFunctionPointer = pvpatch_sum_pooling_from_post; break;
      case AVGPOOLING:
         // Average pooling reuses the sum-pooling accumulator with a scaled weight.
         accumulateFunctionPointer = pvpatch_sum_pooling_from_post;
         break;
      default:
         pvAssert(0);
         break;
   }

   float w = 1.0f;
   if (mAccumulateType == AVGPOOLING) {
      float relative_XScale = powf(2.0f, (float)(getPostLayer()->getXScale() - getPreLayer()->getXScale()));
      float relative_YScale = powf(2.0f, (float)(getPostLayer()->getYScale() - getPreLayer()->getYScale()));
      float nxp             = (float)mPatchSize->getPatchSizeX();
      float nyp             = (float)mPatchSize->getPatchSizeY();
      w                     = 1.0f / (nxp * relative_XScale * nyp * relative_YScale);
   }
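   // Example: with matching pre/post scales (relative_XScale == relative_YScale == 1)
   // and a 4x4 pooling patch, w = 1/16, so summing the 16 inputs with weight w
   // yields their average.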
   PVLayerCube activityCube = mPreLayer->getPublisher()->createCube(0 /*delay*/);

   float *gSyn = getPostLayer()->getChannel(getChannelCode());
   pvAssert(gSyn);

   // Number of restricted neurons in the target (post) layer
   int const numPostRestricted = mPostLayer->getNumNeurons();

   int const sourceNx = sourceLoc->nx;
   int const sourceNy = sourceLoc->ny;
   int const sourceNf = sourceLoc->nf;
   int const targetNx = targetLoc->nx;
   int const targetNy = targetLoc->ny;
   int const targetNf = targetLoc->nf;

   const PVHalo *sourceHalo = &sourceLoc->halo;
   const PVHalo *targetHalo = &targetLoc->halo;

   // y stride in the source layer's extended frame
   int sy = (sourceNx + sourceHalo->lt + sourceHalo->rt) * sourceNf;

   clearGateIdxBuffer();
   float *gatePatchHead = nullptr;
   if (mNeedPostIndexLayer) {
      gatePatchHead = mPostIndexLayer->getChannel(CHANNEL_EXC);
   }

   float resetVal = 0.0f;
   if (mAccumulateType == MAXPOOLING) {
      resetVal = -INFINITY;
   }
   for (int b = 0; b < parent->getNBatch(); b++) {
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for
#endif
      for (int kTargetRes = 0; kTargetRes < numPostRestricted; kTargetRes++) {
         float *activityBatch = activityCube.data
                                + b * (sourceNx + sourceHalo->rt + sourceHalo->lt)
                                        * (sourceNy + sourceHalo->up + sourceHalo->dn) * sourceNf;
         float *gSynBatchHead = gSyn + b * targetNx * targetNy * targetNf;

         // Change restricted to extended post neuron
         int kTargetExt = kIndexExtended(
               kTargetRes,
               targetNx,
               targetNy,
               targetNf,
               targetHalo->lt,
               targetHalo->rt,
               targetHalo->dn,
               targetHalo->up);
         long startSourceExt = postWeights->getGeometry()->getUnshrunkenStart(kTargetExt);

         // Start of the target neuron's GSyn, initialized to the reset value
         float *gSynPatchPos = gSynBatchHead + kTargetRes;
         *gSynPatchPos       = resetVal;

         float *gatePatchPos = nullptr;
         if (mNeedPostIndexLayer) {
            gatePatchPos = gatePatchHead + b * mPostIndexLayer->getNumNeurons() + kTargetRes;
            // Initialize the gate index to a negative (invalid) value
            *gatePatchPos = (float)-1;
         }

         float *activityStartBuf = &(activityBatch[startSourceExt]);

         int sf           = postWeights->getPatchSizeF();
         int yPatchSize   = postWeights->getPatchSizeY();
         int numPerStride = postWeights->getPatchSizeX() * postWeights->getPatchSizeF();

         const PVLayerLoc *postLoc = mPostLayer->getLayerLoc();
         int const kfPost          = featureIndex(
               kTargetExt,
               postLoc->nx + postLoc->halo.lt + postLoc->halo.rt,
               postLoc->ny + postLoc->halo.dn + postLoc->halo.up,
               postLoc->nf);
         int offset = kfPost;

         for (int ky = 0; ky < yPatchSize; ky++) {
            int kPreExt = startSourceExt + ky * sy + offset;
            int const kxPreExt =
                  kxPos(kPreExt,
                        sourceLoc->nx + sourceLoc->halo.lt + sourceLoc->halo.rt,
                        sourceLoc->ny + sourceLoc->halo.dn + sourceLoc->halo.up,
                        sourceLoc->nf);
            int const kyPreExt =
                  kyPos(kPreExt,
                        sourceLoc->nx + sourceLoc->halo.lt + sourceLoc->halo.rt,
                        sourceLoc->ny + sourceLoc->halo.dn + sourceLoc->halo.up,
                        sourceLoc->nf);
            int const kfPre = featureIndex(
                  kPreExt,
                  sourceLoc->nx + sourceLoc->halo.lt + sourceLoc->halo.rt,
                  sourceLoc->ny + sourceLoc->halo.dn + sourceLoc->halo.up,
                  sourceLoc->nf);
            int const kxPreGlobalExt = kxPreExt + sourceLoc->kx0;
            int const kyPreGlobalExt = kyPreExt + sourceLoc->ky0;
            int const kPreGlobalExt  = kIndex(
                  kxPreGlobalExt,
                  kyPreGlobalExt,
                  kfPre,
                  sourceLoc->nxGlobal + sourceLoc->halo.lt + sourceLoc->halo.rt,
                  sourceLoc->nyGlobal + sourceLoc->halo.up + sourceLoc->halo.dn,
                  sourceLoc->nf);

            float *activityY = &(activityStartBuf[ky * sy + offset]);

            (accumulateFunctionPointer)(
                  kPreGlobalExt, numPerStride, gSynPatchPos, activityY, &w, gatePatchPos, sf);
         }
      }
   }
}
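// Note the structural difference between the two perspectives: the loop above
// parallelizes over post neurons, each of which writes only its own GSyn entry,
// so no per-thread buffers are needed. The pre-perspective path below scatters
// from each pre neuron into many post neurons, which is why it accumulates into
// mThreadGSyn and reduces afterward.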
void PoolingDelivery::deliverPresynapticPerspective() {
   PVLayerLoc const *preLoc  = getPreLayer()->getLayerLoc();
   PVLayerLoc const *postLoc = getPostLayer()->getLayerLoc();
   Weights *preWeights       = mWeightsPair->getPreWeights();

   // As in deliverPostsynapticPerspective(), select the accumulation routine.
   void (*accumulateFunctionPointer)(
         int kPreRes, int nk, float *v, float a, float *w, void *auxPtr, int sf) = nullptr;
   switch (mAccumulateType) {
      case MAXPOOLING: accumulateFunctionPointer = pvpatch_max_pooling; break;
      case SUMPOOLING: accumulateFunctionPointer = pvpatch_sum_pooling; break;
      case AVGPOOLING:
         // Average pooling reuses the sum-pooling accumulator with a scaled weight.
         accumulateFunctionPointer = pvpatch_sum_pooling;
         break;
      default:
         pvAssert(0);
         break;
   }

   float w = 1.0f;
   if (mAccumulateType == AVGPOOLING) {
      float relative_XScale = powf(2.0f, (float)(getPostLayer()->getXScale() - getPreLayer()->getXScale()));
      float relative_YScale = powf(2.0f, (float)(getPostLayer()->getYScale() - getPreLayer()->getYScale()));
      float nxp             = (float)mPatchSize->getPatchSizeX();
      float nyp             = (float)mPatchSize->getPatchSizeY();
      w                     = 1.0f / (nxp * relative_XScale * nyp * relative_YScale);
   }

   PVLayerCube activityCube = mPreLayer->getPublisher()->createCube(0 /*delay*/);

   float *gSyn = getPostLayer()->getChannel(getChannelCode());
   pvAssert(gSyn);

   float resetVal = 0.0f;
   if (mAccumulateType == MAXPOOLING) {
      resetVal = -INFINITY;
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for
#endif
      for (int i = 0; i < getPostLayer()->getNumNeuronsAllBatches(); i++) {
         gSyn[i] = resetVal;
      }
   }
   clearGateIdxBuffer();

   for (int b = 0; b < mPreLayer->getLayerLoc()->nbatch; b++) {
      float *activityBatch = activityCube.data
                             + b * (preLoc->nx + preLoc->halo.rt + preLoc->halo.lt)
                                     * (preLoc->ny + preLoc->halo.up + preLoc->halo.dn)
                                     * preLoc->nf;
      float *gSynPatchHeadBatch = gSyn + b * postLoc->nx * postLoc->ny * postLoc->nf;
      float *gatePatchHeadBatch = NULL;
      if (mNeedPostIndexLayer) {
         gatePatchHeadBatch =
               mPostIndexLayer->getChannel(CHANNEL_EXC) + b * mPostIndexLayer->getNumNeurons();
      }

      SparseList<float>::Entry const *activeIndicesBatch = NULL;
      if (activityCube.isSparse) {
         activeIndicesBatch = (SparseList<float>::Entry *)activityCube.activeIndices
                              + b * (preLoc->nx + preLoc->halo.rt + preLoc->halo.lt)
                                      * (preLoc->ny + preLoc->halo.up + preLoc->halo.dn)
                                      * preLoc->nf;
      }

      int numLoop = activityCube.isSparse ? activityCube.numActive[b] : mPreLayer->getNumExtended();

      if (!mThreadGateIdxBuffer.empty()) {
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for
#endif
         for (int i = 0; i < parent->getNumThreads() * getPostLayer()->getNumNeurons(); i++) {
            int ti                       = i / getPostLayer()->getNumNeurons();
            int ni                       = i % getPostLayer()->getNumNeurons();
            mThreadGateIdxBuffer[ti][ni] = -1;
         }
      }
#ifdef PV_USE_OPENMP_THREADS
      // Reset each thread's working copy of the post GSyn buffer.
      if (!mThreadGSyn.empty()) {
         int numNeurons = getPostLayer()->getNumNeurons();
#pragma omp parallel for
         for (int i = 0; i < parent->getNumThreads() * numNeurons; i++) {
            int ti              = i / numNeurons;
            int ni              = i % numNeurons;
            mThreadGSyn[ti][ni] = resetVal;
         }
      }
#endif // PV_USE_OPENMP_THREADS

      std::size_t const *gSynPatchStart = preWeights->getGeometry()->getGSynPatchStart().data();

#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
      for (int loopIndex = 0; loopIndex < numLoop; loopIndex++) {
         int kPreExt;
         float a;
         if (activityCube.isSparse) {
            kPreExt = activeIndicesBatch[loopIndex].index;
            a       = activeIndicesBatch[loopIndex].value;
         }
         else {
            kPreExt = loopIndex;
            a       = activityBatch[kPreExt];
         }
         // If multithreaded, direct this iteration's writes into the thread's
         // private buffers.
         float *gSynPatchHead;
         float *gatePatchHead = NULL;
#ifdef PV_USE_OPENMP_THREADS
         if (!mThreadGSyn.empty()) {
            int ti        = omp_get_thread_num();
            gSynPatchHead = mThreadGSyn[ti].data();
         }
         else {
            gSynPatchHead = gSynPatchHeadBatch;
         }
         if (mNeedPostIndexLayer) {
            if (!mThreadGateIdxBuffer.empty()) {
               int ti        = omp_get_thread_num();
               gatePatchHead = mThreadGateIdxBuffer[ti].data();
            }
            else {
               gatePatchHead = gatePatchHeadBatch;
            }
         }
#else // PV_USE_OPENMP_THREADS
         gSynPatchHead = gSynPatchHeadBatch;
         if (mNeedPostIndexLayer) {
            gatePatchHead = gatePatchHeadBatch;
         }
#endif // PV_USE_OPENMP_THREADS

         Patch const *patch        = &preWeights->getPatch(kPreExt);
         int const nk              = patch->nx * preWeights->getPatchSizeF();
         int const ny              = patch->ny;
         int const sy              = postLoc->nx * postLoc->nf; // stride in restricted post layer
         float *postPatchStart     = &gSynPatchHead[gSynPatchStart[kPreExt]];
         float *postGatePatchStart = &gatePatchHead[gSynPatchStart[kPreExt]];

         int const kxPreExt =
               kxPos(kPreExt,
                     preLoc->nx + preLoc->halo.lt + preLoc->halo.rt,
                     preLoc->ny + preLoc->halo.dn + preLoc->halo.up,
                     preLoc->nf);
         int const kyPreExt =
               kyPos(kPreExt,
                     preLoc->nx + preLoc->halo.lt + preLoc->halo.rt,
                     preLoc->ny + preLoc->halo.dn + preLoc->halo.up,
                     preLoc->nf);
         int const kfPre = featureIndex(
               kPreExt,
               preLoc->nx + preLoc->halo.lt + preLoc->halo.rt,
               preLoc->ny + preLoc->halo.dn + preLoc->halo.up,
               preLoc->nf);

         int const kxPreGlobalExt = kxPreExt + preLoc->kx0;
         int const kyPreGlobalExt = kyPreExt + preLoc->ky0;

         int const kPreGlobalExt = kIndex(
               kxPreGlobalExt,
               kyPreGlobalExt,
               kfPre,
               preLoc->nxGlobal + preLoc->halo.lt + preLoc->halo.rt,
               preLoc->nyGlobal + preLoc->halo.up + preLoc->halo.dn,
               preLoc->nf);

         int offset = kfPre;
         int sf     = preWeights->getPatchSizeF();

         void *auxPtr = nullptr;
         for (int y = 0; y < ny; y++) {
            if (mNeedPostIndexLayer) {
               auxPtr = &postGatePatchStart[y * sy + offset];
            }
            (accumulateFunctionPointer)(
                  kPreGlobalExt, nk, postPatchStart + y * sy + offset, a, &w, auxPtr, sf);
         }
      }
#ifdef PV_USE_OPENMP_THREADS
      // Reduce the per-thread buffers into the layer's GSyn: take the max across
      // threads for max pooling (carrying the winning gate index along), and the
      // sum across threads otherwise.
      if (!mThreadGSyn.empty()) {
         float *gSynPatchHead = gSynPatchHeadBatch;
         float *gateIdxBuffer = nullptr;
         if (mNeedPostIndexLayer && !mThreadGateIdxBuffer.empty()) {
            gateIdxBuffer = gatePatchHeadBatch;
         }
         int numNeurons = getPostLayer()->getNumNeurons();

#pragma omp parallel for
         for (int ni = 0; ni < numNeurons; ni++) {
            if (mAccumulateType == MAXPOOLING) {
               for (int ti = 0; ti < parent->getNumThreads(); ti++) {
                  if (gSynPatchHead[ni] < mThreadGSyn[ti][ni]) {
                     gSynPatchHead[ni] = mThreadGSyn[ti][ni];
                     if (mNeedPostIndexLayer && !mThreadGateIdxBuffer.empty()) {
                        gateIdxBuffer[ni] = mThreadGateIdxBuffer[ti][ni];
                     }
                  }
               }
            }
            else {
               for (int ti = 0; ti < parent->getNumThreads(); ti++) {
                  gSynPatchHead[ni] += mThreadGSyn[ti][ni];
               }
            }
         }
      }
#endif // PV_USE_OPENMP_THREADS
   } // end batch loop
   // With sparse activity and max pooling, post neurons that received no active
   // input still hold the -INFINITY reset value; zero them out.
   if (activityCube.isSparse) {
      for (int k = 0; k < getPostLayer()->getNumNeuronsAllBatches(); k++) {
         if (gSyn[k] == -INFINITY) {
            gSyn[k] = 0.0f;
         }
      }
   }
}
void PoolingDelivery::clearGateIdxBuffer() {
   if (mNeedPostIndexLayer) {
      // Reset the post index layer's gate-index buffer
      resetGSynBuffers_PoolingIndexLayer(
            mPostIndexLayer->getLayerLoc()->nbatch,
            mPostIndexLayer->getNumNeurons(),
            mPostIndexLayer->getNumChannels(),
            mPostIndexLayer->getChannel(CHANNEL_EXC));
   }
}
bool PoolingDelivery::isAllInputReady() {
   bool isReady = true;
   if (getChannelCode() != CHANNEL_NOUPDATE) {
      isReady &= getPreLayer()->isExchangeFinished(0 /*delay*/);
   }
   return isReady;
}
#ifdef PV_USE_CUDA
void PoolingDelivery::deliverGPU() {
   pvAssert(getChannelCode() != CHANNEL_NOUPDATE);
   pvAssert(mPostLayer->getChannel(getChannelCode()));

   if (mPreLayer->getUpdatedDeviceDatastoreFlag()) {
      PVLayerCube activityCube           = mPreLayer->getPublisher()->createCube(0 /*delay*/);
      float *h_preDatastore              = activityCube.data;
      PVCuda::CudaBuffer *d_preDatastore = mPreLayer->getDeviceDatastore();
      pvAssert(d_preDatastore);
      d_preDatastore->copyToDevice(h_preDatastore);
      // Copying the datastore to the device clears the updated flag.
      mPreLayer->setUpdatedDeviceDatastoreFlag(false);
   }

   mRecvKernel->run();
}
#endif // PV_USE_CUDA

} // end namespace PV