/*
 * PoolingDelivery.cpp
 *
 *  Created on: Aug 24, 2017
 *      Author: Pete Schultz
 */

#include "PoolingDelivery.hpp"
#include "columns/HyPerCol.hpp"
#include "columns/ObjectMapComponent.hpp"
#include "delivery/accumulate_functions.hpp"
#include "utils/MapLookupByType.hpp"

namespace PV {

PoolingDelivery::PoolingDelivery(char const *name, HyPerCol *hc) { initialize(name, hc); }

PoolingDelivery::PoolingDelivery() {}

PoolingDelivery::~PoolingDelivery() {}

int PoolingDelivery::initialize(char const *name, HyPerCol *hc) {
   return BaseDelivery::initialize(name, hc);
}

void PoolingDelivery::setObjectType() { mObjectType = "PoolingDelivery"; }

int PoolingDelivery::ioParamsFillGroup(enum ParamsIOFlag ioFlag) {
   int status = BaseDelivery::ioParamsFillGroup(ioFlag);
   ioParam_pvpatchAccumulateType(ioFlag);
   ioParam_updateGSynFromPostPerspective(ioFlag);
   ioParam_needPostIndexLayer(ioFlag);
   ioParam_postIndexLayerName(ioFlag);
   return PV_SUCCESS;
}

void PoolingDelivery::ioParam_pvpatchAccumulateType(enum ParamsIOFlag ioFlag) {
   PVParams *params = parent->parameters();

   params->ioParamStringRequired(
         ioFlag, name, "pvpatchAccumulateType", &mPvpatchAccumulateTypeString);
   if (ioFlag == PARAMS_IO_READ) {
      mAccumulateType = parseAccumulateTypeString(mPvpatchAccumulateTypeString);
      FatalIf(
            mAccumulateType == UNDEFINED,
            "pvpatchAccumulateType \"%s\" is unrecognized.\n"
            "  Allowed values are \"maxpooling\", \"sumpooling\", or \"avgpooling\".\n",
            mPvpatchAccumulateTypeString);
   }
}

PoolingDelivery::AccumulateType
PoolingDelivery::parseAccumulateTypeString(char const *poolingTypeString) {
   if (poolingTypeString == nullptr) {
      return UNDEFINED;
   }
   PoolingDelivery::AccumulateType accType;
   std::string str(poolingTypeString);
   // Convert string to lowercase so that capitalization doesn't matter.
   for (auto &c : str) {
      c = std::tolower(c, std::locale());
   }
   // "max_pooling", "max pooling", and "maxpooling" are equally acceptable (same for sum and avg).
   if (str.size() >= 4 && (str[3] == ' ' || str[3] == '_')) {
      str.erase(3, 1);
   }

   if (strcmp(str.c_str(), "maxpooling") == 0) {
      accType = MAXPOOLING;
   }
   else if (strcmp(str.c_str(), "sumpooling") == 0) {
      accType = SUMPOOLING;
   }
   else if (strcmp(str.c_str(), "avgpooling") == 0) {
      accType = AVGPOOLING;
   }
   else {
      accType = UNDEFINED;
   }
   return accType;
}
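
// Illustrative examples (editor's sketch, not in the original source): because of the
// lowercasing and the removal of a space or underscore after the first three characters,
// the calls below all return PoolingDelivery::MAXPOOLING, while an unrecognized string
// such as "meanpooling" returns UNDEFINED.
//
//   parseAccumulateTypeString("maxpooling");
//   parseAccumulateTypeString("Max Pooling");
//   parseAccumulateTypeString("MAX_POOLING");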

void PoolingDelivery::ioParam_updateGSynFromPostPerspective(enum ParamsIOFlag ioFlag) {
   auto *params = parent->parameters();
   pvAssert(!params->presentAndNotBeenRead(name, "receiveGpu"));
   if (!mReceiveGpu) {
      params->ioParamValue(
            ioFlag,
            name,
            "updateGSynFromPostPerspective",
            &mUpdateGSynFromPostPerspective,
            mUpdateGSynFromPostPerspective);
   }
   else {
      mUpdateGSynFromPostPerspective = true;
      params->handleUnnecessaryParameter(name, "updateGSynFromPostPerspective", true);
   }
}

void PoolingDelivery::ioParam_needPostIndexLayer(enum ParamsIOFlag ioFlag) {
   parent->parameters()->ioParamValue(
         ioFlag, name, "needPostIndexLayer", &mNeedPostIndexLayer, mNeedPostIndexLayer);
}

void PoolingDelivery::ioParam_postIndexLayerName(enum ParamsIOFlag ioFlag) {
   pvAssert(!parent->parameters()->presentAndNotBeenRead(name, "needPostIndexLayer"));
   if (mNeedPostIndexLayer) {
      parent->parameters()->ioParamStringRequired(
            ioFlag, name, "postIndexLayerName", &mPostIndexLayerName);
   }
}
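
// Illustrative sketch (editor's addition; the group type and names are hypothetical): in a
// .params file, the parameters read above might appear along the following lines.
//
//   PoolingConn "PreToPostPool" = {
//      pvpatchAccumulateType         = "maxpooling";
//      receiveGpu                    = false;
//      updateGSynFromPostPerspective = false;
//      needPostIndexLayer            = true;
//      postIndexLayerName            = "PreToPostPoolIndexLayer";
//   };
//
// Note that when receiveGpu is true, updateGSynFromPostPerspective is forced to true and any
// value given in the params file is ignored.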

Response::Status
PoolingDelivery::communicateInitInfo(std::shared_ptr<CommunicateInitInfoMessage const> message) {
   auto status = BaseDelivery::communicateInitInfo(message);
   if (!Response::completed(status)) {
      return status;
   }

   auto &hierarchy = message->mHierarchy;

   mPatchSize = mapLookupByType<PatchSize>(hierarchy, getDescription());
   FatalIf(mPatchSize == nullptr, "%s requires a PatchSize component.\n", getDescription_c());
   if (!mPatchSize->getInitInfoCommunicatedFlag()) {
      return Response::POSTPONE;
   }

   mWeightsPair = mapLookupByType<ImpliedWeightsPair>(hierarchy, getDescription());
   FatalIf(
         mWeightsPair == nullptr,
         "%s requires an ImpliedWeightsPair component.\n",
         getDescription_c());
   if (!mWeightsPair->getInitInfoCommunicatedFlag()) {
      return Response::POSTPONE;
   }

   if (mNeedPostIndexLayer) {
      pvAssert(mPostIndexLayerName);
      auto *objectMapComponent = mapLookupByType<ObjectMapComponent>(hierarchy, getDescription());
      FatalIf(
            objectMapComponent == nullptr,
            "%s requires an ObjectMapComponent.\n",
            getDescription_c());
      mPostIndexLayer =
            objectMapComponent->lookup<PoolingIndexLayer>(std::string(mPostIndexLayerName));
   }

   if (mUpdateGSynFromPostPerspective) {
      mWeightsPair->needPost();
   }
   else {
      mWeightsPair->needPre();
   }

#ifdef PV_USE_CUDA
   if (mReceiveGpu) {
      // We need pre datastore, weights, and post GSyn for the channelCode allocated on the GPU.
      getPreLayer()->setAllocDeviceDatastore();
      getPostLayer()->setAllocDeviceGSyn();
      Weights *weights = mWeightsPair->getPostWeights();
      pvAssert(weights);
      weights->useGPU();

      // If receiving from pre and the pre layer is sparse, allocate activeIndices.
      if (!mUpdateGSynFromPostPerspective && getPreLayer()->getSparseFlag()) {
         getPreLayer()->setAllocDeviceActiveIndices();
      }
   }
#endif // PV_USE_CUDA
   return Response::SUCCESS;
}

#ifdef PV_USE_CUDA
Response::Status
PoolingDelivery::setCudaDevice(std::shared_ptr<SetCudaDeviceMessage const> message) {
   if (mUsingGPUFlag) {
      auto status = BaseDelivery::setCudaDevice(message);
      if (status != Response::SUCCESS) {
         return status;
      }
      Weights *weights = mWeightsPair->getPostWeights();
      pvAssert(weights);
      weights->setCudaDevice(message->mCudaDevice);
   }
   return Response::SUCCESS;
}
#endif // PV_USE_CUDA

Response::Status PoolingDelivery::allocateDataStructures() {
   if (mPostIndexLayer and !mPostIndexLayer->getDataStructuresAllocatedFlag()) {
      if (parent->getCommunicator()->globalCommRank() == 0) {
         InfoLog().printf(
               "%s must wait until postIndexLayer \"%s\" has finished its "
               "allocateDataStructures stage.\n",
               getDescription_c(),
               mPostIndexLayer->getName());
      }
      return Response::POSTPONE;
   }
   auto status = BaseDelivery::allocateDataStructures();
   if (!Response::completed(status)) {
      return status;
   }
#ifdef PV_USE_CUDA
   if (mReceiveGpu) {
      if (!getPreLayer()->getDataStructuresAllocatedFlag()) {
         return Response::POSTPONE;
      }
      if (!getPostLayer()->getDataStructuresAllocatedFlag()) {
         return Response::POSTPONE;
      }
      if (!mWeightsPair->getDataStructuresAllocatedFlag()) {
         return Response::POSTPONE;
      }
      initializeDeliverKernelArgs();
   }
#endif // PV_USE_CUDA
   allocateThreadGSyn();
   return Response::SUCCESS;
}

#ifdef PV_USE_CUDA
void PoolingDelivery::initializeDeliverKernelArgs() {
   PVCuda::CudaBuffer *d_preDatastore = getPreLayer()->getDeviceDatastore();
   PVCuda::CudaBuffer *d_postGSyn = getPostLayer()->getDeviceGSyn();
   Weights *weights = mWeightsPair->getPostWeights();
   pvAssert(weights);
   int const nxpPost = weights->getPatchSizeX();
   int const nypPost = weights->getPatchSizeY();
   cudnnPoolingMode_t poolingMode;
   int multiplier = 1;
   switch (mAccumulateType) {
      case MAXPOOLING: poolingMode = CUDNN_POOLING_MAX; break;
      case SUMPOOLING:
         // cuDNN has no sum-pooling mode, so use average pooling and scale the result by the
         // number of elements in the pooling window.
         poolingMode = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
         multiplier  = nxpPost * nypPost;
         break;
      case AVGPOOLING: poolingMode = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING; break;
      default: pvAssert(0); break;
   }

   mRecvKernel = new PVCuda::CudaPoolingDeliverKernel(parent->getDevice());
   mRecvKernel->setArgs(
         getPreLayer()->getLayerLoc(),
         getPostLayer()->getLayerLoc(),
         nxpPost,
         nypPost,
         poolingMode,
         multiplier,
         d_preDatastore,
         d_postGSyn,
         (int)mChannelCode);
}
#endif // PV_USE_CUDA

void PoolingDelivery::allocateThreadGSyn() {
   // If multithreaded, allocate a GSyn buffer for each thread, to avoid collisions.
   int const numThreads = parent->getNumThreads();
   if (numThreads > 1) {
      mThreadGSyn.resize(numThreads);
      mThreadGateIdxBuffer.resize(numThreads);
      // mThreadGSyn is only a buffer for one batch element. We're threading over presynaptic
      // neuron index, not batch element; so batch elements will be processed serially.
      for (int th = 0; th < numThreads; th++) {
         mThreadGSyn[th].resize(mPostLayer->getNumNeurons());
         mThreadGateIdxBuffer[th].resize(mPostLayer->getNumNeurons());
      }
   }
}
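
// Illustrative note (editor's addition): each per-thread buffer holds one batch element's worth
// of postsynaptic GSyn (or pooling-index) values. In deliverPresynapticPerspective(), every
// OpenMP thread accumulates into its own buffer, and the buffers are then reduced back into the
// shared GSyn buffer: elementwise maximum for MAXPOOLING, elementwise sum for SUMPOOLING and
// AVGPOOLING.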

void PoolingDelivery::deliver() {
   // Check if we need to update based on connection's channel
   if (getChannelCode() == CHANNEL_NOUPDATE) {
      return;
   }

   if (mReceiveGpu) {
#ifdef PV_USE_CUDA
      deliverGPU();
#endif // PV_USE_CUDA
   }
   else {
      if (mUpdateGSynFromPostPerspective) {
         deliverPostsynapticPerspective();
      }
      else {
         deliverPresynapticPerspective();
      }
   }
#ifdef PV_USE_CUDA
   mPostLayer->setUpdatedDeviceGSynFlag(!mReceiveGpu);
#endif // PV_USE_CUDA
}

void PoolingDelivery::deliverPostsynapticPerspective() {
   PVLayerLoc const *sourceLoc = mPreLayer->getLayerLoc();
   PVLayerLoc const *targetLoc = mPostLayer->getLayerLoc();
   Weights *postWeights = mWeightsPair->getPostWeights();

   // Slightly inefficient to define the function pointer each time deliver() is called;
   // but the real inefficiency is calling the function pointer in a tight for-loop.
   // TODO: Use templating instead of function pointer.
   void (*accumulateFunctionPointer)(
         int kPreRes, int nk, float *v, float *a, float *w, void *auxPtr, int sf) = nullptr;
   switch (mAccumulateType) {
      case MAXPOOLING: accumulateFunctionPointer = pvpatch_max_pooling_from_post; break;
      case SUMPOOLING: accumulateFunctionPointer = pvpatch_sum_pooling_from_post; break;
      case AVGPOOLING:
         accumulateFunctionPointer = pvpatch_sum_pooling_from_post;
         // Division by the number of weights happens outside the call to the accumulate function.
         break;
      default:
         pvAssert(0);
         // Only MAXPOOLING, SUMPOOLING, AVGPOOLING are allowed.
         // UNDEFINED is the only other possible value of mAccumulateType, but the type should be
         // defined before this function is ever called.
         break;
   }

   float w = 1.0f;
   if (mAccumulateType == AVGPOOLING) {
      float relative_XScale = pow(2, (getPostLayer()->getXScale() - getPreLayer()->getXScale()));
      float relative_YScale = pow(2, (getPostLayer()->getYScale() - getPreLayer()->getYScale()));
      float nxp = (float)mPatchSize->getPatchSizeX();
      float nyp = (float)mPatchSize->getPatchSizeY();
      w = 1.0f / (nxp * relative_XScale * nyp * relative_YScale);
   }
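
   // Worked example (editor's addition): if the pre and post layers have the same scale,
   // relative_XScale and relative_YScale are both 2^0 = 1, so for a 3x3 patch
   // w = 1 / (3 * 1 * 3 * 1) = 1/9; average pooling then sums the nine inputs scaled by 1/9.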

   PVLayerCube activityCube = mPreLayer->getPublisher()->createCube(0 /*delay*/);

   float *gSyn = getPostLayer()->getChannel(getChannelCode());
   pvAssert(gSyn);

   // Get the number of neurons in the restricted target layer
   int const numPostRestricted = mPostLayer->getNumNeurons();

   int const sourceNx = sourceLoc->nx;
   int const sourceNy = sourceLoc->ny;
   int const sourceNf = sourceLoc->nf;
   int const targetNx = targetLoc->nx;
   int const targetNy = targetLoc->ny;
   int const targetNf = targetLoc->nf;

   const PVHalo *sourceHalo = &sourceLoc->halo;
   const PVHalo *targetHalo = &targetLoc->halo;

   // get source layer's extended y stride
   int sy = (sourceNx + sourceHalo->lt + sourceHalo->rt) * sourceNf;

   clearGateIdxBuffer();
   float *gatePatchHead = nullptr;
   if (mNeedPostIndexLayer) {
      gatePatchHead = mPostIndexLayer->getChannel(CHANNEL_EXC);
   }

   float resetVal = 0.0f;
   if (mAccumulateType == MAXPOOLING) {
      resetVal = -INFINITY;
   }

   for (int b = 0; b < parent->getNBatch(); b++) {
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for
#endif
      for (int kTargetRes = 0; kTargetRes < numPostRestricted; kTargetRes++) {
         float *activityBatch = activityCube.data
                                + b * (sourceNx + sourceHalo->rt + sourceHalo->lt)
                                        * (sourceNy + sourceHalo->up + sourceHalo->dn) * sourceNf;
         float *gSynBatchHead = gSyn + b * targetNx * targetNy * targetNf;

         // Change restricted to extended post neuron
         int kTargetExt = kIndexExtended(
               kTargetRes,
               targetNx,
               targetNy,
               targetNf,
               targetHalo->lt,
               targetHalo->rt,
               targetHalo->dn,
               targetHalo->up);
         long startSourceExt = postWeights->getGeometry()->getUnshrunkenStart(kTargetExt);

         // Calculate target's start of gsyn
         float *gSynPatchPos = gSynBatchHead + kTargetRes;
         // Initialize the patch accumulator to resetVal (-INFINITY for maxpooling, 0 otherwise)
         *gSynPatchPos = resetVal;

         float *gatePatchPos = nullptr;
         if (mNeedPostIndexLayer) {
            gatePatchPos = gatePatchHead + b * mPostIndexLayer->getNumNeurons() + kTargetRes;
            // Initialize gatePatchPos as a negative number
            *gatePatchPos = (float)-1;
         }

         float *activityStartBuf = &(activityBatch[startSourceExt]);

         int sf = postWeights->getPatchSizeF();
         int yPatchSize = postWeights->getPatchSizeY();
         int numPerStride = postWeights->getPatchSizeX() * postWeights->getPatchSizeF();

         const PVLayerLoc *postLoc = mPostLayer->getLayerLoc();
         int const kfPost = featureIndex(
               kTargetExt,
               postLoc->nx + postLoc->halo.lt + postLoc->halo.rt,
               postLoc->ny + postLoc->halo.dn + postLoc->halo.up,
               postLoc->nf);
         int offset = kfPost;

         for (int ky = 0; ky < yPatchSize; ky++) {
            int kPreExt = startSourceExt + ky * sy + offset;
            int const kxPreExt =
                  kxPos(kPreExt,
                        sourceLoc->nx + sourceLoc->halo.lt + sourceLoc->halo.rt,
                        sourceLoc->ny + sourceLoc->halo.dn + sourceLoc->halo.up,
                        sourceLoc->nf);
            int const kyPreExt =
                  kyPos(kPreExt,
                        sourceLoc->nx + sourceLoc->halo.lt + sourceLoc->halo.rt,
                        sourceLoc->ny + sourceLoc->halo.dn + sourceLoc->halo.up,
                        sourceLoc->nf);
            int const kfPre = featureIndex(
                  kPreExt,
                  sourceLoc->nx + sourceLoc->halo.lt + sourceLoc->halo.rt,
                  sourceLoc->ny + sourceLoc->halo.dn + sourceLoc->halo.up,
                  sourceLoc->nf);
            int const kxPreGlobalExt = kxPreExt + sourceLoc->kx0;
            int const kyPreGlobalExt = kyPreExt + sourceLoc->ky0;
            int const kPreGlobalExt = kIndex(
                  kxPreGlobalExt,
                  kyPreGlobalExt,
                  kfPre,
                  sourceLoc->nxGlobal + sourceLoc->halo.lt + sourceLoc->halo.rt,
                  sourceLoc->nyGlobal + sourceLoc->halo.up + sourceLoc->halo.dn,
                  sourceLoc->nf);

            float *activityY = &(activityStartBuf[ky * sy + offset]);

            (accumulateFunctionPointer)(
                  kPreGlobalExt, numPerStride, gSynPatchPos, activityY, &w, gatePatchPos, sf);
         }
      }
   }
}

void PoolingDelivery::deliverPresynapticPerspective() {
   PVLayerLoc const *preLoc = getPreLayer()->getLayerLoc();
   PVLayerLoc const *postLoc = getPostLayer()->getLayerLoc();
   Weights *preWeights = mWeightsPair->getPreWeights();

   // Slightly inefficient to define the function pointer each time deliver() is called;
   // but the real inefficiency is calling the function pointer in a tight for-loop.
   // TODO: Use templating instead of function pointer.
   void (*accumulateFunctionPointer)(
         int kPreRes, int nk, float *v, float a, float *w, void *auxPtr, int sf) = nullptr;
   switch (mAccumulateType) {
      case MAXPOOLING: accumulateFunctionPointer = pvpatch_max_pooling; break;
      case SUMPOOLING: accumulateFunctionPointer = pvpatch_sum_pooling; break;
      case AVGPOOLING:
         accumulateFunctionPointer = pvpatch_sum_pooling;
         // Division by the number of weights happens outside the call to the accumulate function.
         break;
      default:
         pvAssert(0);
         // Only MAXPOOLING, SUMPOOLING, AVGPOOLING are allowed.
         // UNDEFINED is the only other possible value of mAccumulateType, but the type should be
         // defined before this function is ever called.
         break;
   }

   float w = 1.0f;
   if (mAccumulateType == AVGPOOLING) {
      float relative_XScale = pow(2, (getPostLayer()->getXScale() - getPreLayer()->getXScale()));
      float relative_YScale = pow(2, (getPostLayer()->getYScale() - getPreLayer()->getYScale()));
      float nxp = (float)mPatchSize->getPatchSizeX();
      float nyp = (float)mPatchSize->getPatchSizeY();
      w = 1.0f / (nxp * relative_XScale * nyp * relative_YScale);
   }

   PVLayerCube activityCube = mPreLayer->getPublisher()->createCube(0 /*delay*/);

   float *gSyn = getPostLayer()->getChannel(getChannelCode());
   pvAssert(gSyn);

   float resetVal = 0.0f;
   if (mAccumulateType == MAXPOOLING) {
      resetVal = -INFINITY;
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for
#endif
      for (int i = 0; i < getPostLayer()->getNumNeuronsAllBatches(); i++) {
         gSyn[i] = resetVal;
      }
   }

   clearGateIdxBuffer();

   for (int b = 0; b < mPreLayer->getLayerLoc()->nbatch; b++) {
      float *activityBatch = activityCube.data
                             + b * (preLoc->nx + preLoc->halo.rt + preLoc->halo.lt)
                                     * (preLoc->ny + preLoc->halo.up + preLoc->halo.dn)
                                     * preLoc->nf;
      float *gSynPatchHeadBatch = gSyn + b * postLoc->nx * postLoc->ny * postLoc->nf;
      float *gatePatchHeadBatch = nullptr;
      if (mNeedPostIndexLayer) {
         gatePatchHeadBatch =
               mPostIndexLayer->getChannel(CHANNEL_EXC) + b * mPostIndexLayer->getNumNeurons();
      }

      SparseList<float>::Entry const *activeIndicesBatch = nullptr;
      if (activityCube.isSparse) {
         activeIndicesBatch = (SparseList<float>::Entry *)activityCube.activeIndices
                              + b * (preLoc->nx + preLoc->halo.rt + preLoc->halo.lt)
                                      * (preLoc->ny + preLoc->halo.up + preLoc->halo.dn)
                                      * preLoc->nf;
      }
      int numLoop = activityCube.isSparse ? activityCube.numActive[b] : mPreLayer->getNumExtended();

      if (!mThreadGateIdxBuffer.empty()) {
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for
#endif
         for (int i = 0; i < parent->getNumThreads() * getPostLayer()->getNumNeurons(); i++) {
            int ti = i / getPostLayer()->getNumNeurons();
            int ni = i % getPostLayer()->getNumNeurons();
            mThreadGateIdxBuffer[ti][ni] = -1;
         }
      }

#ifdef PV_USE_OPENMP_THREADS
      // Clear all gsyn buffers
      if (!mThreadGSyn.empty()) {
         int numNeurons = getPostLayer()->getNumNeurons();
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for
#endif
         for (int i = 0; i < parent->getNumThreads() * numNeurons; i++) {
            int ti = i / numNeurons;
            int ni = i % numNeurons;
            mThreadGSyn[ti][ni] = resetVal;
         }
      }
#endif // PV_USE_OPENMP_THREADS
      std::size_t const *gSynPatchStart = preWeights->getGeometry()->getGSynPatchStart().data();

#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
      for (int loopIndex = 0; loopIndex < numLoop; loopIndex++) {
         int kPreExt;
         float a; // We never convert rates to spike counts in pooling conns
         if (activityCube.isSparse) {
            kPreExt = activeIndicesBatch[loopIndex].index;
            a = activeIndicesBatch[loopIndex].value;
         }
         else {
            kPreExt = loopIndex;
            a = activityBatch[kPreExt];
         }

         // If we're using mThreadGSyn, set this here
         float *gSynPatchHead;
         float *gatePatchHead = nullptr;
#ifdef PV_USE_OPENMP_THREADS
         if (!mThreadGSyn.empty()) {
            int ti = omp_get_thread_num();
            gSynPatchHead = mThreadGSyn[ti].data();
         }
         else {
            gSynPatchHead = gSynPatchHeadBatch;
         }

         if (mNeedPostIndexLayer) {
            if (!mThreadGateIdxBuffer.empty()) {
               int ti = omp_get_thread_num();
               gatePatchHead = mThreadGateIdxBuffer[ti].data();
            }
            else {
               gatePatchHead = gatePatchHeadBatch;
            }
         }
#else // PV_USE_OPENMP_THREADS
         gSynPatchHead = gSynPatchHeadBatch;
         if (mNeedPostIndexLayer) {
            gatePatchHead = gatePatchHeadBatch;
         }
#endif // PV_USE_OPENMP_THREADS
         Patch const *patch = &preWeights->getPatch(kPreExt);
         int const nk = patch->nx * preWeights->getPatchSizeF();
         int const ny = patch->ny;
         int const sy = postLoc->nx * postLoc->nf; // stride in restricted layer
         float *postPatchStart = &gSynPatchHead[gSynPatchStart[kPreExt]];
         float *postGatePatchStart = &gatePatchHead[gSynPatchStart[kPreExt]];

         int const kxPreExt =
               kxPos(kPreExt,
                     preLoc->nx + preLoc->halo.lt + preLoc->halo.rt,
                     preLoc->ny + preLoc->halo.dn + preLoc->halo.up,
                     preLoc->nf);
         int const kyPreExt =
               kyPos(kPreExt,
                     preLoc->nx + preLoc->halo.lt + preLoc->halo.rt,
                     preLoc->ny + preLoc->halo.dn + preLoc->halo.up,
                     preLoc->nf);
         int const kfPre = featureIndex(
               kPreExt,
               preLoc->nx + preLoc->halo.lt + preLoc->halo.rt,
               preLoc->ny + preLoc->halo.dn + preLoc->halo.up,
               preLoc->nf);

         int const kxPreGlobalExt = kxPreExt + preLoc->kx0;
         int const kyPreGlobalExt = kyPreExt + preLoc->ky0;

         int const kPreGlobalExt = kIndex(
               kxPreGlobalExt,
               kyPreGlobalExt,
               kfPre,
               preLoc->nxGlobal + preLoc->halo.lt + preLoc->halo.rt,
               preLoc->nyGlobal + preLoc->halo.up + preLoc->halo.dn,
               preLoc->nf);

         int offset = kfPre;
         int sf = preWeights->getPatchSizeF();
         void *auxPtr = nullptr;
         for (int y = 0; y < ny; y++) {
            if (mNeedPostIndexLayer) {
               auxPtr = &postGatePatchStart[y * sy + offset];
            }
            (accumulateFunctionPointer)(
                  kPreGlobalExt, nk, postPatchStart + y * sy + offset, a, &w, auxPtr, sf);
         }
      }
#ifdef PV_USE_OPENMP_THREADS
      // Accumulate the per-thread buffers back into gSyn. Should this be done in HyPerLayer,
      // where it can be done once, as opposed to once per connection?
      if (!mThreadGSyn.empty()) {
         float *gSynPatchHead = gSynPatchHeadBatch;
         float *gateIdxBuffer = nullptr;
         if (mNeedPostIndexLayer && !mThreadGateIdxBuffer.empty()) {
            gateIdxBuffer = gatePatchHeadBatch;
         }
         int numNeurons = getPostLayer()->getNumNeurons();
// Looping over neurons first to be thread safe
#pragma omp parallel for
         for (int ni = 0; ni < numNeurons; ni++) {
            // Different for maxpooling
            if (mAccumulateType == MAXPOOLING) {
               for (int ti = 0; ti < parent->getNumThreads(); ti++) {
                  if (gSynPatchHead[ni] < mThreadGSyn[ti][ni]) {
                     gSynPatchHead[ni] = mThreadGSyn[ti][ni];
                     if (mNeedPostIndexLayer && !mThreadGateIdxBuffer.empty()) {
                        gateIdxBuffer[ni] = mThreadGateIdxBuffer[ti][ni];
                     }
                  }
               }
            }
            else {
               for (int ti = 0; ti < parent->getNumThreads(); ti++) {
                  gSynPatchHead[ni] += mThreadGSyn[ti][ni];
               }
            }
         }
      }
#endif
   }
   if (activityCube.isSparse) {
      for (int k = 0; k < getPostLayer()->getNumNeuronsAllBatches(); k++) {
         if (gSyn[k] == -INFINITY) {
            gSyn[k] = 0.0f;
         }
      }
   }
}

void PoolingDelivery::clearGateIdxBuffer() {
   if (mNeedPostIndexLayer) {
      // Reset mPostIndexLayer's gsyn
      resetGSynBuffers_PoolingIndexLayer(
            mPostIndexLayer->getLayerLoc()->nbatch,
            mPostIndexLayer->getNumNeurons(),
            mPostIndexLayer->getNumChannels(),
            mPostIndexLayer->getChannel(CHANNEL_EXC));
   }
}

bool PoolingDelivery::isAllInputReady() {
   bool isReady = true;
   if (getChannelCode() != CHANNEL_NOUPDATE) {
      isReady &= getPreLayer()->isExchangeFinished(0 /*delay*/);
   }
   return isReady;
}

#ifdef PV_USE_CUDA
void PoolingDelivery::deliverGPU() {
   pvAssert(
         getChannelCode() != CHANNEL_NOUPDATE); // Only called by deliver(), which already checked.
   pvAssert(mPostLayer->getChannel(getChannelCode()));

   if (mPreLayer->getUpdatedDeviceDatastoreFlag()) {
      PVLayerCube activityCube = mPreLayer->getPublisher()->createCube(0 /*delay*/);
      float *h_preDatastore = activityCube.data;
      PVCuda::CudaBuffer *d_preDatastore = mPreLayer->getDeviceDatastore();
      pvAssert(d_preDatastore);
      d_preDatastore->copyToDevice(h_preDatastore);
      // Device now has the updated datastore.
      mPreLayer->setUpdatedDeviceDatastoreFlag(false);
   }

   mRecvKernel->run();
}
#endif // PV_USE_CUDA

} // end namespace PV