TransposePoolingDelivery.cpp
/*
 * TransposePoolingDelivery.cpp
 *
 * Created on: Jan 9, 2018
 * Author: Pete Schultz
 */

#include "TransposePoolingDelivery.hpp"
#include "columns/HyPerCol.hpp"
#include "columns/ObjectMapComponent.hpp"
#include "components/OriginalConnNameParam.hpp"
#include "connections/PoolingConn.hpp"
#include "delivery/accumulate_functions.hpp"
#include "utils/MapLookupByType.hpp"

namespace PV {

TransposePoolingDelivery::TransposePoolingDelivery(char const *name, HyPerCol *hc) {
   initialize(name, hc);
}

TransposePoolingDelivery::TransposePoolingDelivery() {}

TransposePoolingDelivery::~TransposePoolingDelivery() {}

int TransposePoolingDelivery::initialize(char const *name, HyPerCol *hc) {
   return BaseDelivery::initialize(name, hc);
}

void TransposePoolingDelivery::setObjectType() { mObjectType = "TransposePoolingDelivery"; }

int TransposePoolingDelivery::ioParamsFillGroup(enum ParamsIOFlag ioFlag) {
   int status = BaseDelivery::ioParamsFillGroup(ioFlag);
   ioParam_updateGSynFromPostPerspective(ioFlag);
   return status;
}

void TransposePoolingDelivery::ioParam_receiveGpu(enum ParamsIOFlag ioFlag) {
   // During the communication phase, receiveGpu will be copied from the original conn
   if (ioFlag == PARAMS_IO_READ) {
      parent->parameters()->handleUnnecessaryParameter(name, "receiveGpu");
   }
}

void TransposePoolingDelivery::ioParam_updateGSynFromPostPerspective(enum ParamsIOFlag ioFlag) {
   // To read this param, we need to wait until the CommunicateInitInfo stage, because the behavior
   // depends on mReceiveGpu, which isn't determined until the communicate stage, since it is
   // copied from the original conn.
   if (ioFlag == PARAMS_IO_WRITE) {
      if (!mReceiveGpu) {
         parent->parameters()->ioParamValue(
               ioFlag,
               name,
               "updateGSynFromPostPerspective",
               &mUpdateGSynFromPostPerspective,
               mUpdateGSynFromPostPerspective);
      }
   }
}

Response::Status TransposePoolingDelivery::communicateInitInfo(
      std::shared_ptr<CommunicateInitInfoMessage const> message) {
   auto status = BaseDelivery::communicateInitInfo(message);
   if (!Response::completed(status)) {
      return status;
   }

   auto hierarchy = message->mHierarchy;

   auto *originalConnNameParam =
         mapLookupByType<OriginalConnNameParam>(hierarchy, getDescription());
   FatalIf(
         originalConnNameParam == nullptr,
         "%s requires an OriginalConnNameParam component.\n",
         getDescription_c());
   if (!originalConnNameParam->getInitInfoCommunicatedFlag()) {
      return Response::POSTPONE;
   }
   const char *originalConnName = originalConnNameParam->getOriginalConnName();

   ObjectMapComponent *objectMapComponent =
         mapLookupByType<ObjectMapComponent>(hierarchy, getDescription());
   FatalIf(
         objectMapComponent == nullptr, "%s requires an ObjectMapComponent.\n", getDescription_c());
   PoolingConn *originalConn =
         objectMapComponent->lookup<PoolingConn>(std::string(originalConnName));
   if (originalConn == nullptr) {
      if (parent->getCommunicator()->globalCommRank() == 0) {
         ErrorLog().printf(
               "%s: originalConnName \"%s\" does not correspond to a PoolingConn in the column.\n",
               getDescription_c(),
               originalConnName);
      }
      MPI_Barrier(parent->getCommunicator()->globalCommunicator());
      exit(EXIT_FAILURE);
   }
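   // Mirror the configuration of the original PoolingConn's delivery component: its
   // accumulate type, GPU usage, and the layers it pools between determine how this
   // transpose connection routes values back.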
   auto *originalPoolingDelivery = originalConn->getComponentByType<PoolingDelivery>();
   pvAssert(originalPoolingDelivery);
   mAccumulateType = originalPoolingDelivery->getAccumulateType();
   mReceiveGpu = originalPoolingDelivery->getReceiveGpu();
#ifdef PV_USE_CUDA
   mUsingGPUFlag = originalPoolingDelivery->isUsingGPU();
#endif // PV_USE_CUDA
   mOriginalPostIndexLayer = originalPoolingDelivery->getPostIndexLayer();
   mOriginalPreLayer = originalPoolingDelivery->getPreLayer();
   mOriginalPostLayer = originalPoolingDelivery->getPostLayer();

   // If receiveGpu is false, we need to read updateGSynFromPostPerspective.
   // If it is true, we use the CUDA routine, which always uses the post perspective.
   if (!mReceiveGpu) {
      parent->parameters()->ioParamValue(
            PARAMS_IO_READ,
            name,
            "updateGSynFromPostPerspective",
            &mUpdateGSynFromPostPerspective,
            mUpdateGSynFromPostPerspective);
   }
   else {
      mUpdateGSynFromPostPerspective = true;
      parent->parameters()->handleUnnecessaryParameter(
            name, "updateGSynFromPostPerspective", mUpdateGSynFromPostPerspective);
   }

   mPatchSize = mapLookupByType<DependentPatchSize>(hierarchy, getDescription());
   FatalIf(
         mPatchSize == nullptr,
         "%s requires a DependentPatchSize component.\n",
         getDescription_c());
   if (!mPatchSize->getInitInfoCommunicatedFlag()) {
      return Response::POSTPONE;
   }

   mWeightsPair = mapLookupByType<ImpliedWeightsPair>(hierarchy, getDescription());
   FatalIf(
         mWeightsPair == nullptr,
         "%s requires an ImpliedWeightsPair component.\n",
         getDescription_c());
   if (!mWeightsPair->getInitInfoCommunicatedFlag()) {
      return Response::POSTPONE;
   }

   if (mUpdateGSynFromPostPerspective) {
      mWeightsPair->needPost();
   }
   else {
      mWeightsPair->needPre();
   }

#ifdef PV_USE_CUDA
   if (mReceiveGpu) {
      // we need pre datastore, weights, and post gsyn for the channelCode allocated on the GPU.
      getPreLayer()->setAllocDeviceDatastore();
      getPostLayer()->setAllocDeviceGSyn();
      Weights *weights = mWeightsPair->getPostWeights();
      pvAssert(weights);
      weights->useGPU();

      // If recv from pre and pre layer is sparse, allocate activeIndices
      if (!mUpdateGSynFromPostPerspective && getPreLayer()->getSparseFlag()) {
         getPreLayer()->setAllocDeviceActiveIndices();
      }
   }
#endif // PV_USE_CUDA
   return Response::SUCCESS;
}

#ifdef PV_USE_CUDA
Response::Status
TransposePoolingDelivery::setCudaDevice(std::shared_ptr<SetCudaDeviceMessage const> message) {
   if (mUsingGPUFlag) {
      auto status = BaseDelivery::setCudaDevice(message);
      if (status != Response::SUCCESS) {
         return status;
      }
      Weights *weights = mWeightsPair->getPostWeights();
      pvAssert(weights);
      weights->setCudaDevice(message->mCudaDevice);
   }
   return Response::SUCCESS;
}
#endif // PV_USE_CUDA

Response::Status TransposePoolingDelivery::allocateDataStructures() {
   auto status = BaseDelivery::allocateDataStructures();
   if (!Response::completed(status)) {
      return status;
   }
#ifdef PV_USE_CUDA
   if (mReceiveGpu) {
      if (!mPreLayer->getDataStructuresAllocatedFlag()) {
         return Response::POSTPONE;
      }
      if (!mPostLayer->getDataStructuresAllocatedFlag()) {
         return Response::POSTPONE;
      }
      if (!mOriginalPreLayer->getDataStructuresAllocatedFlag()) {
         return Response::POSTPONE;
      }
      if (!mOriginalPostLayer->getDataStructuresAllocatedFlag()) {
         return Response::POSTPONE;
      }
      if (!mWeightsPair->getDataStructuresAllocatedFlag()) {
         return Response::POSTPONE;
      }
      initializeDeliverKernelArgs();
   }
#endif // PV_USE_CUDA
   allocateThreadGSyn();
   return Response::SUCCESS;
}

#ifdef PV_USE_CUDA
void TransposePoolingDelivery::initializeDeliverKernelArgs() {
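   // Gather device-side buffers for both this transpose connection's layers and the
   // original connection's layers; the CUDA kernel needs both in order to unpool on the GPU.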
   PVCuda::CudaDevice *device = parent->getDevice();
   PVCuda::CudaBuffer *d_preDatastore = mPreLayer->getDeviceDatastore();
   PVCuda::CudaBuffer *d_postGSyn = mPostLayer->getDeviceGSyn();
   PVCuda::CudaBuffer *d_originalPreDatastore = mOriginalPreLayer->getDeviceDatastore();
   PVCuda::CudaBuffer *d_originalPostGSyn = mOriginalPostLayer->getDeviceGSyn();
   Weights *weights = mWeightsPair->getPostWeights();
   pvAssert(weights);
   int const nxpPost = weights->getPatchSizeX();
   int const nypPost = weights->getPatchSizeY();
   cudnnPoolingMode_t poolingMode;
   int multiplier = 1;
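   // cuDNN provides max and average pooling but no sum pooling, so sum pooling is run as
   // average pooling and the result is scaled back up by the patch area via multiplier.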
   switch (mAccumulateType) {
      case PoolingDelivery::MAXPOOLING: poolingMode = CUDNN_POOLING_MAX; break;
      case PoolingDelivery::SUMPOOLING:
         poolingMode = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
         multiplier = nxpPost * nypPost;
         break;
      case PoolingDelivery::AVGPOOLING:
         poolingMode = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
         break;
      default: pvAssert(0); break;
   }
   mDeliverKernel = new PVCuda::CudaTransposePoolingDeliverKernel(device);
   mDeliverKernel->setArgs(
         mPreLayer->getLayerLoc(),
         mPostLayer->getLayerLoc(),
         mOriginalPreLayer->getLayerLoc(),
         mOriginalPostLayer->getLayerLoc(),
         nxpPost,
         nypPost,
         poolingMode,
         multiplier,
         d_preDatastore,
         d_postGSyn,
         d_originalPreDatastore,
         d_originalPostGSyn,
         (int)mChannelCode);
}
#endif // PV_USE_CUDA

void TransposePoolingDelivery::allocateThreadGSyn() {
   // If multithreaded, allocate a GSyn buffer for each thread, to avoid collisions.
   int const numThreads = parent->getNumThreads();
   if (numThreads > 1) {
      mThreadGSyn.resize(numThreads);
      // mThreadGSyn is only a buffer for one batch element. We're threading over presynaptic
      // neuron index, not batch element; so batch elements will be processed serially.
      for (auto &th : mThreadGSyn) {
         th.resize(mPostLayer->getNumNeurons());
      }
   }
}

void TransposePoolingDelivery::deliver() {
   // Check if we need to update based on connection's channel
   if (getChannelCode() == CHANNEL_NOUPDATE) {
      return;
   }

   if (mReceiveGpu) {
#ifdef PV_USE_CUDA
      deliverGPU();
#endif // PV_USE_CUDA
   }
   else {
      if (mUpdateGSynFromPostPerspective) {
         deliverPostsynapticPerspective();
      }
      else {
         deliverPresynapticPerspective();
      }
   }
}

void TransposePoolingDelivery::deliverPostsynapticPerspective() {
   Fatal() << "Delivering from PostSynapticPerspective for TransposePoolingDelivery has not been "
              "implemented yet.\n";
}

void TransposePoolingDelivery::deliverPresynapticPerspective() {
   PVLayerLoc const *preLoc = getPreLayer()->getLayerLoc();
   PVLayerLoc const *postLoc = getPostLayer()->getLayerLoc();
   Weights *preWeights = mWeightsPair->getPreWeights();

   // Slightly inefficient to define the function pointer each time deliver() is called;
   // but the real inefficiency is calling the function pointer in a tight for-loop.
   // TODO: Use templating instead of function pointer.
   void (*accumulateFunctionPointer)(
         int kPreRes, int nk, float *v, float a, float *w, void *auxPtr, int sf) = nullptr;
   switch (mAccumulateType) {
      case PoolingDelivery::MAXPOOLING: accumulateFunctionPointer = pvpatch_max_pooling; break;
      case PoolingDelivery::SUMPOOLING: accumulateFunctionPointer = pvpatch_sum_pooling; break;
      case PoolingDelivery::AVGPOOLING:
         accumulateFunctionPointer = pvpatch_sum_pooling;
         // Division by the number of weights happens outside the call to the accumulate function.
         break;
      default:
         pvAssert(0);
         // Only MAXPOOLING, SUMPOOLING, AVGPOOLING are allowed.
         // UNDEFINED is the only other possible value of mAccumulateType, but the type should be
         // defined before this function is ever called.
         break;
   }

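   // For average pooling, each presynaptic value is scaled by the reciprocal of the effective
   // patch area: the patch dimensions adjusted by the relative scale between post and pre layers.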
   float w = 1.0f;
   if (mAccumulateType == PoolingDelivery::AVGPOOLING) {
      float relative_XScale = pow(2, (getPostLayer()->getXScale() - getPreLayer()->getXScale()));
      float relative_YScale = pow(2, (getPostLayer()->getYScale() - getPreLayer()->getYScale()));
      float nxp = (float)mPatchSize->getPatchSizeX();
      float nyp = (float)mPatchSize->getPatchSizeY();
      w = 1.0f / (nxp * relative_XScale * nyp * relative_YScale);
   }

   PVLayerCube activityCube = mPreLayer->getPublisher()->createCube(0 /*delay*/);

   float *gSyn = getPostLayer()->getChannel(getChannelCode());
   pvAssert(gSyn);

   // Grab postIdxLayer's data
   float *postIdxData = nullptr;
   if (mAccumulateType == PoolingDelivery::MAXPOOLING) {
      assert(mOriginalPostIndexLayer);
      // Make sure this layer is an integer layer
      assert(mOriginalPostIndexLayer->getDataType() == PV_INT);
      PVLayerCube cube = mOriginalPostIndexLayer->getPublisher()->createCube(0 /*delay*/);
      postIdxData = cube.data;
   }

   for (int b = 0; b < parent->getNBatch(); b++) {
      float *activityBatch = activityCube.data
                             + b * (preLoc->nx + preLoc->halo.rt + preLoc->halo.lt)
                                     * (preLoc->ny + preLoc->halo.up + preLoc->halo.dn)
                                     * preLoc->nf;
      float *gSynPatchHeadBatch = gSyn + b * postLoc->nx * postLoc->ny * postLoc->nf;
      float *postIdxDataBatch = nullptr;
      if (mAccumulateType == PoolingDelivery::MAXPOOLING) {
         postIdxDataBatch = postIdxData + b * mOriginalPostIndexLayer->getNumExtended();
      }

      SparseList<float>::Entry const *activeIndicesBatch = NULL;
      if (activityCube.isSparse) {
         activeIndicesBatch = (SparseList<float>::Entry *)activityCube.activeIndices
                              + b * (preLoc->nx + preLoc->halo.rt + preLoc->halo.lt)
                                      * (preLoc->ny + preLoc->halo.up + preLoc->halo.dn)
                                      * preLoc->nf;
      }

      int numLoop = activityCube.isSparse ? activityCube.numActive[b] : mPreLayer->getNumExtended();

#ifdef PV_USE_OPENMP_THREADS
      // Clear each thread's gSyn buffer before accumulating into it.
      if (!mThreadGSyn.empty()) {
         int numNeurons = getPostLayer()->getNumNeurons();
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for
#endif
         for (int i = 0; i < parent->getNumThreads() * numNeurons; i++) {
            int ti = i / numNeurons;
            int ni = i % numNeurons;
            mThreadGSyn[ti][ni] = 0;
         }
      }
#endif // PV_USE_OPENMP_THREADS
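      // gSynPatchStart[kPreExt] is the offset into the postsynaptic GSyn buffer at which the
      // patch for presynaptic extended index kPreExt begins.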
      std::size_t const *gSynPatchStart = preWeights->getGeometry()->getGSynPatchStart().data();

#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
      for (int loopIndex = 0; loopIndex < numLoop; loopIndex++) {
         float a = 0.0f;
         int kPreExt = loopIndex;
         if (activityCube.isSparse) {
            a = activeIndicesBatch[loopIndex].value;
            kPreExt = activeIndicesBatch[loopIndex].index;
         }
         else {
            a = activityBatch[loopIndex];
         }
         if (a == 0.0f) {
            continue;
         }

         // If we're using mThreadGSyn, set this here
         float *gSynPatchHead;
#ifdef PV_USE_OPENMP_THREADS
         if (!mThreadGSyn.empty()) {
            int ti = omp_get_thread_num();
            gSynPatchHead = mThreadGSyn[ti].data();
         }
         else {
            gSynPatchHead = gSynPatchHeadBatch;
         }
#else // PV_USE_OPENMP_THREADS
         gSynPatchHead = gSynPatchHeadBatch;
#endif // PV_USE_OPENMP_THREADS

         const int kxPreExt =
               kxPos(kPreExt,
                     preLoc->nx + preLoc->halo.lt + preLoc->halo.rt,
                     preLoc->ny + preLoc->halo.dn + preLoc->halo.up,
                     preLoc->nf);
         const int kyPreExt =
               kyPos(kPreExt,
                     preLoc->nx + preLoc->halo.lt + preLoc->halo.rt,
                     preLoc->ny + preLoc->halo.dn + preLoc->halo.up,
                     preLoc->nf);
         const int kfPre = featureIndex(
               kPreExt,
               preLoc->nx + preLoc->halo.lt + preLoc->halo.rt,
               preLoc->ny + preLoc->halo.dn + preLoc->halo.up,
               preLoc->nf);

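         // Max pooling: route the presynaptic value back to the unit that won the original
         // pooling competition. Its global extended index was recorded by the original
         // connection's post index layer.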
         if (mAccumulateType == PoolingDelivery::MAXPOOLING) {
            const int kxPreGlobalExt = kxPreExt + preLoc->kx0;
            const int kyPreGlobalExt = kyPreExt + preLoc->ky0;
            if (kxPreGlobalExt < preLoc->halo.lt
                || kxPreGlobalExt >= preLoc->nxGlobal + preLoc->halo.lt
                || kyPreGlobalExt < preLoc->halo.up
                || kyPreGlobalExt >= preLoc->nyGlobal + preLoc->halo.up) {
               continue;
            }

            // Convert stored global extended index into local extended index
            int postGlobalExtIdx = (int)postIdxDataBatch[kPreExt];

            // If all inputs are zero and input layer is sparse, postGlobalExtIdx will still be
            // -1.
            if (postGlobalExtIdx == -1) {
               continue;
            }

            // Make sure the index is in bounds
            assert(
                  postGlobalExtIdx >= 0
                  && postGlobalExtIdx
                           < (postLoc->nxGlobal + postLoc->halo.lt + postLoc->halo.rt)
                                   * (postLoc->nyGlobal + postLoc->halo.up + postLoc->halo.dn)
                                   * postLoc->nf);

            const int kxPostGlobalExt =
                  kxPos(postGlobalExtIdx,
                        postLoc->nxGlobal + postLoc->halo.lt + postLoc->halo.rt,
                        postLoc->nyGlobal + postLoc->halo.dn + postLoc->halo.up,
                        postLoc->nf);
            const int kyPostGlobalExt =
                  kyPos(postGlobalExtIdx,
                        postLoc->nxGlobal + postLoc->halo.lt + postLoc->halo.rt,
                        postLoc->nyGlobal + postLoc->halo.dn + postLoc->halo.up,
                        postLoc->nf);
            const int kfPost = featureIndex(
                  postGlobalExtIdx,
                  postLoc->nxGlobal + postLoc->halo.lt + postLoc->halo.rt,
                  postLoc->nyGlobal + postLoc->halo.dn + postLoc->halo.up,
                  postLoc->nf);

            const int kxPostLocalRes = kxPostGlobalExt - postLoc->kx0 - postLoc->halo.lt;
            const int kyPostLocalRes = kyPostGlobalExt - postLoc->ky0 - postLoc->halo.up;
            if (kxPostLocalRes < 0 || kxPostLocalRes >= postLoc->nx || kyPostLocalRes < 0
                || kyPostLocalRes >= postLoc->ny) {
               continue;
            }

            const int kPostLocalRes = kIndex(
                  kxPostLocalRes, kyPostLocalRes, kfPost, postLoc->nx, postLoc->ny, postLoc->nf);
            if (fabs(a) > fabs(gSynPatchHead[kPostLocalRes])) {
               gSynPatchHead[kPostLocalRes] = a;
            }
         }
         else {
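            // Sum/average pooling: spread the presynaptic value uniformly over the transpose
            // patch via the accumulate function; for average pooling the weight w divides by
            // the patch area.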
            Patch const *patch = &preWeights->getPatch(kPreExt);
            const int nk = patch->nx * preWeights->getPatchSizeF();
            const int ny = patch->ny;
            const int sy = postLoc->nx * postLoc->nf; // stride in restricted layer
            float *postPatchStart = &gSynPatchHead[gSynPatchStart[kPreExt]];

            int offset = kfPre;
            int sf = preWeights->getPatchSizeF();

            float w = 1.0f;
            if (mAccumulateType == PoolingDelivery::MAXPOOLING) {
               w = 1.0f;
            }
            else if (mAccumulateType == PoolingDelivery::AVGPOOLING) {
               // float relative_XScale = pow(2, (post->getXScale() - pre->getXScale()));
               // float relative_YScale = pow(2, (post->getYScale() - pre->getYScale()));
               float const nxp = (float)mPatchSize->getPatchSizeX();
               float const nyp = (float)mPatchSize->getPatchSizeY();
               float const normVal = nxp * nyp;
               w = 1.0f / normVal;
            }
            void *auxPtr = NULL;
            for (int y = 0; y < ny; y++) {
               (accumulateFunctionPointer)(
                     0, nk, postPatchStart + y * sy + offset, a, &w, auxPtr, sf);
            }
         }
      }
      float relative_XScale = pow(2, (getPostLayer()->getXScale() - getPreLayer()->getXScale()));
      float relative_YScale = pow(2, (getPostLayer()->getYScale() - getPreLayer()->getYScale()));
      float nxp = (float)mPatchSize->getPatchSizeX();
      float nyp = (float)mPatchSize->getPatchSizeY();
      w = 1.0f / (nxp * relative_XScale * nyp * relative_YScale);

#ifdef PV_USE_OPENMP_THREADS
      // Set back into gSyn
      if (!mThreadGSyn.empty()) {
         float *gSynPatchHead = gSynPatchHeadBatch;
         int numNeurons = getPostLayer()->getNumNeurons();
// Looping over neurons first to be thread safe
#pragma omp parallel for
         for (int ni = 0; ni < numNeurons; ni++) {
            if (mAccumulateType == PoolingDelivery::MAXPOOLING) {
               // Find the maximum-magnitude value across mThreadGSyn and set that value
               float maxMag = -INFINITY;
               int maxMagIdx = -1;
               for (int ti = 0; ti < parent->getNumThreads(); ti++) {
                  if (maxMag < fabsf(mThreadGSyn[ti][ni])) {
                     maxMag = fabsf(mThreadGSyn[ti][ni]);
                     maxMagIdx = ti;
                  }
               }
               assert(maxMagIdx >= 0);
               gSynPatchHead[ni] = mThreadGSyn[maxMagIdx][ni];
            }
            else {
               for (int ti = 0; ti < parent->getNumThreads(); ti++) {
                  gSynPatchHead[ni] += mThreadGSyn[ti][ni];
               }
            }
         }
      }
#endif
   }
}

bool TransposePoolingDelivery::isAllInputReady() const {
   bool isReady = true;
   if (getChannelCode() != CHANNEL_NOUPDATE) {
      isReady &= getPreLayer()->isExchangeFinished(0 /*delay*/);
   }
   return isReady;
}

#ifdef PV_USE_CUDA
void TransposePoolingDelivery::deliverGPU() {
   pvAssert(mPostLayer->getChannel(getChannelCode()));

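   // Copy the presynaptic datastore to the device only when it has changed since the last
   // transfer, then launch the transpose-pooling kernel.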
   if (mPreLayer->getUpdatedDeviceDatastoreFlag()) {
      PVLayerCube activityCube = mPreLayer->getPublisher()->createCube(0 /*delay*/);
      float *h_preDatastore = activityCube.data;
      PVCuda::CudaBuffer *d_preDatastore = mPreLayer->getDeviceDatastore();
      pvAssert(d_preDatastore);
      d_preDatastore->copyToDevice(h_preDatastore);
      // Device copy of the datastore is now up to date.
      mPreLayer->setUpdatedDeviceDatastoreFlag(false);
   }

   mDeliverKernel->run();
}
#endif // PV_USE_CUDA

} // end namespace PV