/*
 * HyPerLayer.cpp
 *
 * Created on: Jul 29, 2008
 *
 * The top of the hierarchy for layer classes.
 *
 * To make it easy to subclass from classes in the HyPerLayer hierarchy,
 * please follow the guidelines below when adding subclasses to the HyPerLayer hierarchy:
 *
 * For a class named DerivedLayer that is derived from a class named BaseLayer,
 * the .hpp file should have
 */
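
// The guideline above breaks off in this copy of the file. What follows is a
// minimal sketch of the conventional PetaVision subclassing pattern, inferred
// from the protected/public constructor pairs and the initialize_base()/
// initialize() methods defined below; DerivedLayer is a placeholder name, not
// a real class.
//
//   class DerivedLayer : public HyPerLayer {
//     public:
//       DerivedLayer(const char *name, HyPerCol *hc); // initialize_base(), then initialize()
//       virtual ~DerivedLayer();
//     protected:
//       DerivedLayer();                                // calls initialize_base() only
//       int initialize(const char *name, HyPerCol *hc);
//     private:
//       int initialize_base();                         // sets members to safe values; no virtuals
//   };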

#include "HyPerLayer.hpp"
#include "checkpointing/CheckpointEntryPvpBuffer.hpp"
#include "checkpointing/CheckpointEntryRandState.hpp"
#include "columns/HyPerCol.hpp"
#include "connections/BaseConnection.hpp"
#include "include/default_params.h"
#include "include/pv_common.h"
#include "io/FileStream.hpp"
#include "io/io.hpp"
#include <assert.h>
#include <iostream>
#include <sstream>
#include <string.h>

namespace PV {

// This constructor is protected so that only derived classes can call it.
// It should be called as the normal method of object construction by
// derived classes. It should NOT call any virtual methods.
HyPerLayer::HyPerLayer() { initialize_base(); }

HyPerLayer::HyPerLayer(const char *name, HyPerCol *hc) {
   initialize_base();
   initialize(name, hc);
}

// initialize_base should be called only by constructors. It should not
// call any virtual methods, because polymorphism is not available while
// a base class constructor is executing on behalf of a derived class constructor.
// In general, initialize_base should be used only to initialize member variables
// to safe values.
int HyPerLayer::initialize_base() {
   name = NULL;
   probes = NULL;
   nxScale = 1.0f;
   nyScale = 1.0f;
   numFeatures = 1;
   mirrorBCflag = 0;
   xmargin = 0;
   ymargin = 0;
   numProbes = 0;
   numChannels = 2;
   clayer = NULL;
   GSyn = NULL;
   marginIndices = NULL;
   numMargin = 0;
   writeTime = 0;
   initialWriteTime = 0;
   triggerFlag = false; // Default to update every timestep
   triggerLayer = NULL;
   triggerLayerName = NULL;
   triggerBehavior = NULL;
   triggerBehaviorType = NO_TRIGGER;
   triggerResetLayerName = NULL;
   triggerOffset = 0;
   initializeFromCheckpointFlag = false;

   mLastUpdateTime = 0.0;
   mLastTriggerTime = 0.0;

   phase = 0;

   numSynchronizedMarginWidthLayers = 0;
   synchronizedMarginWidthLayers = NULL;

   dataType = PV_FLOAT;
   dataTypeString = NULL;

#ifdef PV_USE_CUDA
   allocDeviceV = false;
   allocDeviceGSyn = false;
   allocDeviceActivity = false;
   allocDeviceDatastore = false;
   allocDeviceActiveIndices = false;
   d_V = NULL;
   d_GSyn = NULL;
   d_Activity = NULL;
   d_Datastore = NULL;
   d_ActiveIndices = NULL;
   d_numActive = NULL;
   updatedDeviceActivity = true; // Start off always updating activity
   updatedDeviceDatastore = true;
   updatedDeviceGSyn = true;
   mRecvGpu = false;
   mUpdateGpu = false;
   krUpdate = NULL;
#ifdef PV_USE_CUDNN
   cudnn_GSyn = NULL;
   cudnn_Datastore = NULL;
#endif // PV_USE_CUDNN
#endif // PV_USE_CUDA

   update_timer = NULL;
   recvsyn_timer = NULL;
   publish_timer = NULL;
   timescale_timer = NULL;
   io_timer = NULL;

#ifdef PV_USE_CUDA
   gpu_recvsyn_timer = NULL;
   gpu_update_timer = NULL;
#endif

   thread_gSyn = NULL;
   recvConns.clear();

   return PV_SUCCESS;
}

int HyPerLayer::initialize(const char *name, HyPerCol *hc) {
   int status = BaseLayer::initialize(name, hc);
   if (status != PV_SUCCESS) {
      return status;
   }
   readParams();

   writeTime = initialWriteTime;
   writeActivityCalls = 0;
   writeActivitySparseCalls = 0;
   numDelayLevels = 1; // If a connection has positive delay so that more delay levels are needed,
   // numDelayLevels is increased when BaseConnection::communicateInitInfo calls
   // increaseDelayLevels

   initClayer();

   mLastUpdateTime = parent->getDeltaTime();
   mLastTriggerTime = parent->getDeltaTime();
   return PV_SUCCESS;
}

int HyPerLayer::initClayer() {
   clayer = (PVLayer *)calloc(1UL, sizeof(PVLayer));
   int status = PV_SUCCESS;
   if (clayer == NULL) {
      Fatal().printf(
            "HyPerLayer \"%s\" error in rank %d process: unable to allocate memory for clayer.\n",
            name,
            parent->columnId());
   }

   PVLayerLoc *loc = &clayer->loc;
   setLayerLoc(loc, nxScale, nyScale, numFeatures, parent->getNBatch());
   assert(loc->halo.lt == 0 && loc->halo.rt == 0 && loc->halo.dn == 0 && loc->halo.up == 0);

   int nBatch = parent->getNBatch();

   clayer->numNeurons = loc->nx * loc->ny * loc->nf;
   clayer->numExtended = clayer->numNeurons; // initially, margin is zero; it will be updated as
   // needed during the communicateInitInfo stage.
   clayer->numNeuronsAllBatches = nBatch * loc->nx * loc->ny * loc->nf;
   clayer->numExtendedAllBatches = clayer->numNeuronsAllBatches;

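   // Illustrative note (not from the original source): nxScale/nyScale relate
   // layer resolution to column resolution, and xScale/yScale store the negated
   // base-2 logarithm, rounded to the nearest integer. Under this convention,
   // nxScale = 0.5 gives xScale = 1 (half the column's linear resolution),
   // nxScale = 1 gives xScale = 0, and nxScale = 4 gives xScale = -2.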
   double xScaled = -log2((double)nxScale);
   double yScaled = -log2((double)nyScale);

   int xScale = (int)nearbyint(xScaled);
   int yScale = (int)nearbyint(yScaled);

   clayer->xScale = xScale;
   clayer->yScale = yScale;

   // Other fields of clayer will be set in allocateClayerBuffers, or during updateState
   return status;
}

HyPerLayer::~HyPerLayer() {
   delete recvsyn_timer;
   delete update_timer;
   delete publish_timer;
   delete timescale_timer;
   delete io_timer;
#ifdef PV_USE_CUDA
   delete gpu_recvsyn_timer;
   delete gpu_update_timer;
#endif

   delete mOutputStateStream;

   delete mInitVObject;
   freeClayer();
   freeChannels();

#ifdef PV_USE_CUDA
   if (krUpdate) {
      delete krUpdate;
   }
   if (d_V) {
      delete d_V;
   }
   if (d_Activity) {
      delete d_Activity;
   }
   if (d_Datastore) {
      delete d_Datastore;
   }

#ifdef PV_USE_CUDNN
   if (cudnn_Datastore) {
      delete cudnn_Datastore;
   }
#endif // PV_USE_CUDNN
#endif // PV_USE_CUDA

   free(marginIndices);
   free(probes); // All probes are deleted by the HyPerCol, so probes[i] doesn't need to be deleted,
   // only the array itself.

   free(synchronizedMarginWidthLayers);

   free(triggerLayerName);
   free(triggerBehavior);
   free(triggerResetLayerName);
   free(initVTypeString);

   if (thread_gSyn) {
      for (int i = 0; i < parent->getNumThreads(); i++) {
         free(thread_gSyn[i]);
      }
      free(thread_gSyn);
   }
   delete publisher;
}

template <typename T>
int HyPerLayer::freeBuffer(T **buf) {
   free(*buf);
   *buf = NULL;
   return PV_SUCCESS;
}
// Declare the instantiations of freeBuffer that occur in other .cpp files; otherwise you may
// get linker errors.
template int HyPerLayer::freeBuffer<float>(float **buf);
template int HyPerLayer::freeBuffer<int>(int **buf);

int HyPerLayer::freeRestrictedBuffer(float **buf) { return freeBuffer(buf); }

int HyPerLayer::freeExtendedBuffer(float **buf) { return freeBuffer(buf); }

int HyPerLayer::freeClayer() {
   pvcube_delete(clayer->activity);

   freeBuffer(&clayer->prevActivity);
   freeBuffer(&clayer->V);
   free(clayer);
   clayer = NULL;

   return PV_SUCCESS;
}

void HyPerLayer::freeChannels() {

#ifdef PV_USE_CUDA
   if (d_GSyn != NULL) {
      delete d_GSyn;
      d_GSyn = NULL;
   }
#ifdef PV_USE_CUDNN
   if (cudnn_GSyn != NULL) {
      delete cudnn_GSyn;
   }
#endif // PV_USE_CUDNN
#endif // PV_USE_CUDA

   // GSyn gets allocated in allocateDataStructures, but only if numChannels > 0.
   if (GSyn) {
      assert(numChannels > 0);
      free(GSyn[0]); // conductances allocated contiguously so frees all buffer storage
      free(GSyn); // this frees the array of pointers to separate conductance channels
      GSyn = NULL;
      numChannels = 0;
   }
}

int HyPerLayer::allocateClayerBuffers() {
   // clayer fields numNeurons, numExtended, loc, xScale, yScale,
   // dx, dy, xOrigin, yOrigin were set in initClayer().
   assert(clayer);
   allocateV();
   allocateActivity();

   // athresher 11-4-16 TODO: Should these be called on non-spiking layers?
   allocatePrevActivity();
   for (int k = 0; k < getNumExtendedAllBatches(); k++) {
      clayer->prevActivity[k] = -10 * REFRACTORY_PERIOD; // allow neuron to fire at time t==0
   }
   return PV_SUCCESS;
}

template <typename T>
void HyPerLayer::allocateBuffer(T **buf, int bufsize, const char *bufname) {
   *buf = (T *)calloc(bufsize, sizeof(T));
   if (*buf == NULL) {
      Fatal().printf(
            "%s: rank %d process unable to allocate memory for %s: %s.\n",
            getDescription_c(),
            parent->columnId(),
            bufname,
            strerror(errno));
   }
}
// Declare the instantiations of allocateBuffer that occur in other .cpp files; otherwise you may
// get linker errors.
template void HyPerLayer::allocateBuffer<float>(float **buf, int bufsize, const char *bufname);
template void HyPerLayer::allocateBuffer<int>(int **buf, int bufsize, const char *bufname);

void HyPerLayer::allocateRestrictedBuffer(float **buf, char const *bufname) {
   allocateBuffer(buf, getNumNeuronsAllBatches(), bufname);
}

void HyPerLayer::allocateExtendedBuffer(float **buf, char const *bufname) {
   allocateBuffer(buf, getNumExtendedAllBatches(), bufname);
}

void HyPerLayer::allocateV() {
   return allocateRestrictedBuffer(&clayer->V, "membrane potential V");
}

void HyPerLayer::allocateActivity() {
   clayer->activity = pvcube_new(&clayer->loc, getNumExtendedAllBatches());
   FatalIf(
         clayer->activity == nullptr, "%s failed to allocate activity cube.\n", getDescription_c());
}

void HyPerLayer::allocatePrevActivity() {
   allocateExtendedBuffer(&clayer->prevActivity, "time of previous activity");
}

int HyPerLayer::setLayerLoc(
      PVLayerLoc *layerLoc,
      float nxScale,
      float nyScale,
      int nf,
      int numBatches) {
   int status = PV_SUCCESS;

   Communicator *icComm = parent->getCommunicator();

   float nxglobalfloat = nxScale * parent->getNxGlobal();
   layerLoc->nxGlobal = (int)nearbyintf(nxglobalfloat);
   if (std::fabs(nxglobalfloat - layerLoc->nxGlobal) > 0.0001f) {
      if (parent->columnId() == 0) {
         ErrorLog(errorMessage);
         errorMessage.printf(
               "nxScale of layer \"%s\" is incompatible with size of column.\n", getName());
         errorMessage.printf(
               "Column nx %d multiplied by nxScale %f must be an integer.\n",
               parent->getNxGlobal(),
               (double)nxScale);
      }
      status = PV_FAILURE;
   }

   float nyglobalfloat = nyScale * parent->getNyGlobal();
   layerLoc->nyGlobal = (int)nearbyintf(nyglobalfloat);
   if (std::fabs(nyglobalfloat - layerLoc->nyGlobal) > 0.0001f) {
      if (parent->columnId() == 0) {
         ErrorLog(errorMessage);
         errorMessage.printf(
               "nyScale of layer \"%s\" is incompatible with size of column.\n", getName());
         errorMessage.printf(
               "Column ny %d multiplied by nyScale %f must be an integer.\n",
               parent->getNyGlobal(),
               (double)nyScale);
      }
      status = PV_FAILURE;
   }

   // partition input space based on the number of processor
   // columns and rows
   //

   if (layerLoc->nxGlobal % icComm->numCommColumns() != 0) {
      if (parent->columnId() == 0) {
         ErrorLog(errorMessage);
         errorMessage.printf(
               "Size of HyPerLayer \"%s\" is not compatible with the MPI configuration.\n", name);
         errorMessage.printf(
               "The layer has %d pixels horizontally, and there are %d MPI processes in a row, but "
               "%d does not divide %d.\n",
               layerLoc->nxGlobal,
               icComm->numCommColumns(),
               icComm->numCommColumns(),
               layerLoc->nxGlobal);
      }
      status = PV_FAILURE;
   }
   if (layerLoc->nyGlobal % icComm->numCommRows() != 0) {
      if (parent->columnId() == 0) {
         ErrorLog(errorMessage);
         errorMessage.printf(
               "Size of HyPerLayer \"%s\" is not compatible with the MPI configuration.\n", name);
         errorMessage.printf(
               "The layer has %d pixels vertically, and there are %d MPI processes in a column, "
               "but %d does not divide %d.\n",
               layerLoc->nyGlobal,
               icComm->numCommRows(),
               icComm->numCommRows(),
               layerLoc->nyGlobal);
      }
      status = PV_FAILURE;
   }
   MPI_Barrier(icComm->communicator()); // If there is an error, make sure that MPI doesn't kill the
   // run before process 0 reports the error.
   if (status != PV_SUCCESS) {
      if (parent->columnId() == 0) {
         ErrorLog().printf("setLayerLoc failed for %s.\n", getDescription_c());
      }
      exit(EXIT_FAILURE);
   }
   layerLoc->nx = layerLoc->nxGlobal / icComm->numCommColumns();
   layerLoc->ny = layerLoc->nyGlobal / icComm->numCommRows();
   assert(layerLoc->nxGlobal == layerLoc->nx * icComm->numCommColumns());
   assert(layerLoc->nyGlobal == layerLoc->ny * icComm->numCommRows());

   layerLoc->kx0 = layerLoc->nx * icComm->commColumn();
   layerLoc->ky0 = layerLoc->ny * icComm->commRow();

   layerLoc->nf = nf;

   layerLoc->nbatch = numBatches;

   layerLoc->kb0 = parent->commBatch() * numBatches;
   layerLoc->nbatchGlobal = parent->numCommBatches() * numBatches;

   // halo is set in calls to updateClayerMargin
   layerLoc->halo.lt = 0;
   layerLoc->halo.rt = 0;
   layerLoc->halo.dn = 0;
   layerLoc->halo.up = 0;

   return 0;
}
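
// Worked example (illustrative, not from the original source): with
// nxGlobal = 256, nyGlobal = 128 and a 4-column-by-2-row MPI arrangement,
// each process gets a local patch of nx = 256/4 = 64 by ny = 128/2 = 64,
// and the process in MPI column 3, row 1 has its local origin at
// kx0 = 64 * 3 = 192, ky0 = 64 * 1 = 64 in global restricted coordinates.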

void HyPerLayer::calcNumExtended() {
   PVLayerLoc const *loc = getLayerLoc();
   clayer->numExtended = (loc->nx + loc->halo.lt + loc->halo.rt)
                         * (loc->ny + loc->halo.dn + loc->halo.up) * loc->nf;
   clayer->numExtendedAllBatches = clayer->numExtended * loc->nbatch;
}
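
// Illustrative arithmetic (not from the original source): with a local
// restricted size of nx = 64, ny = 64, nf = 3 and a uniform 8-pixel halo
// (lt = rt = dn = up = 8), numExtended = (64+16) * (64+16) * 3 = 19200,
// and with nbatch = 2, numExtendedAllBatches = 38400.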

void HyPerLayer::allocateBuffers() {
   // allocate memory for input buffers. For HyPerLayer, allocates GSyn.
   // virtual so that subclasses can initialize additional buffers if needed.
   // Typically an overriding allocateBuffers should call HyPerLayer::allocateBuffers.
   // Specialized subclasses that don't use GSyn (e.g. CloneVLayer) should override
   // allocateGSyn to do nothing.

   allocateGSyn();
}

void HyPerLayer::allocateGSyn() {
   GSyn = nullptr;
   if (numChannels > 0) {
      GSyn = (float **)malloc(numChannels * sizeof(float *));
      FatalIf(GSyn == nullptr, "%s unable to allocate GSyn pointers.\n", getDescription_c());

      GSyn[0] = (float *)calloc(getNumNeuronsAllBatches() * numChannels, sizeof(float));
      // All channels allocated at once and contiguously. resetGSynBuffers_HyPerLayer() assumes
      // this is true, to make it easier to port to GPU.
      FatalIf(GSyn[0] == nullptr, "%s unable to allocate GSyn buffer.\n", getDescription_c());

      for (int m = 1; m < numChannels; m++) {
         GSyn[m] = GSyn[0] + m * getNumNeuronsAllBatches();
      }
   }
}

void HyPerLayer::addPublisher() {
   MPIBlock const *mpiBlock = parent->getCommunicator()->getLocalMPIBlock();
   publisher = new Publisher(*mpiBlock, clayer->activity, getNumDelayLevels(), getSparseFlag());
}

void HyPerLayer::checkpointPvpActivityFloat(
      Checkpointer *checkpointer,
      char const *bufferName,
      float *pvpBuffer,
      bool extended) {
   bool registerSucceeded = checkpointer->registerCheckpointEntry(
         std::make_shared<CheckpointEntryPvpBuffer<float>>(
               getName(),
               bufferName,
               checkpointer->getMPIBlock(),
               pvpBuffer,
               getLayerLoc(),
               extended),
         false /*not constant*/);
   FatalIf(
         !registerSucceeded,
         "%s failed to register %s for checkpointing.\n",
         getDescription_c(),
         bufferName);
}

void HyPerLayer::checkpointRandState(
      Checkpointer *checkpointer,
      char const *bufferName,
      Random *randState,
      bool extendedFlag) {
   bool registerSucceeded = checkpointer->registerCheckpointEntry(
         std::make_shared<CheckpointEntryRandState>(
               getName(),
               bufferName,
               checkpointer->getMPIBlock(),
               randState->getRNG(0),
               getLayerLoc(),
               extendedFlag),
         false /*not constant*/);
   FatalIf(
         !registerSucceeded,
         "%s failed to register %s for checkpointing.\n",
         getDescription_c(),
         bufferName);
}

Response::Status HyPerLayer::initializeState() {
   initializeV();
   initializeActivity();
   return Response::SUCCESS;
}

#ifdef PV_USE_CUDA
Response::Status HyPerLayer::copyInitialStateToGPU() {
   if (mUpdateGpu) {
      float *h_V = getV();
      if (h_V != NULL) {
         PVCuda::CudaBuffer *d_V = getDeviceV();
         assert(d_V);
         d_V->copyToDevice(h_V);
      }

      PVCuda::CudaBuffer *d_activity = getDeviceActivity();
      assert(d_activity);
      float *h_activity = getCLayer()->activity->data;
      d_activity->copyToDevice(h_activity);
   }
   return Response::SUCCESS;
}

#endif // PV_USE_CUDA

void HyPerLayer::initializeV() {
   if (getV() != nullptr && mInitVObject != nullptr) {
      mInitVObject->calcV(getV(), getLayerLoc());
   }
}

void HyPerLayer::initializeActivity() {
   int status = setActivity();
   FatalIf(status != PV_SUCCESS, "%s failed to initialize activity.\n", getDescription_c());
}

int HyPerLayer::ioParamsFillGroup(enum ParamsIOFlag ioFlag) {
   // Derived classes with new params behavior should override ioParamsFillGroup,
   // and the overriding method should call the base class's ioParamsFillGroup.
   ioParam_nxScale(ioFlag);
   ioParam_nyScale(ioFlag);
   ioParam_nf(ioFlag);
   ioParam_phase(ioFlag);
   ioParam_mirrorBCflag(ioFlag);
   ioParam_valueBC(ioFlag);
   ioParam_initializeFromCheckpointFlag(ioFlag);
   ioParam_InitVType(ioFlag);
   ioParam_triggerLayerName(ioFlag);
   ioParam_triggerFlag(ioFlag);
   ioParam_triggerOffset(ioFlag);
   ioParam_triggerBehavior(ioFlag);
   ioParam_triggerResetLayerName(ioFlag);
   ioParam_writeStep(ioFlag);
   ioParam_initialWriteTime(ioFlag);
   ioParam_sparseLayer(ioFlag);
   ioParam_writeSparseValues(ioFlag);

   // GPU-specific parameter. If not using GPUs, this flag
   // can be set to false or left out, but it is an error
   // to set updateGpu to true if compiling without GPUs.
   ioParam_updateGpu(ioFlag);

   ioParam_dataType(ioFlag);
   return PV_SUCCESS;
}
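
// For reference, a hypothetical params-file group exercising these parameters
// might look like the following (illustrative only: the layer name and values
// are made up, and the syntax follows the usual PetaVision params format):
//
//   HyPerLayer "ExampleLayer" = {
//      nxScale          = 0.5;
//      nyScale          = 0.5;
//      nf               = 16;
//      phase            = 1;
//      mirrorBCflag     = false;
//      valueBC          = 0.0;
//      InitVType        = "ZeroV";
//      triggerLayerName = NULL;
//      writeStep        = 1.0;
//      initialWriteTime = 0.0;
//      sparseLayer      = false;
//      updateGpu        = false;
//      dataType         = "float";
//   };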

void HyPerLayer::ioParam_dataType(enum ParamsIOFlag ioFlag) {
   this->parent->parameters()->ioParamString(
         ioFlag, this->getName(), "dataType", &dataTypeString, NULL, false /*warnIfAbsent*/);
   if (dataTypeString == NULL) {
      // Default value
      dataType = PV_FLOAT;
      return;
   }
   if (!strcmp(dataTypeString, "float")) {
      dataType = PV_FLOAT;
   }
   else if (!strcmp(dataTypeString, "int")) {
      dataType = PV_INT;
   }
   else {
      Fatal() << "BaseLayer \"" << name
              << "\": dataType not recognized, can be \"float\" or \"int\"\n";
   }
}

void HyPerLayer::ioParam_updateGpu(enum ParamsIOFlag ioFlag) {
#ifdef PV_USE_CUDA
   parent->parameters()->ioParamValue(
         ioFlag, name, "updateGpu", &mUpdateGpu, mUpdateGpu, true /*warnIfAbsent*/);
   mUsingGPUFlag = mUpdateGpu;
#else // PV_USE_CUDA
   bool mUpdateGpu = false;
   parent->parameters()->ioParamValue(
         ioFlag, name, "updateGpu", &mUpdateGpu, mUpdateGpu, false /*warnIfAbsent*/);
   if (parent->columnId() == 0) {
      FatalIf(
            mUpdateGpu,
            "%s: updateGpu is set to true, but PetaVision was compiled without GPU acceleration.\n",
            getDescription_c());
   }
#endif // PV_USE_CUDA
}

void HyPerLayer::ioParam_nxScale(enum ParamsIOFlag ioFlag) {
   parent->parameters()->ioParamValue(ioFlag, name, "nxScale", &nxScale, nxScale);
}

void HyPerLayer::ioParam_nyScale(enum ParamsIOFlag ioFlag) {
   parent->parameters()->ioParamValue(ioFlag, name, "nyScale", &nyScale, nyScale);
}

void HyPerLayer::ioParam_nf(enum ParamsIOFlag ioFlag) {
   parent->parameters()->ioParamValue(ioFlag, name, "nf", &numFeatures, numFeatures);
}

void HyPerLayer::ioParam_phase(enum ParamsIOFlag ioFlag) {
   parent->parameters()->ioParamValue(ioFlag, name, "phase", &phase, phase);
   if (ioFlag == PARAMS_IO_READ && phase < 0) {
      if (parent->columnId() == 0)
         Fatal().printf(
               "%s: phase must be >= 0 (given value was %d).\n", getDescription_c(), phase);
   }
}

void HyPerLayer::ioParam_mirrorBCflag(enum ParamsIOFlag ioFlag) {
   parent->parameters()->ioParamValue(ioFlag, name, "mirrorBCflag", &mirrorBCflag, mirrorBCflag);
}

void HyPerLayer::ioParam_valueBC(enum ParamsIOFlag ioFlag) {
   assert(!parent->parameters()->presentAndNotBeenRead(name, "mirrorBCflag"));
   if (!mirrorBCflag) {
      parent->parameters()->ioParamValue(ioFlag, name, "valueBC", &valueBC, (float)0);
   }
}

void HyPerLayer::ioParam_initializeFromCheckpointFlag(enum ParamsIOFlag ioFlag) {
   parent->parameters()->ioParamValue(
         ioFlag,
         name,
         "initializeFromCheckpointFlag",
         &initializeFromCheckpointFlag,
         initializeFromCheckpointFlag,
         true /*warnIfAbsent*/);
}

void HyPerLayer::ioParam_InitVType(enum ParamsIOFlag ioFlag) {
   parent->parameters()->ioParamString(
         ioFlag,
         name,
         "InitVType",
         &initVTypeString,
         BaseInitV::mDefaultInitV.data(),
         true /*warnIfAbsent*/);
   if (ioFlag == PARAMS_IO_READ) {
      BaseObject *object = Factory::instance()->createByKeyword(initVTypeString, name, parent);
      mInitVObject = dynamic_cast<BaseInitV *>(object);
      if (mInitVObject == nullptr) {
         ErrorLog().printf("%s: unable to create InitV object\n", getDescription_c());
         abort();
      }
   }
   if (mInitVObject != nullptr) {
      mInitVObject->ioParamsFillGroup(ioFlag);
   }
}

void HyPerLayer::ioParam_triggerLayerName(enum ParamsIOFlag ioFlag) {
   parent->parameters()->ioParamString(
         ioFlag, name, "triggerLayerName", &triggerLayerName, NULL, false /*warnIfAbsent*/);
   if (ioFlag == PARAMS_IO_READ) {
      if (triggerLayerName && !strcmp(name, triggerLayerName)) {
         if (parent->columnId() == 0) {
            ErrorLog().printf(
                  "%s: triggerLayerName cannot be the same as the name of the layer itself.\n",
                  getDescription_c());
         }
         MPI_Barrier(parent->getCommunicator()->communicator());
         exit(EXIT_FAILURE);
      }
      triggerFlag = (triggerLayerName != NULL && triggerLayerName[0] != '\0');
   }
}

// triggerFlag was deprecated Aug 7, 2015.
// Setting triggerLayerName to a nonempty string has the effect of triggerFlag=true, and
// setting triggerLayerName to NULL or "" has the effect of triggerFlag=false.
// While triggerFlag is being deprecated, it is an error for triggerFlag to be false
// and triggerLayerName to be a nonempty string.
void HyPerLayer::ioParam_triggerFlag(enum ParamsIOFlag ioFlag) {
   pvAssert(!parent->parameters()->presentAndNotBeenRead(name, "triggerLayerName"));
   if (ioFlag == PARAMS_IO_READ && parent->parameters()->present(name, "triggerFlag")) {
      bool flagFromParams = false;
      parent->parameters()->ioParamValue(
            ioFlag, name, "triggerFlag", &flagFromParams, flagFromParams);
      if (parent->columnId() == 0) {
         WarnLog(triggerFlagMessage);
         triggerFlagMessage.printf("%s: triggerFlag has been deprecated.\n", getDescription_c());
         triggerFlagMessage.printf(
               "   If triggerLayerName is a nonempty string, triggering will be on;\n");
         triggerFlagMessage.printf(
               "   if triggerLayerName is empty or null, triggering will be off.\n");
         if (flagFromParams != triggerFlag) {
            ErrorLog(errorMessage);
            errorMessage.printf("%s: triggerLayerName=", name);
            if (triggerLayerName) {
               errorMessage.printf("\"%s\"", triggerLayerName);
            }
            else {
               errorMessage.printf("NULL");
            }
            errorMessage.printf(
                  " implies triggerFlag=%s but triggerFlag was set in params to %s\n",
                  triggerFlag ? "true" : "false",
                  flagFromParams ? "true" : "false");
         }
      }
      if (flagFromParams != triggerFlag) {
         MPI_Barrier(parent->getCommunicator()->communicator());
         exit(EXIT_FAILURE);
      }
   }
}

void HyPerLayer::ioParam_triggerOffset(enum ParamsIOFlag ioFlag) {
   assert(!parent->parameters()->presentAndNotBeenRead(name, "triggerLayerName"));
   if (triggerFlag) {
      parent->parameters()->ioParamValue(
            ioFlag, name, "triggerOffset", &triggerOffset, triggerOffset);
      if (triggerOffset < 0) {
         if (parent->columnId() == 0) {
            Fatal().printf(
                  "%s: triggerOffset (%f) must be nonnegative.\n", getDescription_c(), triggerOffset);
         }
      }
   }
}

void HyPerLayer::ioParam_triggerBehavior(enum ParamsIOFlag ioFlag) {
   assert(!parent->parameters()->presentAndNotBeenRead(name, "triggerLayerName"));
   if (triggerFlag) {
      parent->parameters()->ioParamString(
            ioFlag,
            name,
            "triggerBehavior",
            &triggerBehavior,
            "updateOnlyOnTrigger",
            true /*warnIfAbsent*/);
      if (triggerBehavior == NULL || !strcmp(triggerBehavior, "")) {
         free(triggerBehavior);
         triggerBehavior = strdup("updateOnlyOnTrigger");
         triggerBehaviorType = UPDATEONLY_TRIGGER;
      }
      else if (!strcmp(triggerBehavior, "updateOnlyOnTrigger")) {
         triggerBehaviorType = UPDATEONLY_TRIGGER;
      }
      else if (!strcmp(triggerBehavior, "resetStateOnTrigger")) {
         triggerBehaviorType = RESETSTATE_TRIGGER;
      }
      else if (!strcmp(triggerBehavior, "ignore")) {
         triggerBehaviorType = NO_TRIGGER;
      }
      else {
         if (parent->columnId() == 0) {
            ErrorLog().printf(
                  "%s: triggerBehavior=\"%s\" is unrecognized.\n",
                  getDescription_c(),
                  triggerBehavior);
         }
         MPI_Barrier(parent->getCommunicator()->communicator());
         exit(EXIT_FAILURE);
      }
   }
   else {
      triggerBehaviorType = NO_TRIGGER;
   }
}

void HyPerLayer::ioParam_triggerResetLayerName(enum ParamsIOFlag ioFlag) {
   assert(!parent->parameters()->presentAndNotBeenRead(name, "triggerLayerName"));
   if (triggerFlag) {
      assert(!parent->parameters()->presentAndNotBeenRead(name, "triggerBehavior"));
      if (!strcmp(triggerBehavior, "resetStateOnTrigger")) {
         parent->parameters()->ioParamStringRequired(
               ioFlag, name, "triggerResetLayerName", &triggerResetLayerName);
      }
   }
}

void HyPerLayer::ioParam_writeStep(enum ParamsIOFlag ioFlag) {
   parent->parameters()->ioParamValue(
         ioFlag, name, "writeStep", &writeStep, parent->getDeltaTime());
}

void HyPerLayer::ioParam_initialWriteTime(enum ParamsIOFlag ioFlag) {
   assert(!parent->parameters()->presentAndNotBeenRead(name, "writeStep"));
   if (writeStep >= 0.0) {
      parent->parameters()->ioParamValue(ioFlag, name, "initialWriteTime", &initialWriteTime, 0.0);
      if (ioFlag == PARAMS_IO_READ && writeStep > 0.0 && initialWriteTime < 0.0) {
         double storeInitialWriteTime = initialWriteTime;
         while (initialWriteTime < 0.0) {
            initialWriteTime += writeStep;
         }
         if (parent->columnId() == 0) {
            WarnLog(warningMessage);
            warningMessage.printf(
                  "%s: initialWriteTime %f is negative. Adjusting "
                  "initialWriteTime:\n",
                  getDescription_c(),
                  storeInitialWriteTime);
            warningMessage.printf("   initialWriteTime adjusted to %f\n", initialWriteTime);
         }
      }
   }
}

void HyPerLayer::ioParam_sparseLayer(enum ParamsIOFlag ioFlag) {
   if (ioFlag == PARAMS_IO_READ && !parent->parameters()->present(name, "sparseLayer")
       && parent->parameters()->present(name, "writeSparseActivity")) {
      Fatal().printf("writeSparseActivity is obsolete. Use sparseLayer instead.\n");
   }
   // writeSparseActivity was deprecated Nov 4, 2014 and marked obsolete Mar 14, 2017.
   parent->parameters()->ioParamValue(ioFlag, name, "sparseLayer", &sparseLayer, false);
}

// writeSparseValues is obsolete as of Mar 14, 2017.
void HyPerLayer::ioParam_writeSparseValues(enum ParamsIOFlag ioFlag) {
   if (ioFlag == PARAMS_IO_READ) {
      assert(!parent->parameters()->presentAndNotBeenRead(name, "sparseLayer"));
      if (sparseLayer && parent->parameters()->present(name, "writeSparseValues")) {
         WarnLog() << "writeSparseValues parameter, defined in " << getDescription()
                   << ", is obsolete.\n";
         bool writeSparseValues;
         parent->parameters()->ioParamValue(
               ioFlag, name, "writeSparseValues", &writeSparseValues, true /*default value*/);
         if (!writeSparseValues) {
            WarnLog() << "The sparse-values format is used for all sparse layers.\n";
         }
      }
   }
}

Response::Status HyPerLayer::respond(std::shared_ptr<BaseMessage const> message) {
   Response::Status status = BaseLayer::respond(message);
   if (status != Response::SUCCESS) {
      return status;
   }
   else if (auto castMessage = std::dynamic_pointer_cast<LayerSetMaxPhaseMessage const>(message)) {
      return respondLayerSetMaxPhase(castMessage);
   }
   else if (auto castMessage = std::dynamic_pointer_cast<LayerWriteParamsMessage const>(message)) {
      return respondLayerWriteParams(castMessage);
   }
   else if (
         auto castMessage =
               std::dynamic_pointer_cast<LayerProbeWriteParamsMessage const>(message)) {
      return respondLayerProbeWriteParams(castMessage);
   }
   else if (
         auto castMessage =
               std::dynamic_pointer_cast<LayerClearProgressFlagsMessage const>(message)) {
      return respondLayerClearProgressFlags(castMessage);
   }
   else if (auto castMessage = std::dynamic_pointer_cast<LayerUpdateStateMessage const>(message)) {
      return respondLayerUpdateState(castMessage);
   }
   else if (
         auto castMessage =
               std::dynamic_pointer_cast<LayerRecvSynapticInputMessage const>(message)) {
      return respondLayerRecvSynapticInput(castMessage);
   }
#ifdef PV_USE_CUDA
   else if (auto castMessage = std::dynamic_pointer_cast<LayerCopyFromGpuMessage const>(message)) {
      return respondLayerCopyFromGpu(castMessage);
   }
#endif // PV_USE_CUDA
   else if (
         auto castMessage =
               std::dynamic_pointer_cast<LayerAdvanceDataStoreMessage const>(message)) {
      return respondLayerAdvanceDataStore(castMessage);
   }
   else if (auto castMessage = std::dynamic_pointer_cast<LayerPublishMessage const>(message)) {
      return respondLayerPublish(castMessage);
   }
   else if (auto castMessage = std::dynamic_pointer_cast<LayerOutputStateMessage const>(message)) {
      return respondLayerOutputState(castMessage);
   }
   else if (
         auto castMessage = std::dynamic_pointer_cast<LayerCheckNotANumberMessage const>(message)) {
      return respondLayerCheckNotANumber(castMessage);
   }
   else {
      return status;
   }
}

Response::Status
HyPerLayer::respondLayerSetMaxPhase(std::shared_ptr<LayerSetMaxPhaseMessage const> message) {
   return setMaxPhase(message->mMaxPhase);
}

Response::Status
HyPerLayer::respondLayerWriteParams(std::shared_ptr<LayerWriteParamsMessage const> message) {
   writeParams();
   return Response::SUCCESS;
}

Response::Status HyPerLayer::respondLayerProbeWriteParams(
      std::shared_ptr<LayerProbeWriteParamsMessage const> message) {
   return outputProbeParams();
}

Response::Status HyPerLayer::respondLayerClearProgressFlags(
      std::shared_ptr<LayerClearProgressFlagsMessage const> message) {
   clearProgressFlags();
   return Response::SUCCESS;
}

Response::Status HyPerLayer::respondLayerRecvSynapticInput(
      std::shared_ptr<LayerRecvSynapticInputMessage const> message) {
   Response::Status status = Response::SUCCESS;
   if (message->mPhase != getPhase()) {
      return status;
   }
#ifdef PV_USE_CUDA
   if (message->mRecvOnGpuFlag != mRecvGpu) {
      return status;
   }
#endif // PV_USE_CUDA
   if (mHasReceived) {
      return status;
   }
   if (*(message->mSomeLayerHasActed) or !isAllInputReady()) {
      *(message->mSomeLayerIsPending) = true;
      return status;
   }
   resetGSynBuffers(message->mTime, message->mDeltaT); // deltaTimeAdapt is not used

   message->mTimer->start();
   recvAllSynapticInput();
   mHasReceived = true;
   *(message->mSomeLayerHasActed) = true;
   message->mTimer->stop();

   return status;
}

Response::Status
HyPerLayer::respondLayerUpdateState(std::shared_ptr<LayerUpdateStateMessage const> message) {
   Response::Status status = Response::SUCCESS;
   if (message->mPhase != getPhase()) {
      return status;
   }
#ifdef PV_USE_CUDA
   if (message->mRecvOnGpuFlag != mRecvGpu) {
      return status;
   }
   if (message->mUpdateOnGpuFlag != mUpdateGpu) {
      return status;
   }
#endif // PV_USE_CUDA
   if (mHasUpdated) {
      return status;
   }
   if (*(message->mSomeLayerHasActed) or !mHasReceived) {
      *(message->mSomeLayerIsPending) = true;
      return status;
   }
   status = callUpdateState(message->mTime, message->mDeltaT);

   mHasUpdated = true;
   *(message->mSomeLayerHasActed) = true;
   return status;
}

#ifdef PV_USE_CUDA
Response::Status
HyPerLayer::respondLayerCopyFromGpu(std::shared_ptr<LayerCopyFromGpuMessage const> message) {
   Response::Status status = Response::SUCCESS;
   if (message->mPhase != getPhase()) {
      return status;
   }
   message->mTimer->start();
   copyAllActivityFromDevice();
   copyAllVFromDevice();
   copyAllGSynFromDevice();
   addGpuTimers();
   message->mTimer->stop();
   return status;
}
#endif // PV_USE_CUDA

Response::Status HyPerLayer::respondLayerAdvanceDataStore(
      std::shared_ptr<LayerAdvanceDataStoreMessage const> message) {
   if (message->mPhase < 0 || message->mPhase == getPhase()) {
      publisher->increaseTimeLevel();
   }
   return Response::SUCCESS;
}

Response::Status
HyPerLayer::respondLayerPublish(std::shared_ptr<LayerPublishMessage const> message) {
   if (message->mPhase != getPhase()) {
      return Response::NO_ACTION;
   }
   publish(parent->getCommunicator(), message->mTime);
   return Response::SUCCESS;
}

Response::Status HyPerLayer::respondLayerCheckNotANumber(
      std::shared_ptr<LayerCheckNotANumberMessage const> message) {
   Response::Status status = Response::SUCCESS;
   if (message->mPhase != getPhase()) {
      return status;
   }
   auto layerData = getLayerData();
   int const N = getNumExtendedAllBatches();
   for (int n = 0; n < N; n++) {
      float a = layerData[n];
      FatalIf(
            a != a,
            "%s has not-a-number values in the activity buffer. Exiting.\n",
            getDescription_c());
   }
   return status;
}

Response::Status
HyPerLayer::respondLayerOutputState(std::shared_ptr<LayerOutputStateMessage const> message) {
   Response::Status status = Response::SUCCESS;
   if (message->mPhase != getPhase()) {
      return status;
   }
   status = outputState(message->mTime); // also calls layer probes' outputState
   return status;
}

void HyPerLayer::clearProgressFlags() {
   mHasReceived = false;
   mHasUpdated = false;
}

#ifdef PV_USE_CUDA

int HyPerLayer::allocateUpdateKernel() {
   Fatal() << "Layer \"" << name << "\" of type " << mObjectType
           << " does not support updating on GPUs yet.\n";
   return PV_FAILURE;
}

int HyPerLayer::allocateDeviceBuffers() {
   int status = 0;

   const size_t size = getNumNeuronsAllBatches() * sizeof(float);
   const size_t size_ex = getNumExtendedAllBatches() * sizeof(float);

   PVCuda::CudaDevice *device = parent->getDevice();

   // Allocate based on which flags are set
   if (allocDeviceV) {
      d_V = device->createBuffer(size, &description);
   }

   if (allocDeviceDatastore) {
      d_Datastore = device->createBuffer(size_ex, &description);
      assert(d_Datastore);
#ifdef PV_USE_CUDNN
      cudnn_Datastore = device->createBuffer(size_ex, &description);
      assert(cudnn_Datastore);
#endif
   }

   if (allocDeviceActiveIndices) {
      d_numActive = device->createBuffer(parent->getNBatch() * sizeof(long), &description);
      d_ActiveIndices = device->createBuffer(
            getNumExtendedAllBatches() * sizeof(SparseList<float>::Entry), &description);
      assert(d_ActiveIndices);
   }

   if (allocDeviceActivity) {
      d_Activity = device->createBuffer(size_ex, &description);
   }

   // d_GSyn is the entire GSyn buffer. cudnn_GSyn is only one GSyn channel.
   if (allocDeviceGSyn) {
      d_GSyn = device->createBuffer(size * numChannels, &description);
      assert(d_GSyn);
#ifdef PV_USE_CUDNN
      cudnn_GSyn = device->createBuffer(size, &description);
#endif
   }

   return status;
}

#endif // PV_USE_CUDA

Response::Status
HyPerLayer::communicateInitInfo(std::shared_ptr<CommunicateInitInfoMessage const> message) {
   // HyPerLayers need to tell the parent HyPerCol how many random number
   // seeds they need. At the start of HyPerCol::run, the parent HyPerCol
   // calls each layer's communicateInitInfo() sequentially in a repeatable order
   // (probably the order the layers appear in the params file) to make sure
   // that the same runs use the same RNG seeds in the same way.
   //
   // If any other object in the column needs the layer to have a certain minimum
   // margin width (e.g. a HyPerConn with patch size bigger than one), it should
   // call the layer's requireMarginWidth() method during its communicateInitInfo
   // stage.
   //
   // Since all communicateInitInfo() methods are called before any allocateDataStructures()
   // methods, HyPerLayer knows its marginWidth before it has to allocate
   // anything. So the margin width does not have to be specified in params.
   if (triggerFlag) {
      triggerLayer = message->lookup<HyPerLayer>(std::string(triggerLayerName));
      if (triggerLayer == NULL) {
         if (parent->columnId() == 0) {
            ErrorLog().printf(
                  "%s: triggerLayerName \"%s\" is not a layer in the HyPerCol.\n",
                  getDescription_c(),
                  triggerLayerName);
         }
         MPI_Barrier(parent->getCommunicator()->communicator());
         exit(EXIT_FAILURE);
      }
      if (triggerBehaviorType == RESETSTATE_TRIGGER) {
         char const *resetLayerName = NULL; // Will point to name of actual resetLayer, whether
         // triggerResetLayerName is blank (in which case
         // resetLayerName==triggerLayerName) or not
         if (triggerResetLayerName == NULL || triggerResetLayerName[0] == '\0') {
            resetLayerName = triggerLayerName;
            triggerResetLayer = triggerLayer;
         }
         else {
            resetLayerName = triggerResetLayerName;
            triggerResetLayer = message->lookup<HyPerLayer>(std::string(triggerResetLayerName));
            if (triggerResetLayer == NULL) {
               if (parent->columnId() == 0) {
                  ErrorLog().printf(
                        "%s: triggerResetLayerName \"%s\" is not a layer in the HyPerCol.\n",
                        getDescription_c(),
                        triggerResetLayerName);
               }
               MPI_Barrier(parent->getCommunicator()->communicator());
               exit(EXIT_FAILURE);
            }
         }
         // Check that triggerResetLayer and this layer have the same (restricted) dimensions.
         // Do we need to postpone until triggerResetLayer has finished its communicateInitInfo?
         PVLayerLoc const *triggerLoc = triggerResetLayer->getLayerLoc();
         PVLayerLoc const *localLoc = this->getLayerLoc();
         if (triggerLoc->nxGlobal != localLoc->nxGlobal
             || triggerLoc->nyGlobal != localLoc->nyGlobal
             || triggerLoc->nf != localLoc->nf) {
            if (parent->columnId() == 0) {
               Fatal(errorMessage);
               errorMessage.printf(
                     "%s: triggerResetLayer \"%s\" has incompatible dimensions.\n",
                     getDescription_c(),
                     resetLayerName);
               errorMessage.printf(
                     "    \"%s\" is %d-by-%d-by-%d and \"%s\" is %d-by-%d-by-%d.\n",
                     name,
                     localLoc->nxGlobal,
                     localLoc->nyGlobal,
                     localLoc->nf,
                     resetLayerName,
                     triggerLoc->nxGlobal,
                     triggerLoc->nyGlobal,
                     triggerLoc->nf);
            }
         }
      }
   }

#ifdef PV_USE_CUDA
   // Here, the connection tells all participating receiving layers to allocate memory on the GPU
   // if receiving from the GPU is set. These buffers should be set in allocate.
   if (mUpdateGpu) {
      this->setAllocDeviceGSyn();
      this->setAllocDeviceV();
      this->setAllocDeviceActivity();
   }
#endif

   return Response::SUCCESS;
}

Response::Status HyPerLayer::setMaxPhase(int *maxPhase) {
   if (*maxPhase < phase) {
      *maxPhase = phase;
   }
   return Response::SUCCESS;
}

void HyPerLayer::addRecvConn(BaseConnection *conn) {
   FatalIf(
         conn->getPost() != this,
         "%s called addRecvConn for %s, but \"%s\" is not the post-synaptic layer for \"%s\".\n",
         conn->getDescription_c(),
         getDescription_c(),
         getName(),
         conn->getName());
#ifdef PV_USE_CUDA
   // CPU connections must run first to avoid race conditions
   if (!conn->getReceiveGpu()) {
      recvConns.insert(recvConns.begin(), conn);
   }
   // Otherwise, add to the back. If there are no GPUs at all, just add to the back.
   else
#endif
   {
      recvConns.push_back(conn);
#ifdef PV_USE_CUDA
      // If it is receiving from the GPU, set the layer flag as such
      mRecvGpu = true;
#endif
   }
}

int HyPerLayer::openOutputStateFile(Checkpointer *checkpointer) {
   pvAssert(writeStep >= 0);

   if (checkpointer->getMPIBlock()->getRank() == 0) {
      std::string outputStatePath(getName());
      outputStatePath.append(".pvp");

      std::string checkpointLabel(getName());
      checkpointLabel.append("_filepos");

      bool createFlag = checkpointer->getCheckpointReadDirectory().empty();
      mOutputStateStream = new CheckpointableFileStream(
            outputStatePath.c_str(), createFlag, checkpointer, checkpointLabel);
   }
   return PV_SUCCESS;
}

void HyPerLayer::synchronizeMarginWidth(HyPerLayer *layer) {
   if (layer == this) {
      return;
   }
   assert(layer->getLayerLoc() != NULL && this->getLayerLoc() != NULL);
   HyPerLayer **newSynchronizedMarginWidthLayers =
         (HyPerLayer **)calloc(numSynchronizedMarginWidthLayers + 1, sizeof(HyPerLayer *));
   assert(newSynchronizedMarginWidthLayers);
   if (numSynchronizedMarginWidthLayers > 0) {
      for (int k = 0; k < numSynchronizedMarginWidthLayers; k++) {
         newSynchronizedMarginWidthLayers[k] = synchronizedMarginWidthLayers[k];
      }
      free(synchronizedMarginWidthLayers);
   }
   else {
      assert(synchronizedMarginWidthLayers == NULL);
   }
   synchronizedMarginWidthLayers = newSynchronizedMarginWidthLayers;
   synchronizedMarginWidthLayers[numSynchronizedMarginWidthLayers] = layer;
   numSynchronizedMarginWidthLayers++;

   equalizeMargins(this, layer);

   return;
}

int HyPerLayer::equalizeMargins(HyPerLayer *layer1, HyPerLayer *layer2) {
   int border1, border2, maxborder, result;
   int status = PV_SUCCESS;

   border1 = layer1->getLayerLoc()->halo.lt;
   border2 = layer2->getLayerLoc()->halo.lt;
   maxborder = border1 > border2 ? border1 : border2;
   layer1->requireMarginWidth(maxborder, &result, 'x');
   if (result != maxborder) {
      status = PV_FAILURE;
   }
   layer2->requireMarginWidth(maxborder, &result, 'x');
   if (result != maxborder) {
      status = PV_FAILURE;
   }
   if (status != PV_SUCCESS) {
      Fatal().printf(
            "Error in rank %d process: unable to synchronize x-margin widths of layers \"%s\" and "
            "\"%s\" to %d\n",
            layer1->parent->columnId(),
            layer1->getName(),
            layer2->getName(),
            maxborder);
   }
   assert(
         layer1->getLayerLoc()->halo.lt == layer2->getLayerLoc()->halo.lt
         && layer1->getLayerLoc()->halo.rt == layer2->getLayerLoc()->halo.rt
         && layer1->getLayerLoc()->halo.lt == layer1->getLayerLoc()->halo.rt
         && layer1->getLayerLoc()->halo.lt == maxborder);

   border1 = layer1->getLayerLoc()->halo.dn;
   border2 = layer2->getLayerLoc()->halo.dn;
   maxborder = border1 > border2 ? border1 : border2;
   layer1->requireMarginWidth(maxborder, &result, 'y');
   if (result != maxborder) {
      status = PV_FAILURE;
   }
   layer2->requireMarginWidth(maxborder, &result, 'y');
   if (result != maxborder) {
      status = PV_FAILURE;
   }
   if (status != PV_SUCCESS) {
      Fatal().printf(
            "Error in rank %d process: unable to synchronize y-margin widths of layers \"%s\" and "
            "\"%s\" to %d\n",
            layer1->parent->columnId(),
            layer1->getName(),
            layer2->getName(),
            maxborder);
   }
   assert(
         layer1->getLayerLoc()->halo.dn == layer2->getLayerLoc()->halo.dn
         && layer1->getLayerLoc()->halo.up == layer2->getLayerLoc()->halo.up
         && layer1->getLayerLoc()->halo.dn == layer1->getLayerLoc()->halo.up
         && layer1->getLayerLoc()->halo.dn == maxborder);
   return status;
}

Response::Status HyPerLayer::allocateDataStructures() {
   // Once initialize and communicateInitInfo have been called, HyPerLayer has the
   // information it needs to allocate the membrane potential buffer V, the
   // activity buffer activity->data, and the data store.
   auto status = Response::SUCCESS;

   // Doing this check here, since trigger layers are being set up in communicateInitInfo.
   // If the magnitude of the trigger offset is bigger than the delta update time, then error.
   if (triggerFlag) {
      double deltaUpdateTime = getDeltaUpdateTime();
      if (deltaUpdateTime != -1 && triggerOffset >= deltaUpdateTime) {
         Fatal().printf(
               "%s error in rank %d process: triggerOffset (%f) must be lower than the change in "
               "update time (%f).\n",
               getDescription_c(),
               parent->columnId(),
               triggerOffset,
               deltaUpdateTime);
      }
   }

   allocateClayerBuffers();

   const PVLayerLoc *loc = getLayerLoc();
   int nx = loc->nx;
   int ny = loc->ny;
   int nf = loc->nf;
   PVHalo const *halo = &loc->halo;

   // If not mirroring, fill the boundaries with the value in the valueBC param
   if (!useMirrorBCs() && getValueBC() != 0.0f) {
      int idx = 0;
      for (int batch = 0; batch < loc->nbatch; batch++) {
         for (int b = 0; b < halo->up; b++) {
            for (int k = 0; k < (nx + halo->lt + halo->rt) * nf; k++) {
               clayer->activity->data[idx] = getValueBC();
               idx++;
            }
         }
         for (int y = 0; y < ny; y++) {
            for (int k = 0; k < halo->lt * nf; k++) {
               clayer->activity->data[idx] = getValueBC();
               idx++;
            }
            idx += nx * nf;
            for (int k = 0; k < halo->rt * nf; k++) {
               clayer->activity->data[idx] = getValueBC();
               idx++;
            }
         }
         for (int b = 0; b < halo->dn; b++) {
            for (int k = 0; k < (nx + halo->lt + halo->rt) * nf; k++) {
               clayer->activity->data[idx] = getValueBC();
               idx++;
            }
         }
      }
      assert(idx == getNumExtendedAllBatches());
   }

   // allocate storage for the input conductance arrays
   allocateBuffers();

   // Allocate temp buffers if needed, 1 for each thread
   if (parent->getNumThreads() > 1) {
      thread_gSyn = (float **)malloc(sizeof(float *) * parent->getNumThreads());
      assert(thread_gSyn);

      // Assign thread_gSyn to different points of tempMem
      for (int i = 0; i < parent->getNumThreads(); i++) {
         float *tempMem = (float *)malloc(sizeof(float) * getNumNeuronsAllBatches());
         if (!tempMem) {
            Fatal().printf(
                  "HyPerLayer \"%s\" error: rank %d unable to allocate %zu bytes of memory for "
                  "thread_gSyn: %s\n",
                  name,
                  parent->columnId(),
                  sizeof(float) * getNumNeuronsAllBatches(),
                  strerror(errno));
         }
         thread_gSyn[i] = tempMem;
      }
   }

// Allocate cuda stuff on gpu if set
#ifdef PV_USE_CUDA
   int deviceStatus = allocateDeviceBuffers();
   // Allocate receive from post kernel
   if (deviceStatus == 0) {
      status = Response::SUCCESS;
   }
   else {
      Fatal().printf(
            "%s unable to allocate device memory in rank %d process: %s\n",
            getDescription_c(),
            parent->columnId(),
            strerror(errno));
   }
   if (mUpdateGpu) {
      // This function needs to be overwritten as needed on a subclass basis
      deviceStatus = allocateUpdateKernel();
      if (deviceStatus == 0) {
         status = Response::SUCCESS;
      }
   }
#endif

   addPublisher();

   return status;
}

/*
 * Call this routine to increase the number of levels in the data store ring buffer.
 * Calls to this routine after the data store has been initialized will have no effect.
 * The routine returns the new value of numDelayLevels.
 */
int HyPerLayer::increaseDelayLevels(int neededDelay) {
   if (numDelayLevels < neededDelay + 1)
      numDelayLevels = neededDelay + 1;
   if (numDelayLevels > MAX_F_DELAY)
      numDelayLevels = MAX_F_DELAY;
   return numDelayLevels;
}
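
// Example (illustrative, not from the original source): a connection with a
// delay of 2 timesteps calls increaseDelayLevels(2), so numDelayLevels becomes
// 3 -- the ring buffer must retain the current activity plus the two previous
// timesteps, capped at MAX_F_DELAY.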

int HyPerLayer::requireMarginWidth(int marginWidthNeeded, int *marginWidthResult, char axis) {
   // TODO: Is there a good way to handle x- and y-axis margins without so much duplication of code?
   // Navigating through the halo makes it difficult to combine cases.
   PVLayerLoc *loc = &clayer->loc;
   PVHalo *halo = &loc->halo;
   switch (axis) {
      case 'x':
         *marginWidthResult = xmargin;
         if (xmargin < marginWidthNeeded) {
            assert(clayer);
            if (parent->columnId() == 0) {
               InfoLog().printf(
                     "%s: adjusting x-margin width from %d to %d\n",
                     getDescription_c(),
                     xmargin,
                     marginWidthNeeded);
            }
            xmargin = marginWidthNeeded;
            halo->lt = xmargin;
            halo->rt = xmargin;
            calcNumExtended();
            assert(axis == 'x' && getLayerLoc()->halo.lt == getLayerLoc()->halo.rt);
            *marginWidthResult = xmargin;
            if (synchronizedMarginWidthLayers != NULL) {
               for (int k = 0; k < numSynchronizedMarginWidthLayers; k++) {
                  HyPerLayer *l = synchronizedMarginWidthLayers[k];
                  if (l->getLayerLoc()->halo.lt < marginWidthNeeded) {
                     synchronizedMarginWidthLayers[k]->requireMarginWidth(
                           marginWidthNeeded, marginWidthResult, axis);
                  }
                  assert(l->getLayerLoc()->halo.lt == getLayerLoc()->halo.lt);
                  assert(l->getLayerLoc()->halo.rt == getLayerLoc()->halo.rt);
               }
            }
         }
         break;
      case 'y':
         *marginWidthResult = ymargin;
         if (ymargin < marginWidthNeeded) {
            assert(clayer);
            if (parent->columnId() == 0) {
               InfoLog().printf(
                     "%s: adjusting y-margin width from %d to %d\n",
                     getDescription_c(),
                     ymargin,
                     marginWidthNeeded);
            }
            ymargin = marginWidthNeeded;
            halo->dn = ymargin;
            halo->up = ymargin;
            calcNumExtended();
            assert(axis == 'y' && getLayerLoc()->halo.dn == getLayerLoc()->halo.up);
            *marginWidthResult = ymargin;
            if (synchronizedMarginWidthLayers != NULL) {
               for (int k = 0; k < numSynchronizedMarginWidthLayers; k++) {
                  HyPerLayer *l = synchronizedMarginWidthLayers[k];
                  if (l->getLayerLoc()->halo.up < marginWidthNeeded) {
                     synchronizedMarginWidthLayers[k]->requireMarginWidth(
                           marginWidthNeeded, marginWidthResult, axis);
                  }
                  assert(l->getLayerLoc()->halo.dn == getLayerLoc()->halo.dn);
                  assert(l->getLayerLoc()->halo.up == getLayerLoc()->halo.up);
               }
            }
         }
         break;
      default: assert(0); break;
   }
   return PV_SUCCESS;
}

int HyPerLayer::requireChannel(int channelNeeded, int *numChannelsResult) {
   if (channelNeeded >= numChannels) {
      numChannels = channelNeeded + 1;
   }
   *numChannelsResult = numChannels;

   return PV_SUCCESS;
}
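
// A hypothetical caller's view (illustrative, not from the original source):
// a connection delivering to the inhibitory channel could ensure the channel
// exists during its communicateInitInfo stage roughly as follows.
//
//   int numChannelsResult = 0;
//   postLayer->requireChannel(1 /* e.g. the inhibitory channel */, &numChannelsResult);
//   // numChannelsResult is now at least 2, and allocateGSyn() will size the
//   // contiguous GSyn block accordingly during allocateDataStructures.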

const float *HyPerLayer::getLayerData(int delay) {
   PVLayerCube cube = publisher->createCube(delay);
   return cube.data;
}

int HyPerLayer::mirrorInteriorToBorder(PVLayerCube *cube, PVLayerCube *border) {
   assert(cube->numItems == border->numItems);
   assert(localDimensionsEqual(&cube->loc, &border->loc));

   mirrorToNorthWest(border, cube);
   mirrorToNorth(border, cube);
   mirrorToNorthEast(border, cube);
   mirrorToWest(border, cube);
   mirrorToEast(border, cube);
   mirrorToSouthWest(border, cube);
   mirrorToSouth(border, cube);
   mirrorToSouthEast(border, cube);
   return 0;
}

Response::Status HyPerLayer::registerData(Checkpointer *checkpointer) {
   auto status = BaseLayer::registerData(checkpointer);
   if (!Response::completed(status)) {
      return status;
   }
   checkpointPvpActivityFloat(checkpointer, "A", getActivity(), true /*extended*/);
   if (getV() != nullptr) {
      checkpointPvpActivityFloat(checkpointer, "V", getV(), false /*not extended*/);
   }
   publisher->checkpointDataStore(checkpointer, getName(), "Delays");
   checkpointer->registerCheckpointData(
         std::string(getName()),
         std::string("lastUpdateTime"),
         &mLastUpdateTime,
         (std::size_t)1,
         true /*broadcast*/,
         false /*not constant*/);
   checkpointer->registerCheckpointData(
         std::string(getName()),
         std::string("nextWrite"),
         &writeTime,
         (std::size_t)1,
         true /*broadcast*/,
         false /*not constant*/);

   if (writeStep >= 0.0) {
      openOutputStateFile(checkpointer);
      if (sparseLayer) {
         checkpointer->registerCheckpointData(
               std::string(getName()),
               std::string("numframes_sparse"),
               &writeActivitySparseCalls,
               (std::size_t)1,
               true /*broadcast*/,
               false /*not constant*/);
      }
      else {
         checkpointer->registerCheckpointData(
               std::string(getName()),
               std::string("numframes"),
               &writeActivityCalls,
               (std::size_t)1,
               true /*broadcast*/,
               false /*not constant*/);
      }
   }

   // Timers

   update_timer = new Timer(getName(), "layer", "update ");
   checkpointer->registerTimer(update_timer);

   recvsyn_timer = new Timer(getName(), "layer", "recvsyn");
   checkpointer->registerTimer(recvsyn_timer);
#ifdef PV_USE_CUDA
   auto cudaDevice = parent->getDevice();
   if (cudaDevice) {
      gpu_update_timer = new PVCuda::CudaTimer(getName(), "layer", "gpuupdate");
      gpu_update_timer->setStream(cudaDevice->getStream());
      checkpointer->registerTimer(gpu_update_timer);

      gpu_recvsyn_timer = new PVCuda::CudaTimer(getName(), "layer", "gpurecvsyn");
      gpu_recvsyn_timer->setStream(cudaDevice->getStream());
      checkpointer->registerTimer(gpu_recvsyn_timer);
   }
#endif // PV_USE_CUDA

   publish_timer = new Timer(getName(), "layer", "publish");
   checkpointer->registerTimer(publish_timer);

   timescale_timer = new Timer(getName(), "layer", "timescale");
   checkpointer->registerTimer(timescale_timer);

   io_timer = new Timer(getName(), "layer", "io ");
   checkpointer->registerTimer(io_timer);

   if (mInitVObject) {
      auto message = std::make_shared<RegisterDataMessage<Checkpointer>>(checkpointer);
      mInitVObject->respond(message);
   }

   return Response::SUCCESS;
}

double HyPerLayer::getDeltaUpdateTime() {
   if (triggerLayer != NULL && triggerBehaviorType == UPDATEONLY_TRIGGER) {
      return getDeltaTriggerTime();
   }
   else {
      return parent->getDeltaTime();
   }
}

double HyPerLayer::getDeltaTriggerTime() {
   if (triggerLayer != NULL) {
      return triggerLayer->getDeltaUpdateTime();
   }
   else {
      return -1;
   }
}
1694 
1695 bool HyPerLayer::needUpdate(double simTime, double dt) {
1696  if (getDeltaUpdateTime() <= 0) {
1697  return false;
1698  }
1699  if (mLastUpdateTime == simTime + triggerOffset) {
1700  return true;
1701  }
1702  double timeToCheck = mLastUpdateTime;
1703  if (triggerLayer != nullptr && triggerBehaviorType == UPDATEONLY_TRIGGER) {
1704  timeToCheck = triggerLayer->getLastUpdateTime();
1705 
1706  // If our trigger layer updates this tick, so do we
1707  if (timeToCheck == simTime && triggerOffset == 0) {
1708  return true;
1709  }
1710  }
1711  if (simTime + triggerOffset >= timeToCheck + getDeltaUpdateTime()
1712  && simTime + triggerOffset + dt <= timeToCheck + getDeltaUpdateTime() + dt) {
1713  return true;
1714  }
1715  return false;
1716 }
1717 
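// A worked example of the window test in needUpdate(), assuming dt = 1,
// triggerOffset = 0, getDeltaUpdateTime() = 5, and mLastUpdateTime = 10:
// at simTime = 14 the first clause fails (14 < 10 + 5), so there is no
// update; at simTime = 15 both clauses hold (15 >= 15 and 15 + 1 <= 15 + 1),
// so the layer updates exactly once every fifth timestep.
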
1718 bool HyPerLayer::needReset(double simTime, double dt) {
1719  if (triggerLayer == nullptr) {
1720  return false;
1721  }
1722  if (triggerBehaviorType != RESETSTATE_TRIGGER) {
1723  return false;
1724  }
1725  if (getDeltaTriggerTime() <= 0) {
1726  return false;
1727  }
1728  if (simTime >= mLastTriggerTime + getDeltaTriggerTime()) {
1729  // TODO: test "simTime > mLastTriggerTime + getDeltaTriggerTime() - 0.5 * dt",
1730  // to avoid roundoff issues.
1731  return true;
1732  }
1733  return false;
1734 }
1735 
1736 Response::Status HyPerLayer::callUpdateState(double simTime, double dt) {
1737  auto status = Response::NO_ACTION;
1738  if (needUpdate(simTime, dt)) {
1739  if (needReset(simTime, dt)) {
1740  resetStateOnTrigger();
1741  mLastTriggerTime = simTime;
1742  }
1743 
1744  update_timer->start();
1745 #ifdef PV_USE_CUDA
1746  if (mUpdateGpu) {
1747  gpu_update_timer->start();
1750  status = updateStateGpu(simTime, dt);
1751  gpu_update_timer->stop();
1752  }
1753  else {
1754 #endif
1755  status = updateState(simTime, dt);
1756 #ifdef PV_USE_CUDA
1757  }
1758  // Activity updated, set flag to true
1759  updatedDeviceActivity = true;
1760  updatedDeviceDatastore = true;
1761 #endif
1762  update_timer->stop();
1763  mNeedToPublish = true;
1764  mLastUpdateTime = simTime;
1765  }
1766  return status;
1767 }
1768 
1769 void HyPerLayer::resetStateOnTrigger() {
1770  assert(triggerResetLayer != NULL);
1771  float *V = getV();
1772  if (V == NULL) {
1773  if (parent->columnId() == 0) {
1774  ErrorLog().printf(
1775  "%s: triggerBehavior is \"resetStateOnTrigger\" but layer does not have a membrane "
1776  "potential.\n",
1777  getDescription_c());
1778  }
1779  MPI_Barrier(parent->getCommunicator()->communicator());
1780  exit(EXIT_FAILURE);
1781  }
1782  float const *resetV = triggerResetLayer->getV();
1783  if (resetV != NULL) {
1784 #ifdef PV_USE_OPENMP_THREADS
1785 #pragma omp parallel for
1786 #endif // PV_USE_OPENMP_THREADS
1787  for (int k = 0; k < getNumNeuronsAllBatches(); k++) {
1788  V[k] = resetV[k];
1789  }
1790  }
1791  else {
1792  float const *resetA = triggerResetLayer->getActivity();
1793  PVLayerLoc const *loc = triggerResetLayer->getLayerLoc();
1794  PVHalo const *halo = &loc->halo;
1795  for (int b = 0; b < parent->getNBatch(); b++) {
1796  float const *resetABatch = resetA + (b * triggerResetLayer->getNumExtended());
1797  float *VBatch = V + (b * triggerResetLayer->getNumNeurons());
1798 #ifdef PV_USE_OPENMP_THREADS
1799 #pragma omp parallel for
1800 #endif // PV_USE_OPENMP_THREADS
1801  for (int k = 0; k < getNumNeurons(); k++) {
1802  int kex = kIndexExtended(
1803  k, loc->nx, loc->ny, loc->nf, halo->lt, halo->rt, halo->dn, halo->up);
1804  VBatch[k] = resetABatch[kex];
1805  }
1806  }
1807  }
1808 
1809  setActivity();
1810 
1811 // Update V on GPU after CPU V gets set
1812 #ifdef PV_USE_CUDA
1813  if (mUpdateGpu) {
1814  getDeviceV()->copyToDevice(V);
1815  // Right now, we compute the activity on the CPU and copy it to the GPU.
1816  // TODO: calculate the activity directly on the GPU.
1817  getDeviceActivity()->copyToDevice(clayer->activity->data);
1818  // We need to updateDeviceActivity and Datastore if we're resetting V
1819  updatedDeviceActivity = true;
1820  updatedDeviceDatastore = true;
1821  }
1822 #endif
1823 }
1824 
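// Index-conversion sketch for the loop above: with a restricted patch of
// nx = 4, ny = 4, nf = 1 and a halo of 1 on every side, the extended frame
// is 6 x 6, and restricted neuron k = 0 (x = 0, y = 0) maps to
// kIndexExtended(0, 4, 4, 1, 1, 1, 1, 1) = (0 + 1) * 6 + (0 + 1) = 7,
// one row and one column inside the extended frame.
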
1825 int HyPerLayer::resetGSynBuffers(double timef, double dt) {
1826  int status = PV_SUCCESS;
1827  if (GSyn == NULL)
1828  return PV_SUCCESS;
1829  resetGSynBuffers_HyPerLayer(
1830  parent->getNBatch(), this->getNumNeurons(), getNumChannels(), GSyn[0]);
1831  return status;
1832 }
1833 
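// A plausible reference for the kernel call above; the definitive version
// lives with the layer kernels, so treat this sketch only as a statement of
// the contract: zero every channel of GSyn for every batch element.
//
// static void resetGSynBuffers_sketch(
//       int nbatch, int numNeurons, int numChannels, float *gSynHead) {
//    long const count = (long)nbatch * numNeurons * numChannels;
//    for (long k = 0; k < count; k++) {
//       gSynHead[k] = 0.0f;
//    }
// }
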
1834 #ifdef PV_USE_CUDA
1835 int HyPerLayer::runUpdateKernel() {
1836 
1838  assert(mUpdateGpu);
1839  if (updatedDeviceGSyn) {
1840  copyAllGSynToDevice();
1841  updatedDeviceGSyn = false;
1842  }
1843 
1844  // V and Activity are write only buffers, so we don't need to do anything with them
1845  assert(krUpdate);
1846 
1847  // Sync all buffers before running
1848  syncGpu();
1849 
1850  // Run kernel
1851  krUpdate->run();
1853 
1854  return PV_SUCCESS;
1855 }
1856 
1857 Response::Status HyPerLayer::updateStateGpu(double timef, double dt) {
1858  Fatal() << "Update state for layer " << name << " is not implemented\n";
1859  return Response::NO_ACTION; // never reached; added to prevent compiler warnings.
1860 }
1861 #endif
1862 
1863 Response::Status HyPerLayer::updateState(double timef, double dt) {
1864  // just copy accumulation buffer to membrane potential
1865  // and activity buffer (nonspiking)
1866 
1867  const PVLayerLoc *loc = getLayerLoc();
1868  float *A = getCLayer()->activity->data;
1869  float *V = getV();
1870  int num_channels = getNumChannels();
1871  float *gSynHead = GSyn == NULL ? NULL : GSyn[0];
1872 
1873  int nx = loc->nx;
1874  int ny = loc->ny;
1875  int nf = loc->nf;
1876  int nbatch = loc->nbatch;
1877  int num_neurons = nx * ny * nf;
1878  if (num_channels == 1) {
1879  applyGSyn_HyPerLayer1Channel(nbatch, num_neurons, V, gSynHead);
1880  }
1881  else {
1882  applyGSyn_HyPerLayer(nbatch, num_neurons, V, gSynHead);
1883  }
1884  setActivity_HyPerLayer(
1885  nbatch,
1886  num_neurons,
1887  A,
1888  V,
1889  nx,
1890  ny,
1891  nf,
1892  loc->halo.lt,
1893  loc->halo.rt,
1894  loc->halo.dn,
1895  loc->halo.up);
1896 
1897  return Response::SUCCESS;
1898 }
1899 
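// Sketch of the helpers used above, under the usual PetaVision convention
// that channel 0 is excitatory and channel 1 inhibitory. The definitive
// kernels live elsewhere; this only states the intended contract:
//
// static void applyGSyn_sketch(
//       int nbatch, int numNeurons, float *V, float const *gSynHead,
//       int numChannels) {
//    float const *exc = &gSynHead[0 * nbatch * numNeurons]; // CHANNEL_EXC
//    float const *inh = &gSynHead[1 * nbatch * numNeurons]; // CHANNEL_INH
//    for (int k = 0; k < nbatch * numNeurons; k++) {
//       V[k] = (numChannels == 1) ? exc[k] : exc[k] - inh[k];
//    }
// }
//
// setActivity_HyPerLayer then copies V into the restricted interior of the
// extended activity buffer, leaving the halo to be filled by mirroring or
// by the border exchange.
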
1900 int HyPerLayer::setActivity() {
1901  const PVLayerLoc *loc = getLayerLoc();
1902  return setActivity_HyPerLayer(
1903  loc->nbatch,
1904  getNumNeurons(),
1905  clayer->activity->data,
1906  getV(),
1907  loc->nx,
1908  loc->ny,
1909  loc->nf,
1910  loc->halo.lt,
1911  loc->halo.rt,
1912  loc->halo.dn,
1913  loc->halo.up);
1914 }
1915 
1916 // Updates active indices for all levels (delays) here
1917 void HyPerLayer::updateAllActiveIndices() { publisher->updateAllActiveIndices(); }
1918 
1919 void HyPerLayer::updateActiveIndices() { publisher->updateActiveIndices(0); }
1920 
1921 bool HyPerLayer::isExchangeFinished(int delay) { return publisher->isExchangeFinished(delay); }
1922 
1923 bool HyPerLayer::isAllInputReady() {
1924  bool isReady = true;
1925  for (auto &c : recvConns) {
1926  isReady &= c->isAllInputReady();
1927  }
1928  return isReady;
1929 }
1930 
1931 int HyPerLayer::recvAllSynapticInput() {
1932  int status = PV_SUCCESS;
1933  // Only recvAllSynapticInput if we need an update
1934  if (needUpdate(parent->simulationTime(), parent->getDeltaTime())) {
1935  bool switchGpu = false;
1936  // Start CPU timer here
1937  recvsyn_timer->start();
1938 
1939  for (auto &conn : recvConns) {
1940  pvAssert(conn != NULL);
1941 #ifdef PV_USE_CUDA
1942  // Check if it's done with cpu connections
1943  if (!switchGpu && conn->getReceiveGpu()) {
1944  // Copy GSyn over to GPU
1945  copyAllGSynToDevice();
1946  // Start gpu timer
1947  gpu_recvsyn_timer->start();
1948  switchGpu = true;
1949  }
1950 #endif
1951  conn->deliver();
1952  }
1953 #ifdef PV_USE_CUDA
1954  if (switchGpu) {
1955  // Stop timer
1956  gpu_recvsyn_timer->stop();
1957  }
1958 #endif
1959  recvsyn_timer->stop();
1960  }
1961  return status;
1962 }
1963 
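// Note on the switchGpu flip above: GSyn is copied to the device exactly
// once, at the first GPU connection encountered, and the flag never flips
// back. The loop therefore relies on recvConns being ordered with every
// CPU connection ahead of the GPU connections; a CPU delivery after the
// switch would not be reflected in the device copy of GSyn.
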
1964 #ifdef PV_USE_CUDA
1965 double HyPerLayer::addGpuTimers() {
1966  double simTime = 0;
1967  bool updateNeeded = needUpdate(parent->simulationTime(), parent->getDeltaTime());
1968  if (mRecvGpu && updateNeeded) {
1969  simTime += gpu_recvsyn_timer->accumulateTime();
1970  }
1971  if (mUpdateGpu && updateNeeded) {
1972  simTime += gpu_update_timer->accumulateTime();
1973  }
1974  return simTime;
1975 }
1976 
1977 void HyPerLayer::syncGpu() {
1978  if (mRecvGpu || mUpdateGpu) {
1979  parent->getDevice()->syncDevice();
1980  }
1981 }
1982 
1983 void HyPerLayer::copyAllGSynToDevice() {
1984  if (mRecvGpu || mUpdateGpu) {
1985  // Copy it to device
1986  float *h_postGSyn = GSyn[0];
1987  PVCuda::CudaBuffer *d_postGSyn = this->getDeviceGSyn();
1988  assert(d_postGSyn);
1989  d_postGSyn->copyToDevice(h_postGSyn);
1990  }
1991 }
1992 
1993 void HyPerLayer::copyAllGSynFromDevice() {
1994  // Only copy if recving
1995  if (mRecvGpu) {
1996  float *h_postGSyn = GSyn[0];
1997  PVCuda::CudaBuffer *d_postGSyn = this->getDeviceGSyn();
1998  assert(d_postGSyn);
1999  d_postGSyn->copyFromDevice(h_postGSyn);
2000  }
2001 }
2002 
2003 void HyPerLayer::copyAllVFromDevice() {
2004  // Only copy if updating
2005  if (mUpdateGpu) {
2006  // V for all batch elements is allocated contiguously, so one copy suffices
2007  float *h_V = getV();
2008  PVCuda::CudaBuffer *d_V = this->getDeviceV();
2009  assert(d_V);
2010  d_V->copyFromDevice(h_V);
2011  }
2012 }
2013 
2014 void HyPerLayer::copyAllActivityFromDevice() {
2015  // Only copy if updating
2016  if (mUpdateGpu) {
2017  // Activity for all batch elements is allocated contiguously, so one copy suffices
2018  float *h_activity = getCLayer()->activity->data;
2019  PVCuda::CudaBuffer *d_activity = this->getDeviceActivity();
2020  assert(d_activity);
2021  d_activity->copyFromDevice(h_activity);
2022  }
2023 }
2024 
2025 #endif
2026 
2027 int HyPerLayer::publish(Communicator *comm, double simTime) {
2028  publish_timer->start();
2029 
2030  int status = PV_SUCCESS;
2031  if (mNeedToPublish) {
2032  if (useMirrorBCs()) {
2033  mirrorInteriorToBorder(clayer->activity, clayer->activity);
2034  }
2035  status = publisher->publish(mLastUpdateTime);
2036  mNeedToPublish = false;
2037  }
2038  else {
2039  publisher->copyForward(mLastUpdateTime);
2040  }
2041  publish_timer->stop();
2042  return status;
2043 }
2044 
2045 int HyPerLayer::waitOnPublish(Communicator *comm) {
2046  publish_timer->start();
2047 
2048  // wait for MPI border transfers to complete
2049  //
2050  int status = publisher->wait();
2051 
2052  publish_timer->stop();
2053  return status;
2054 }
2055 
2056 /******************************************************************
2057  * FileIO
2058  *****************************************************************/
2059 
2060 /* Inserts a new probe into the layer's array of LayerProbes.
2061  * Returns the resulting number of probes. If the probe targets a
2062  * different layer, or has already been inserted, the probe is not
2063  * added and the existing count is returned unchanged.
2064  */
2065 int HyPerLayer::insertProbe(LayerProbe *p) {
2066  if (p->getTargetLayer() != this) {
2067  WarnLog().printf(
2068  "HyPerLayer \"%s\": insertProbe called with probe %p, whose targetLayer is not this "
2069  "layer. Probe was not inserted.\n",
2070  name,
2071  p);
2072  return numProbes;
2073  }
2074  for (int i = 0; i < numProbes; i++) {
2075  if (p == probes[i]) {
2076  WarnLog().printf(
2077  "HyPerLayer \"%s\": insertProbe called with probe %p, which has already been "
2078  "inserted as probe %d.\n",
2079  name,
2080  p,
2081  i);
2082  return numProbes;
2083  }
2084  }
2085 
2086  // malloc'ing a new buffer, copying data over, and freeing the old buffer could be replaced by
2087  // realloc
2088  LayerProbe **tmp;
2089  tmp = (LayerProbe **)malloc((numProbes + 1) * sizeof(LayerProbe *));
2090  assert(tmp != NULL);
2091 
2092  for (int i = 0; i < numProbes; i++) {
2093  tmp[i] = probes[i];
2094  }
2095  free(probes);
2096 
2097  probes = tmp;
2098  probes[numProbes] = p;
2099 
2100  return ++numProbes;
2101 }
2102 
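// The realloc variant suggested by the comment above, as a sketch:
//
// LayerProbe **tmp =
//       (LayerProbe **)realloc(probes, (numProbes + 1) * sizeof(LayerProbe *));
// assert(tmp != NULL);
// tmp[numProbes] = p;
// probes = tmp;
// return ++numProbes;
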
2103 Response::Status HyPerLayer::outputProbeParams() {
2104  for (int p = 0; p < numProbes; p++) {
2105  probes[p]->writeParams();
2106  }
2107  return Response::SUCCESS;
2108 }
2109 
2110 Response::Status HyPerLayer::outputState(double timef) {
2111  io_timer->start();
2112 
2113  for (int i = 0; i < numProbes; i++) {
2114  probes[i]->outputStateWrapper(timef, parent->getDeltaTime());
2115  }
2116 
2117  if (timef >= (writeTime - (parent->getDeltaTime() / 2)) && writeStep >= 0) {
2118  int writeStatus = PV_SUCCESS;
2119  writeTime += writeStep;
2120  if (sparseLayer) {
2121  writeStatus = writeActivitySparse(timef);
2122  }
2123  else {
2124  writeStatus = writeActivity(timef);
2125  }
2126  FatalIf(
2127  writeStatus != PV_SUCCESS,
2128  "%s: outputState failed on rank %d process.\n",
2129  getDescription_c(),
2130  parent->columnId());
2131  }
2132 
2133  io_timer->stop();
2134  return Response::SUCCESS;
2135 }
2136 
2137 Response::Status HyPerLayer::readStateFromCheckpoint(Checkpointer *checkpointer) {
2138  if (initializeFromCheckpointFlag) {
2139  readActivityFromCheckpoint(checkpointer);
2140  readVFromCheckpoint(checkpointer);
2141  readDelaysFromCheckpoint(checkpointer);
2142  updateAllActiveIndices();
2143  return Response::SUCCESS;
2144  }
2145  else {
2146  return Response::NO_ACTION;
2147  }
2148 }
2149 
2150 void HyPerLayer::readActivityFromCheckpoint(Checkpointer *checkpointer) {
2151  checkpointer->readNamedCheckpointEntry(std::string(name), std::string("A"), false);
2152 }
2153 
2154 void HyPerLayer::readVFromCheckpoint(Checkpointer *checkpointer) {
2155  if (getV() != nullptr) {
2156  checkpointer->readNamedCheckpointEntry(std::string(name), std::string("V"), false);
2157  }
2158 }
2159 
2160 void HyPerLayer::readDelaysFromCheckpoint(Checkpointer *checkpointer) {
2161  checkpointer->readNamedCheckpointEntry(std::string(name), std::string("Delays"), false);
2162 }
2163 
2164 // readBufferFile and readDataStoreFromFile were removed Jan 23, 2017.
2165 // They were only used by checkpointing, which is now handled by the
2166 // CheckpointEntry class hierarchy.
2167 
2168 Response::Status HyPerLayer::processCheckpointRead() {
2169  updateAllActiveIndices();
2170  return Response::SUCCESS;
2171 }
2172 
2173 int HyPerLayer::writeActivitySparse(double timed) {
2174  PVLayerCube cube = publisher->createCube(0 /*delay*/);
2175  PVLayerLoc const *loc = getLayerLoc();
2176  pvAssert(cube.numItems == loc->nbatch * getNumExtended());
2177 
2178  int const mpiBatchDimension = getMPIBlock()->getBatchDimension();
2179  int const numFrames = mpiBatchDimension * loc->nbatch;
2180  for (int frame = 0; frame < numFrames; frame++) {
2181  int const localBatchIndex = frame % loc->nbatch;
2182  int const mpiBatchIndex = frame / loc->nbatch; // Integer division
2183  pvAssert(mpiBatchIndex * loc->nbatch + localBatchIndex == frame);
2184 
2185  SparseList<float> list;
2186  auto *activeIndicesBatch = (SparseList<float>::Entry const *)cube.activeIndices;
2187  auto *activeIndicesElement = &activeIndicesBatch[localBatchIndex * getNumExtended()];
2189  int nxExt = loc->nx + loc->halo.lt + loc->halo.rt;
2190  int nyExt = loc->ny + loc->halo.dn + loc->halo.up;
2191  int nf = loc->nf;
2192  for (long int k = 0; k < cube.numActive[localBatchIndex]; k++) {
2193  SparseList<float>::Entry entry = activeIndicesElement[k];
2194  int index = (int)entry.index;
2195 
2196  // Location is local extended; need global restricted.
2197  // Get local restricted coordinates.
2198  int x = kxPos(index, nxExt, nyExt, nf) - loc->halo.lt;
2199  if (x < 0 or x >= loc->nx) {
2200  continue;
2201  }
2202  int y = kyPos(index, nxExt, nyExt, nf) - loc->halo.up;
2203  if (y < 0 or y >= loc->ny) {
2204  continue;
2205  }
2206  // Convert to global restricted coordinates.
2207  x += loc->kx0;
2208  y += loc->ky0;
2209  int f = featureIndex(index, nxExt, nyExt, nf);
2210 
2211  // Get global restricted index.
2212  entry.index = (uint32_t)kIndex(x, y, f, loc->nxGlobal, loc->nyGlobal, nf);
2213  list.addEntry(entry);
2214  }
2215  auto gatheredList =
2216  BufferUtils::gatherSparse(getMPIBlock(), list, mpiBatchIndex, 0 /*root process*/);
2217  if (getMPIBlock()->getRank() == 0) {
2218  long fpos = mOutputStateStream->getOutPos();
2219  if (fpos == 0L) {
2220  BufferUtils::ActivityHeader header = BufferUtils::buildSparseActivityHeader<float>(
2221  loc->nx * getMPIBlock()->getNumColumns(),
2222  loc->ny * getMPIBlock()->getNumRows(),
2223  loc->nf,
2224  0 /* numBands */); // numBands will be set by call to incrementNBands.
2225  header.timestamp = timed;
2226  BufferUtils::writeActivityHeader(*mOutputStateStream, header);
2227  }
2228  BufferUtils::writeSparseFrame(*mOutputStateStream, &gatheredList, timed);
2229  }
2230  }
2231  writeActivitySparseCalls += numFrames;
2232  updateNBands(writeActivitySparseCalls);
2233  return PV_SUCCESS;
2234 }
2235 
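// Coordinate sketch for the conversion above: with nx = ny = 8, nf = 1, a
// halo of 1 (so nxExt = nyExt = 10), and a process at kx0 = 8, ky0 = 0,
// extended index 22 gives x = kxPos(22, 10, 10, 1) - 1 = 1 and
// y = kyPos(22, 10, 10, 1) - 1 = 1; adding the offsets yields global
// restricted coordinates (9, 1), which kIndex() flattens into the global
// index stored in the sparse entry.
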
2236 // write non-spiking activity
2237 int HyPerLayer::writeActivity(double timed) {
2238  PVLayerCube cube = publisher->createCube(0);
2239  PVLayerLoc const *loc = getLayerLoc();
2240  pvAssert(cube.numItems == loc->nbatch * getNumExtended());
2241 
2242  PVHalo const &halo = loc->halo;
2243  int const nxExtLocal = loc->nx + halo.lt + halo.rt;
2244  int const nyExtLocal = loc->ny + halo.dn + halo.up;
2245  int const nf = loc->nf;
2246 
2247  int const mpiBatchDimension = getMPIBlock()->getBatchDimension();
2248  int const numFrames = mpiBatchDimension * loc->nbatch;
2249  for (int frame = 0; frame < numFrames; frame++) {
2250  int const localBatchIndex = frame % loc->nbatch;
2251  int const mpiBatchIndex = frame / loc->nbatch; // Integer division
2252  pvAssert(mpiBatchIndex * loc->nbatch + localBatchIndex == frame);
2253 
2254  float *data = &cube.data[localBatchIndex * getNumExtended()];
2255  Buffer<float> localBuffer(data, nxExtLocal, nyExtLocal, nf);
2256  localBuffer.crop(loc->nx, loc->ny, Buffer<float>::CENTER);
2257  Buffer<float> blockBuffer = BufferUtils::gather<float>(
2258  getMPIBlock(), localBuffer, loc->nx, loc->ny, mpiBatchIndex, 0 /*root process*/);
2259  // At this point, the rank-zero process has the entire block for the batch element,
2260  // regardless of what the mpiBatchIndex is.
2261  if (getMPIBlock()->getRank() == 0) {
2262  long fpos = mOutputStateStream->getOutPos();
2263  if (fpos == 0L) {
2264  BufferUtils::ActivityHeader header = BufferUtils::buildActivityHeader<float>(
2265  loc->nx * getMPIBlock()->getNumColumns(),
2266  loc->ny * getMPIBlock()->getNumRows(),
2267  loc->nf,
2268  0 /* numBands */); // numBands will be set by call to incrementNBands.
2269  header.timestamp = timed;
2270  BufferUtils::writeActivityHeader(*mOutputStateStream, header);
2271  }
2272  BufferUtils::writeFrame<float>(*mOutputStateStream, &blockBuffer, timed);
2273  }
2274  }
2275  writeActivityCalls += numFrames;
2276  updateNBands(writeActivityCalls);
2277  return PV_SUCCESS;
2278 }
2279 
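// Each frame above follows the same pipeline: crop the local extended
// buffer down to the restricted nx-by-ny interior, gather the per-process
// tiles of one batch element onto the block's root rank, and let the root
// append the assembled frame; the activity header is written once, when
// the output stream is still at position zero.
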
2280 void HyPerLayer::updateNBands(int const numCalls) {
2281  // Only the root process of the MPI block has an open output stream, so only
2282  // the root process patches INDEX_NBANDS in the file header. This way,
2283  // writeActivityCalls does not need to be coordinated across MPI.
2284  if (mOutputStateStream != nullptr) {
2285  long int fpos = mOutputStateStream->getOutPos();
2286  mOutputStateStream->setOutPos(sizeof(int) * INDEX_NBANDS, true /*fromBeginning*/);
2287  mOutputStateStream->write(&numCalls, (long)sizeof(numCalls));
2288  mOutputStateStream->setOutPos(fpos, true /*fromBeginning*/);
2289  }
2290 }
2291 
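// The pvp activity header is a fixed-size block of int-sized fields at the
// start of the file, so the writes above seek to the INDEX_NBANDS slot,
// patch the frame count in place, and then restore the output position so
// subsequent frames continue to append at the end.
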
2292 bool HyPerLayer::localDimensionsEqual(PVLayerLoc const *loc1, PVLayerLoc const *loc2) {
2293  return loc1->nbatch == loc2->nbatch && loc1->nx == loc2->nx && loc1->ny == loc2->ny
2294  && loc1->nf == loc2->nf && loc1->halo.lt == loc2->halo.lt
2295  && loc1->halo.rt == loc2->halo.rt && loc1->halo.dn == loc2->halo.dn
2296  && loc1->halo.up == loc2->halo.up;
2297 }
2298 
2299 int HyPerLayer::mirrorToNorthWest(PVLayerCube *dest, PVLayerCube *src) {
2300  if (!localDimensionsEqual(&dest->loc, &src->loc)) {
2301  return -1;
2302  }
2303  int nbatch = dest->loc.nbatch;
2304  int nf = dest->loc.nf;
2305  int leftBorder = dest->loc.halo.lt;
2306  int topBorder = dest->loc.halo.up;
2307  size_t sb = strideBExtended(&dest->loc);
2308  size_t sf = strideFExtended(&dest->loc);
2309  size_t sx = strideXExtended(&dest->loc);
2310  size_t sy = strideYExtended(&dest->loc);
2311 
2312  for (int b = 0; b < nbatch; b++) {
2313  float *srcData = src->data + b * sb;
2314  float *destData = dest->data + b * sb;
2315 
2316  float *src0 = srcData + topBorder * sy + leftBorder * sx;
2317  float *dst0 = destData + (topBorder - 1) * sy + (leftBorder - 1) * sx;
2318 
2319  for (int ky = 0; ky < topBorder; ky++) {
2320  float *to = dst0 - ky * sy;
2321  float *from = src0 + ky * sy;
2322  for (int kx = 0; kx < leftBorder; kx++) {
2323  for (int kf = 0; kf < nf; kf++) {
2324  to[kf * sf] = from[kf * sf];
2325  }
2326  to -= nf;
2327  from += nf;
2328  }
2329  }
2330  }
2331  return 0;
2332 }
2333 
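// Mirroring sketch for the corner case above, assuming a halo of 2:
// interior row/column offsets (0, 1) reflect to border offsets (-1, -2),
// so the interior pixel nearest the corner fills the border pixel nearest
// the corner, as if a mirror were laid along the layer edge. The remaining
// mirror functions below apply the same pattern to the other edges and
// corners.
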
2334 int HyPerLayer::mirrorToNorth(PVLayerCube *dest, PVLayerCube *src) {
2335  if (!localDimensionsEqual(&dest->loc, &src->loc)) {
2336  return -1;
2337  }
2338  int nx = clayer->loc.nx;
2339  int nf = clayer->loc.nf;
2340  int leftBorder = dest->loc.halo.lt;
2341  int topBorder = dest->loc.halo.up;
2342  int nbatch = dest->loc.nbatch;
2343  size_t sb = strideBExtended(&dest->loc);
2344  size_t sf = strideFExtended(&dest->loc);
2345  size_t sx = strideXExtended(&dest->loc);
2346  size_t sy = strideYExtended(&dest->loc);
2347 
2348  for (int b = 0; b < nbatch; b++) {
2349  float *srcData = src->data + b * sb;
2350  float *destData = dest->data + b * sb;
2351  float *src0 = srcData + topBorder * sy + leftBorder * sx;
2352  float *dst0 = destData + (topBorder - 1) * sy + leftBorder * sx;
2353 
2354  for (int ky = 0; ky < topBorder; ky++) {
2355  float *to = dst0 - ky * sy;
2356  float *from = src0 + ky * sy;
2357  for (int kx = 0; kx < nx; kx++) {
2358  for (int kf = 0; kf < nf; kf++) {
2359  to[kf * sf] = from[kf * sf];
2360  }
2361  to += nf;
2362  from += nf;
2363  }
2364  }
2365  }
2366  return 0;
2367 }
2368 
2369 int HyPerLayer::mirrorToNorthEast(PVLayerCube *dest, PVLayerCube *src) {
2370  if (!localDimensionsEqual(&dest->loc, &src->loc)) {
2371  return -1;
2372  }
2373  int nx = dest->loc.nx;
2374  int nf = dest->loc.nf;
2375  int leftBorder = dest->loc.halo.lt;
2376  int rightBorder = dest->loc.halo.rt;
2377  int topBorder = dest->loc.halo.up;
2378  int nbatch = dest->loc.nbatch;
2379  size_t sb = strideBExtended(&dest->loc);
2380  size_t sf = strideFExtended(&dest->loc);
2381  size_t sx = strideXExtended(&dest->loc);
2382  size_t sy = strideYExtended(&dest->loc);
2383 
2384  for (int b = 0; b < nbatch; b++) {
2385  float *srcData = src->data + b * sb;
2386  float *destData = dest->data + b * sb;
2387  float *src0 = srcData + topBorder * sy + (nx + leftBorder - 1) * sx;
2388  float *dst0 = destData + (topBorder - 1) * sy + (nx + leftBorder) * sx;
2389 
2390  for (int ky = 0; ky < topBorder; ky++) {
2391  float *to = dst0 - ky * sy;
2392  float *from = src0 + ky * sy;
2393  for (int kx = 0; kx < rightBorder; kx++) {
2394  for (int kf = 0; kf < nf; kf++) {
2395  to[kf * sf] = from[kf * sf];
2396  }
2397  to += nf;
2398  from -= nf;
2399  }
2400  }
2401  }
2402  return 0;
2403 }
2404 
2405 int HyPerLayer::mirrorToWest(PVLayerCube *dest, PVLayerCube *src) {
2406  if (!localDimensionsEqual(&dest->loc, &src->loc)) {
2407  return -1;
2408  }
2409  int ny = dest->loc.ny;
2410  int nf = dest->loc.nf;
2411  int leftBorder = dest->loc.halo.lt;
2412  int topBorder = dest->loc.halo.up;
2413  int nbatch = dest->loc.nbatch;
2414  size_t sb = strideBExtended(&dest->loc);
2415  size_t sf = strideFExtended(&dest->loc);
2416  size_t sx = strideXExtended(&dest->loc);
2417  size_t sy = strideYExtended(&dest->loc);
2418 
2419  for (int b = 0; b < nbatch; b++) {
2420  float *srcData = src->data + b * sb;
2421  float *destData = dest->data + b * sb;
2422  float *src0 = srcData + topBorder * sy + leftBorder * sx;
2423  float *dst0 = destData + topBorder * sy + (leftBorder - 1) * sx;
2424 
2425  for (int ky = 0; ky < ny; ky++) {
2426  float *to = dst0 + ky * sy;
2427  float *from = src0 + ky * sy;
2428  for (int kx = 0; kx < leftBorder; kx++) {
2429  for (int kf = 0; kf < nf; kf++) {
2430  to[kf * sf] = from[kf * sf];
2431  }
2432  to -= nf;
2433  from += nf;
2434  }
2435  }
2436  }
2437  return 0;
2438 }
2439 
2440 int HyPerLayer::mirrorToEast(PVLayerCube *dest, PVLayerCube *src) {
2441  if (!localDimensionsEqual(&dest->loc, &src->loc)) {
2442  return -1;
2443  }
2444  int nx = clayer->loc.nx;
2445  int ny = clayer->loc.ny;
2446  int nf = clayer->loc.nf;
2447  int leftBorder = dest->loc.halo.lt;
2448  int rightBorder = dest->loc.halo.rt;
2449  int topBorder = dest->loc.halo.up;
2450  int nbatch = dest->loc.nbatch;
2451  size_t sb = strideBExtended(&dest->loc);
2452  size_t sf = strideFExtended(&dest->loc);
2453  size_t sx = strideXExtended(&dest->loc);
2454  size_t sy = strideYExtended(&dest->loc);
2455 
2456  for (int b = 0; b < nbatch; b++) {
2457  float *srcData = src->data + b * sb;
2458  float *destData = dest->data + b * sb;
2459  float *src0 = srcData + topBorder * sy + (nx + leftBorder - 1) * sx;
2460  float *dst0 = destData + topBorder * sy + (nx + leftBorder) * sx;
2461 
2462  for (int ky = 0; ky < ny; ky++) {
2463  float *to = dst0 + ky * sy;
2464  float *from = src0 + ky * sy;
2465  for (int kx = 0; kx < rightBorder; kx++) {
2466  for (int kf = 0; kf < nf; kf++) {
2467  to[kf * sf] = from[kf * sf];
2468  }
2469  to += nf;
2470  from -= nf;
2471  }
2472  }
2473  }
2474  return 0;
2475 }
2476 
2477 int HyPerLayer::mirrorToSouthWest(PVLayerCube *dest, PVLayerCube *src) {
2478  if (!localDimensionsEqual(&dest->loc, &src->loc)) {
2479  return -1;
2480  }
2481  int ny = dest->loc.ny;
2482  int nf = dest->loc.nf;
2483  int leftBorder = dest->loc.halo.lt;
2484  int topBorder = dest->loc.halo.up;
2485  int bottomBorder = dest->loc.halo.dn;
2486  int nbatch = dest->loc.nbatch;
2487  size_t sb = strideBExtended(&dest->loc);
2488  size_t sf = strideFExtended(&dest->loc);
2489  size_t sx = strideXExtended(&dest->loc);
2490  size_t sy = strideYExtended(&dest->loc);
2491 
2492  for (int b = 0; b < nbatch; b++) {
2493  float *srcData = src->data + b * sb;
2494  float *destData = dest->data + b * sb;
2495  float *src0 = srcData + (ny + topBorder - 1) * sy + leftBorder * sx;
2496  float *dst0 = destData + (ny + topBorder) * sy + (leftBorder - 1) * sx;
2497 
2498  for (int ky = 0; ky < bottomBorder; ky++) {
2499  float *to = dst0 + ky * sy;
2500  float *from = src0 - ky * sy;
2501  for (int kx = 0; kx < leftBorder; kx++) {
2502  for (int kf = 0; kf < nf; kf++) {
2503  to[kf * sf] = from[kf * sf];
2504  }
2505  to -= nf;
2506  from += nf;
2507  }
2508  }
2509  }
2510  return 0;
2511 }
2512 
2513 int HyPerLayer::mirrorToSouth(PVLayerCube *dest, PVLayerCube *src) {
2514  if (!localDimensionsEqual(&dest->loc, &src->loc)) {
2515  return -1;
2516  }
2517  int nx = dest->loc.nx;
2518  int ny = dest->loc.ny;
2519  int nf = dest->loc.nf;
2520  int leftBorder = dest->loc.halo.lt;
2521  int rightBorder = dest->loc.halo.rt;
2522  int topBorder = dest->loc.halo.up;
2523  int bottomBorder = dest->loc.halo.dn;
2524  int nbatch = dest->loc.nbatch;
2525  size_t sb = strideBExtended(&dest->loc);
2526  size_t sf = strideFExtended(&dest->loc);
2527  size_t sx = strideXExtended(&dest->loc);
2528  size_t sy = strideYExtended(&dest->loc);
2529 
2530  for (int b = 0; b < nbatch; b++) {
2531  float *srcData = src->data + b * sb;
2532  float *destData = dest->data + b * sb;
2533  float *src0 = srcData + (ny + topBorder - 1) * sy + leftBorder * sx;
2534  float *dst0 = destData + (ny + topBorder) * sy + leftBorder * sx;
2535 
2536  for (int ky = 0; ky < bottomBorder; ky++) {
2537  float *to = dst0 + ky * sy;
2538  float *from = src0 - ky * sy;
2539  for (int kx = 0; kx < nx; kx++) {
2540  for (int kf = 0; kf < nf; kf++) {
2541  to[kf * sf] = from[kf * sf];
2542  }
2543  to += nf;
2544  from += nf;
2545  }
2546  }
2547  }
2548  return 0;
2549 }
2550 
2551 int HyPerLayer::mirrorToSouthEast(PVLayerCube *dest, PVLayerCube *src) {
2552  if (!localDimensionsEqual(&dest->loc, &src->loc)) {
2553  return -1;
2554  }
2555  int nx = dest->loc.nx;
2556  int ny = dest->loc.ny;
2557  int nf = dest->loc.nf;
2558  int leftBorder = dest->loc.halo.lt;
2559  int rightBorder = dest->loc.halo.rt;
2560  int topBorder = dest->loc.halo.up;
2561  int bottomBorder = dest->loc.halo.dn;
2562  int nbatch = dest->loc.nbatch;
2563  size_t sb = strideBExtended(&dest->loc);
2564  size_t sf = strideFExtended(&dest->loc);
2565  size_t sx = strideXExtended(&dest->loc);
2566  size_t sy = strideYExtended(&dest->loc);
2567 
2568  for (int b = 0; b < nbatch; b++) {
2569  float *srcData = src->data + b * sb;
2570  float *destData = dest->data + b * sb;
2571  float *src0 = srcData + (ny + topBorder - 1) * sy + (nx + leftBorder - 1) * sx;
2572  float *dst0 = destData + (ny + topBorder) * sy + (nx + leftBorder) * sx;
2573 
2574  for (int ky = 0; ky < bottomBorder; ky++) {
2575  float *to = dst0 + ky * sy;
2576  float *from = src0 - ky * sy;
2577  for (int kx = 0; kx < rightBorder; kx++) {
2578  for (int kf = 0; kf < nf; kf++) {
2579  to[kf * sf] = from[kf * sf];
2580  }
2581  to += nf;
2582  from -= nf;
2583  }
2584  }
2585  }
2586  return 0;
2587 }
2588 
2589 } // end of PV namespace