#include "HyPerLayer.hpp"
#include "checkpointing/CheckpointEntryPvpBuffer.hpp"
#include "checkpointing/CheckpointEntryRandState.hpp"
#include "columns/HyPerCol.hpp"
#include "connections/BaseConnection.hpp"
#include "include/default_params.h"
#include "include/pv_common.h"
#include "io/FileStream.hpp"

HyPerLayer::HyPerLayer() { initialize_base(); }

HyPerLayer::HyPerLayer(const char *name, HyPerCol *hc) {
   initialize_base();
   initialize(name, hc);
}

int HyPerLayer::initialize_base() {
   triggerLayerName      = NULL;
   triggerBehavior       = NULL;
   triggerBehaviorType   = NO_TRIGGER;
   triggerResetLayerName = NULL;

   initializeFromCheckpointFlag = false;

   mLastUpdateTime  = 0.0;
   mLastTriggerTime = 0.0;

   numSynchronizedMarginWidthLayers = 0;
   synchronizedMarginWidthLayers    = NULL;

   dataTypeString = NULL;

#ifdef PV_USE_CUDA
   allocDeviceGSyn          = false;
   allocDeviceActivity      = false;
   allocDeviceDatastore     = false;
   allocDeviceActiveIndices = false;

   d_ActiveIndices = NULL;

   updatedDeviceActivity  = true;
   updatedDeviceDatastore = true;
   updatedDeviceGSyn      = true;

#ifdef PV_USE_CUDNN
   cudnn_Datastore = NULL;
#endif // PV_USE_CUDNN
#endif // PV_USE_CUDA

   recvsyn_timer   = NULL;
   publish_timer   = NULL;
   timescale_timer = NULL;

#ifdef PV_USE_CUDA
   gpu_recvsyn_timer = NULL;
   gpu_update_timer  = NULL;
#endif // PV_USE_CUDA

   return PV_SUCCESS;
}

int HyPerLayer::initialize(const char *name, HyPerCol *hc) {
   int status = BaseLayer::initialize(name, hc);
   if (status != PV_SUCCESS) {
      return status;
   }

   writeTime                = initialWriteTime;
   writeActivityCalls       = 0;
   writeActivitySparseCalls = 0;

   mLastUpdateTime  = parent->getDeltaTime();
   mLastTriggerTime = parent->getDeltaTime();

   return status;
}
int HyPerLayer::initClayer() {
   clayer     = (PVLayer *)calloc(1UL, sizeof(PVLayer));
   int status = PV_SUCCESS;
   if (clayer == NULL) {
      Fatal().printf(
            "HyPerLayer \"%s\" error in rank %d process: unable to allocate memory for Clayer.\n",
            name,
            parent->columnId());
   }

   PVLayerLoc *loc = &clayer->loc;
   setLayerLoc(loc, nxScale, nyScale, numFeatures, parent->getNBatch());
   assert(loc->halo.lt == 0 && loc->halo.rt == 0 && loc->halo.dn == 0 && loc->halo.up == 0);

   int nBatch = parent->getNBatch();

   clayer->numNeurons  = loc->nx * loc->ny * loc->nf;
   clayer->numExtended = clayer->numNeurons; // halo widths are all zero at this point

   clayer->numNeuronsAllBatches  = nBatch * loc->nx * loc->ny * loc->nf;
   clayer->numExtendedAllBatches = clayer->numNeuronsAllBatches;
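   // The stored scale is the negative base-2 logarithm of the params-file scale:
   // nxScale = 0.5 gives xScale = 1 (the layer is coarser than the column by a
   // factor of 2 in x), and nxScale = 2 gives xScale = -1 (twice as fine).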
   double xScaled = -log2((double)nxScale);
   double yScaled = -log2((double)nyScale);

   int xScale = (int)nearbyint(xScaled);
   int yScale = (int)nearbyint(yScaled);

   clayer->xScale = xScale;
   clayer->yScale = yScale;

   return status;
}
HyPerLayer::~HyPerLayer() {
   delete recvsyn_timer;
   delete update_timer;
   delete publish_timer;
   delete timescale_timer;
   delete io_timer;
#ifdef PV_USE_CUDA
   delete gpu_recvsyn_timer;
   delete gpu_update_timer;
#endif // PV_USE_CUDA

   delete mOutputStateStream;

   freeClayer();
   freeChannels();

#ifdef PV_USE_CUDA
   // ... (device buffer cleanup elided)
#ifdef PV_USE_CUDNN
   if (cudnn_Datastore) {
      delete cudnn_Datastore;
   }
#endif // PV_USE_CUDNN
#endif // PV_USE_CUDA

   free(synchronizedMarginWidthLayers);
   free(triggerLayerName);
   free(triggerBehavior);
   free(triggerResetLayerName);
   free(initVTypeString);

   if (thread_gSyn) {
      for (int i = 0; i < parent->getNumThreads(); i++) {
         free(thread_gSyn[i]);
      }
      free(thread_gSyn);
   }
   delete publisher;
}

template <typename T>
int HyPerLayer::freeBuffer(T **buf) {
   free(*buf);
   *buf = NULL;
   return PV_SUCCESS;
}

// Explicit instantiations for the two buffer types HyPerLayer uses.
template int HyPerLayer::freeBuffer<float>(float **buf);
template int HyPerLayer::freeBuffer<int>(int **buf);

int HyPerLayer::freeClayer() {
   pvcube_delete(clayer->activity);
   freeBuffer(&clayer->prevActivity);
   freeBuffer(&clayer->V);
   free(clayer);
   clayer = NULL;
   return PV_SUCCESS;
}

void HyPerLayer::freeChannels() {
#ifdef PV_USE_CUDA
   if (d_GSyn != NULL) {
      delete d_GSyn;
      d_GSyn = NULL;
   }
#ifdef PV_USE_CUDNN
   if (cudnn_GSyn != NULL) {
      delete cudnn_GSyn;
   }
#endif // PV_USE_CUDNN
#endif // PV_USE_CUDA

   if (GSyn) {
      assert(numChannels > 0);
      free(GSyn[0]); // all channels were allocated as one contiguous block
      free(GSyn);
      GSyn = NULL;
   }
}

int HyPerLayer::allocateClayerBuffers() {
   pvAssert(clayer);
   allocateV();
   allocateActivity();

   allocatePrevActivity();
   for (int k = 0; k < getNumExtendedAllBatches(); k++) {
      clayer->prevActivity[k] = -10 * REFRACTORY_PERIOD;
   }
   return PV_SUCCESS;
}
template <typename T>
void HyPerLayer::allocateBuffer(T **buf, int bufsize, const char *bufname) {
   *buf = (T *)calloc(bufsize, sizeof(T));
   if (*buf == NULL) {
      Fatal().printf(
            "%s: rank %d process unable to allocate memory for %s: %s.\n",
            getDescription_c(),
            parent->columnId(),
            bufname,
            strerror(errno));
   }
}

template void HyPerLayer::allocateBuffer<float>(float **buf, int bufsize, const char *bufname);
template void HyPerLayer::allocateBuffer<int>(int **buf, int bufsize, const char *bufname);

void HyPerLayer::allocateRestrictedBuffer(float **buf, const char *bufname) {
   allocateBuffer(buf, getNumNeuronsAllBatches(), bufname);
}

void HyPerLayer::allocateExtendedBuffer(float **buf, const char *bufname) {
   allocateBuffer(buf, getNumExtendedAllBatches(), bufname);
}

void HyPerLayer::allocateV() { allocateRestrictedBuffer(&clayer->V, "membrane potential V"); }

void HyPerLayer::allocateActivity() {
   clayer->activity = pvcube_new(&clayer->loc, getNumExtendedAllBatches());
   FatalIf(
         clayer->activity == nullptr,
         "%s failed to allocate activity cube.\n",
         getDescription_c());
}

void HyPerLayer::allocatePrevActivity() {
   allocateExtendedBuffer(&clayer->prevActivity, "time of previous activity");
}

int HyPerLayer::setLayerLoc(
      PVLayerLoc *layerLoc,
      float nxScale,
      float nyScale,
      int nf,
      int numBatches) {
   int status = PV_SUCCESS;

   Communicator *icComm = parent->getCommunicator();

   float nxglobalfloat = nxScale * parent->getNxGlobal();
   layerLoc->nxGlobal  = (int)nearbyintf(nxglobalfloat);
   if (std::fabs(nxglobalfloat - layerLoc->nxGlobal) > 0.0001f) {
      if (parent->columnId() == 0) {
         ErrorLog(errorMessage);
         errorMessage.printf(
               "nxScale of layer \"%s\" is incompatible with size of column.\n", getName());
         errorMessage.printf(
               "Column nx %d multiplied by nxScale %f must be an integer.\n",
               parent->getNxGlobal(),
               (double)nxScale);
      }
      status = PV_FAILURE;
   }

   float nyglobalfloat = nyScale * parent->getNyGlobal();
   layerLoc->nyGlobal  = (int)nearbyintf(nyglobalfloat);
   if (std::fabs(nyglobalfloat - layerLoc->nyGlobal) > 0.0001f) {
      if (parent->columnId() == 0) {
         ErrorLog(errorMessage);
         errorMessage.printf(
               "nyScale of layer \"%s\" is incompatible with size of column.\n", getName());
         errorMessage.printf(
               "Column ny %d multiplied by nyScale %f must be an integer.\n",
               parent->getNyGlobal(),
               (double)nyScale);
      }
      status = PV_FAILURE;
   }

   if (layerLoc->nxGlobal % icComm->numCommColumns() != 0) {
      if (parent->columnId() == 0) {
         ErrorLog(errorMessage);
         errorMessage.printf(
               "Size of HyPerLayer \"%s\" is not compatible with the mpi configuration.\n", name);
         errorMessage.printf(
               "The layer has %d pixels horizontally, and there are %d mpi processes in a row, but "
               "%d does not divide %d.\n",
               layerLoc->nxGlobal,
               icComm->numCommColumns(),
               icComm->numCommColumns(),
               layerLoc->nxGlobal);
      }
      status = PV_FAILURE;
   }

   if (layerLoc->nyGlobal % icComm->numCommRows() != 0) {
      if (parent->columnId() == 0) {
         ErrorLog(errorMessage);
         errorMessage.printf(
               "Size of HyPerLayer \"%s\" is not compatible with the mpi configuration.\n", name);
         errorMessage.printf(
               "The layer has %d pixels vertically, and there are %d mpi processes in a column, "
               "but %d does not divide %d.\n",
               layerLoc->nyGlobal,
               icComm->numCommRows(),
               icComm->numCommRows(),
               layerLoc->nyGlobal);
      }
      status = PV_FAILURE;
   }

   MPI_Barrier(icComm->communicator()); // so that any error message prints before exiting
   if (status != PV_SUCCESS) {
      if (parent->columnId() == 0) {
         ErrorLog().printf("setLayerLoc failed for %s.\n", getDescription_c());
      }
      exit(EXIT_FAILURE);
   }

   layerLoc->nx = layerLoc->nxGlobal / icComm->numCommColumns();
   layerLoc->ny = layerLoc->nyGlobal / icComm->numCommRows();
   assert(layerLoc->nxGlobal == layerLoc->nx * icComm->numCommColumns());
   assert(layerLoc->nyGlobal == layerLoc->ny * icComm->numCommRows());

   layerLoc->kx0 = layerLoc->nx * icComm->commColumn();
   layerLoc->ky0 = layerLoc->ny * icComm->commRow();

   layerLoc->nf = nf;

   layerLoc->nbatch       = numBatches;
   layerLoc->kb0          = parent->commBatch() * numBatches;
   layerLoc->nbatchGlobal = parent->numCommBatches() * numBatches;

   // The halo is zero until a connection or synchronizeMarginWidth call widens it.
   layerLoc->halo.lt = 0;
   layerLoc->halo.rt = 0;
   layerLoc->halo.dn = 0;
   layerLoc->halo.up = 0;

   return status;
}

void HyPerLayer::calcNumExtended() {
   PVLayerLoc const *loc = getLayerLoc();
   clayer->numExtended   = (loc->nx + loc->halo.lt + loc->halo.rt)
                         * (loc->ny + loc->halo.dn + loc->halo.up) * loc->nf;
   clayer->numExtendedAllBatches = clayer->numExtended * loc->nbatch;
}

void HyPerLayer::allocateBuffers() { allocateGSyn(); }
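// GSyn is numChannels pointers into one contiguous block of
// numChannels * getNumNeuronsAllBatches() floats: GSyn[0] owns the allocation and
// GSyn[m] = GSyn[0] + m * getNumNeuronsAllBatches(). Kernels can therefore treat
// GSyn[0] as a single (channel, batch, neuron) array, and freeChannels() only
// needs to free GSyn[0] and the pointer array itself.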
void HyPerLayer::allocateGSyn() {
   GSyn = nullptr;
   if (numChannels > 0) {
      GSyn = (float **)malloc(numChannels * sizeof(float *));
      FatalIf(GSyn == nullptr, "%s unable to allocate GSyn pointers.\n", getDescription_c());

      GSyn[0] = (float *)calloc(getNumNeuronsAllBatches() * numChannels, sizeof(float));
      FatalIf(GSyn[0] == nullptr, "%s unable to allocate GSyn buffer.\n", getDescription_c());

      for (int m = 1; m < numChannels; m++) {
         GSyn[m] = GSyn[0] + m * getNumNeuronsAllBatches();
      }
   }
}

void HyPerLayer::addPublisher() {
   MPIBlock const *mpiBlock = parent->getCommunicator()->getLocalMPIBlock();
   publisher = new Publisher(*mpiBlock, clayer->activity, getNumDelayLevels(), getSparseFlag());
}

void HyPerLayer::checkpointPvpActivityFloat(
      Checkpointer *checkpointer,
      char const *bufferName,
      float *pvpBuffer,
      bool extended) {
   bool registerSucceeded = checkpointer->registerCheckpointEntry(
         std::make_shared<CheckpointEntryPvpBuffer<float>>(
               getName(),
               bufferName,
               checkpointer->getMPIBlock(),
               pvpBuffer,
               getLayerLoc(),
               extended),
         false /*not constant*/);
   FatalIf(
         !registerSucceeded,
         "%s failed to register %s for checkpointing.\n",
         getDescription_c(),
         bufferName);
}

void HyPerLayer::checkpointRandState(
      Checkpointer *checkpointer,
      char const *bufferName,
      Random *randState,
      bool extendedFlag) {
   bool registerSucceeded = checkpointer->registerCheckpointEntry(
         std::make_shared<CheckpointEntryRandState>(
               getName(),
               bufferName,
               checkpointer->getMPIBlock(),
               randState->getRNG(0),
               getLayerLoc(),
               extendedFlag),
         false /*not constant*/);
   FatalIf(
         !registerSucceeded,
         "%s failed to register %s for checkpointing.\n",
         getDescription_c(),
         bufferName);
}

Response::Status HyPerLayer::initializeState() {
   initializeV();
   initializeActivity();
   return Response::SUCCESS;
}

#ifdef PV_USE_CUDA
Response::Status HyPerLayer::copyInitialStateToGPU() {
   if (mUpdateGpu) {
      float *h_V = getV();
      if (h_V != NULL) {
         PVCuda::CudaBuffer *d_V = getDeviceV();
         assert(d_V);
         d_V->copyToDevice(h_V);
      }

      PVCuda::CudaBuffer *d_activity = getDeviceActivity();
      assert(d_activity);
      float *h_activity = getCLayer()->activity->data;
      d_activity->copyToDevice(h_activity);
   }
   return Response::SUCCESS;
}
#endif // PV_USE_CUDA

void HyPerLayer::initializeV() {
   if (getV() != nullptr && mInitVObject != nullptr) {
      mInitVObject->calcV(getV(), getLayerLoc());
   }
}

void HyPerLayer::initializeActivity() {
   int status = setActivity();
   FatalIf(status != PV_SUCCESS, "%s failed to initialize activity.\n", getDescription_c());
}

int HyPerLayer::ioParamsFillGroup(enum ParamsIOFlag ioFlag) {
   // ...
   ioParam_dataType(ioFlag);
   // ...
   return PV_SUCCESS;
}
void HyPerLayer::ioParam_dataType(enum ParamsIOFlag ioFlag) {
   this->parent->parameters()->ioParamString(
         ioFlag, this->getName(), "dataType", &dataTypeString, NULL, false /*warnIfAbsent*/);
   if (dataTypeString == NULL) {
      // Default value
      dataType = PV_FLOAT;
      return;
   }
   if (!strcmp(dataTypeString, "float")) {
      dataType = PV_FLOAT;
   }
   else if (!strcmp(dataTypeString, "int")) {
      dataType = PV_INT;
   }
   else {
      Fatal() << "BaseLayer \"" << name
              << "\": dataType not recognized, can be \"float\" or \"int\"\n";
   }
}

void HyPerLayer::ioParam_updateGpu(enum ParamsIOFlag ioFlag) {
#ifdef PV_USE_CUDA
   parent->parameters()->ioParamValue(
         ioFlag, name, "updateGpu", &mUpdateGpu, mUpdateGpu, true /*warnIfAbsent*/);
   mUsingGPUFlag = mUpdateGpu;
#else // PV_USE_CUDA
   bool mUpdateGpu = false;
   parent->parameters()->ioParamValue(
         ioFlag, name, "updateGpu", &mUpdateGpu, mUpdateGpu, false /*warnIfAbsent*/);
   if (parent->columnId() == 0) {
      FatalIf(
            mUpdateGpu,
            "%s: updateGpu is set to true, but PetaVision was compiled without GPU acceleration.\n",
            getDescription_c());
   }
#endif // PV_USE_CUDA
}

void HyPerLayer::ioParam_nxScale(enum ParamsIOFlag ioFlag) {
   parent->parameters()->ioParamValue(ioFlag, name, "nxScale", &nxScale, nxScale);
}

void HyPerLayer::ioParam_nyScale(enum ParamsIOFlag ioFlag) {
   parent->parameters()->ioParamValue(ioFlag, name, "nyScale", &nyScale, nyScale);
}

void HyPerLayer::ioParam_nf(enum ParamsIOFlag ioFlag) {
   parent->parameters()->ioParamValue(ioFlag, name, "nf", &numFeatures, numFeatures);
}

void HyPerLayer::ioParam_phase(enum ParamsIOFlag ioFlag) {
   parent->parameters()->ioParamValue(ioFlag, name, "phase", &phase, phase);
   if (ioFlag == PARAMS_IO_READ && phase < 0) {
      if (parent->columnId() == 0)
         Fatal().printf(
               "%s: phase must be >= 0 (given value was %d).\n", getDescription_c(), phase);
   }
}

void HyPerLayer::ioParam_mirrorBCflag(enum ParamsIOFlag ioFlag) {
   parent->parameters()->ioParamValue(ioFlag, name, "mirrorBCflag", &mirrorBCflag, mirrorBCflag);
}

void HyPerLayer::ioParam_valueBC(enum ParamsIOFlag ioFlag) {
   assert(!parent->parameters()->presentAndNotBeenRead(name, "mirrorBCflag"));
   if (!mirrorBCflag) {
      parent->parameters()->ioParamValue(ioFlag, name, "valueBC", &valueBC, (float)0);
   }
}

void HyPerLayer::ioParam_initializeFromCheckpointFlag(enum ParamsIOFlag ioFlag) {
   parent->parameters()->ioParamValue(
         ioFlag,
         name,
         "initializeFromCheckpointFlag",
         &initializeFromCheckpointFlag,
         initializeFromCheckpointFlag,
         true /*warnIfAbsent*/);
}

void HyPerLayer::ioParam_InitVType(enum ParamsIOFlag ioFlag) {
   parent->parameters()->ioParamString(
         ioFlag,
         name,
         "InitVType",
         &initVTypeString,
         BaseInitV::mDefaultInitV.data(),
         true /*warnIfAbsent*/);
   if (ioFlag == PARAMS_IO_READ) {
      BaseObject *object = Factory::instance()->createByKeyword(initVTypeString, name, parent);
      mInitVObject       = dynamic_cast<BaseInitV *>(object);
      if (mInitVObject == nullptr) {
         ErrorLog().printf("%s: unable to create InitV object\n", getDescription_c());
         abort();
      }
   }
   if (mInitVObject != nullptr) {
      mInitVObject->ioParamsFillGroup(ioFlag);
   }
}

void HyPerLayer::ioParam_triggerLayerName(enum ParamsIOFlag ioFlag) {
   parent->parameters()->ioParamString(
         ioFlag, name, "triggerLayerName", &triggerLayerName, NULL, false /*warnIfAbsent*/);
   if (ioFlag == PARAMS_IO_READ) {
      if (triggerLayerName && !strcmp(name, triggerLayerName)) {
         if (parent->columnId() == 0) {
            ErrorLog().printf(
                  "%s: triggerLayerName cannot be the same as the name of the layer itself.\n",
                  getDescription_c());
         }
         MPI_Barrier(parent->getCommunicator()->communicator());
         exit(EXIT_FAILURE);
      }
      triggerFlag = (triggerLayerName != NULL && triggerLayerName[0] != '\0');
   }
}
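// A minimal params-file sketch of the trigger-related parameters (layer names and
// values here are illustrative, not taken from an actual run):
//
//    HyPerLayer "V1" = {
//       nxScale          = 0.5;
//       nyScale          = 0.5;
//       nf               = 32;
//       triggerLayerName = "Input";   // update only when "Input" updates
//       triggerBehavior  = "updateOnlyOnTrigger";
//       triggerOffset    = 0;         // trigger this many timesteps early
//    };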
// triggerFlag is deprecated in favor of triggerLayerName: triggering is on exactly
// when triggerLayerName is a nonempty string.
void HyPerLayer::ioParam_triggerFlag(enum ParamsIOFlag ioFlag) {
   pvAssert(!parent->parameters()->presentAndNotBeenRead(name, "triggerLayerName"));
   if (ioFlag == PARAMS_IO_READ && parent->parameters()->present(name, "triggerFlag")) {
      bool flagFromParams = false;
      parent->parameters()->ioParamValue(
            ioFlag, name, "triggerFlag", &flagFromParams, flagFromParams);
      if (parent->columnId() == 0) {
         WarnLog(triggerFlagMessage);
         triggerFlagMessage.printf("%s: triggerFlag has been deprecated.\n", getDescription_c());
         triggerFlagMessage.printf(
               "   If triggerLayerName is a nonempty string, triggering will be on;\n");
         triggerFlagMessage.printf(
               "   if triggerLayerName is empty or null, triggering will be off.\n");
         if (flagFromParams != triggerFlag) {
            ErrorLog(errorMessage);
            errorMessage.printf("%s: triggerLayerName=", name);
            if (triggerLayerName) {
               errorMessage.printf("\"%s\"", triggerLayerName);
            }
            else {
               errorMessage.printf("NULL");
            }
            errorMessage.printf(
                  " implies triggerFlag=%s but triggerFlag was set in params to %s\n",
                  triggerFlag ? "true" : "false",
                  flagFromParams ? "true" : "false");
         }
      }
      if (flagFromParams != triggerFlag) {
         MPI_Barrier(parent->getCommunicator()->communicator());
         exit(EXIT_FAILURE);
      }
   }
}

void HyPerLayer::ioParam_triggerOffset(enum ParamsIOFlag ioFlag) {
   assert(!parent->parameters()->presentAndNotBeenRead(name, "triggerLayerName"));
   if (triggerFlag) {
      parent->parameters()->ioParamValue(
            ioFlag, name, "triggerOffset", &triggerOffset, triggerOffset);
      if (triggerOffset < 0) {
         if (parent->columnId() == 0) {
            Fatal().printf(
                  "%s: TriggerOffset (%f) must be nonnegative\n",
                  getDescription_c(),
                  triggerOffset);
         }
      }
   }
}

void HyPerLayer::ioParam_triggerBehavior(enum ParamsIOFlag ioFlag) {
   assert(!parent->parameters()->presentAndNotBeenRead(name, "triggerLayerName"));
   if (triggerFlag) {
      parent->parameters()->ioParamString(
            ioFlag,
            name,
            "triggerBehavior",
            &triggerBehavior,
            "updateOnlyOnTrigger",
            true /*warnIfAbsent*/);
      if (triggerBehavior == NULL || !strcmp(triggerBehavior, "")) {
         free(triggerBehavior);
         triggerBehavior     = strdup("updateOnlyOnTrigger");
         triggerBehaviorType = UPDATEONLY_TRIGGER;
      }
      else if (!strcmp(triggerBehavior, "updateOnlyOnTrigger")) {
         triggerBehaviorType = UPDATEONLY_TRIGGER;
      }
      else if (!strcmp(triggerBehavior, "resetStateOnTrigger")) {
         triggerBehaviorType = RESETSTATE_TRIGGER;
      }
      else if (!strcmp(triggerBehavior, "ignore")) {
         triggerBehaviorType = NO_TRIGGER;
      }
      else {
         if (parent->columnId() == 0) {
            ErrorLog().printf(
                  "%s: triggerBehavior=\"%s\" is unrecognized.\n",
                  getDescription_c(),
                  triggerBehavior);
         }
         MPI_Barrier(parent->getCommunicator()->communicator());
         exit(EXIT_FAILURE);
      }
   }
   else {
      triggerBehaviorType = NO_TRIGGER;
   }
}

void HyPerLayer::ioParam_triggerResetLayerName(enum ParamsIOFlag ioFlag) {
   assert(!parent->parameters()->presentAndNotBeenRead(name, "triggerLayerName"));
   if (triggerFlag) {
      assert(!parent->parameters()->presentAndNotBeenRead(name, "triggerBehavior"));
      if (!strcmp(triggerBehavior, "resetStateOnTrigger")) {
         parent->parameters()->ioParamStringRequired(
               ioFlag, name, "triggerResetLayerName", &triggerResetLayerName);
      }
   }
}

void HyPerLayer::ioParam_writeStep(enum ParamsIOFlag ioFlag) {
   parent->parameters()->ioParamValue(
         ioFlag, name, "writeStep", &writeStep, parent->getDeltaTime());
}

void HyPerLayer::ioParam_initialWriteTime(enum ParamsIOFlag ioFlag) {
   assert(!parent->parameters()->presentAndNotBeenRead(name, "writeStep"));
   if (writeStep >= 0.0) {
      parent->parameters()->ioParamValue(
            ioFlag, name, "initialWriteTime", &initialWriteTime, 0.0);
      if (ioFlag == PARAMS_IO_READ && writeStep > 0.0 && initialWriteTime < 0.0) {
         double storeInitialWriteTime = initialWriteTime;
         while (initialWriteTime < 0.0) {
            initialWriteTime += writeStep;
         }
         if (parent->columnId() == 0) {
            WarnLog(warningMessage);
            warningMessage.printf(
                  "%s: initialWriteTime %f is negative. Adjusting initialWriteTime:\n",
                  getDescription_c(),
                  storeInitialWriteTime);
            warningMessage.printf("    initialWriteTime adjusted to %f\n", initialWriteTime);
         }
      }
   }
}

void HyPerLayer::ioParam_sparseLayer(enum ParamsIOFlag ioFlag) {
   if (ioFlag == PARAMS_IO_READ && !parent->parameters()->present(name, "sparseLayer")
       && parent->parameters()->present(name, "writeSparseActivity")) {
      Fatal().printf("writeSparseActivity is obsolete. Use sparseLayer instead.\n");
   }
   parent->parameters()->ioParamValue(ioFlag, name, "sparseLayer", &sparseLayer, false);
}

void HyPerLayer::ioParam_writeSparseValues(enum ParamsIOFlag ioFlag) {
   if (ioFlag == PARAMS_IO_READ) {
      assert(!parent->parameters()->presentAndNotBeenRead(name, "sparseLayer"));
      if (sparseLayer && parent->parameters()->present(name, "writeSparseValues")) {
         WarnLog() << "writeSparseValues parameter, defined in " << getDescription()
                   << ", is obsolete.\n";
         bool writeSparseValues;
         parent->parameters()->ioParamValue(
               ioFlag, name, "writeSparseValues", &writeSparseValues, true /*default*/);
         if (!writeSparseValues) {
            WarnLog() << "The sparse-values format is used for all sparse layers.\n";
         }
      }
   }
}
Response::Status HyPerLayer::respond(std::shared_ptr<BaseMessage const> message) {
   Response::Status status = BaseLayer::respond(message);
   if (status != Response::SUCCESS) {
      return status;
   }
   else if (auto castMessage = std::dynamic_pointer_cast<LayerSetMaxPhaseMessage const>(message)) {
      return respondLayerSetMaxPhase(castMessage);
   }
   else if (auto castMessage = std::dynamic_pointer_cast<LayerWriteParamsMessage const>(message)) {
      return respondLayerWriteParams(castMessage);
   }
   else if (
         auto castMessage =
               std::dynamic_pointer_cast<LayerProbeWriteParamsMessage const>(message)) {
      return respondLayerProbeWriteParams(castMessage);
   }
   else if (
         auto castMessage =
               std::dynamic_pointer_cast<LayerClearProgressFlagsMessage const>(message)) {
      return respondLayerClearProgressFlags(castMessage);
   }
   else if (auto castMessage = std::dynamic_pointer_cast<LayerUpdateStateMessage const>(message)) {
      return respondLayerUpdateState(castMessage);
   }
   else if (
         auto castMessage =
               std::dynamic_pointer_cast<LayerRecvSynapticInputMessage const>(message)) {
      return respondLayerRecvSynapticInput(castMessage);
   }
#ifdef PV_USE_CUDA
   else if (auto castMessage = std::dynamic_pointer_cast<LayerCopyFromGpuMessage const>(message)) {
      return respondLayerCopyFromGpu(castMessage);
   }
#endif // PV_USE_CUDA
   else if (
         auto castMessage =
               std::dynamic_pointer_cast<LayerAdvanceDataStoreMessage const>(message)) {
      return respondLayerAdvanceDataStore(castMessage);
   }
   else if (auto castMessage = std::dynamic_pointer_cast<LayerPublishMessage const>(message)) {
      return respondLayerPublish(castMessage);
   }
   else if (auto castMessage = std::dynamic_pointer_cast<LayerOutputStateMessage const>(message)) {
      return respondLayerOutputState(castMessage);
   }
   else if (
         auto castMessage =
               std::dynamic_pointer_cast<LayerCheckNotANumberMessage const>(message)) {
      return respondLayerCheckNotANumber(castMessage);
   }
   else {
      return status;
   }
}

Response::Status
HyPerLayer::respondLayerSetMaxPhase(std::shared_ptr<LayerSetMaxPhaseMessage const> message) {
   return setMaxPhase(message->mMaxPhase);
}

Response::Status
HyPerLayer::respondLayerWriteParams(std::shared_ptr<LayerWriteParamsMessage const> message) {
   writeParams();
   return Response::SUCCESS;
}

Response::Status HyPerLayer::respondLayerProbeWriteParams(
      std::shared_ptr<LayerProbeWriteParamsMessage const> message) {
   return outputProbeParams();
}

Response::Status HyPerLayer::respondLayerClearProgressFlags(
      std::shared_ptr<LayerClearProgressFlagsMessage const> message) {
   clearProgressFlags();
   return Response::SUCCESS;
}

Response::Status HyPerLayer::respondLayerRecvSynapticInput(
      std::shared_ptr<LayerRecvSynapticInputMessage const> message) {
   Response::Status status = Response::SUCCESS;
   if (message->mPhase != getPhase()) {
      return status;
   }
#ifdef PV_USE_CUDA
   if (message->mRecvOnGpuFlag != mRecvGpu) {
      return status;
   }
#endif // PV_USE_CUDA
   if (mHasReceived) {
      return status;
   }
   if (!isAllInputReady()) {
      *(message->mSomeLayerIsPending) = true;
      return status;
   }
   resetGSynBuffers(message->mTime, message->mDeltaT);

   message->mTimer->start();
   recvAllSynapticInput();
   mHasReceived                   = true;
   *(message->mSomeLayerHasActed) = true;
   message->mTimer->stop();

   return status;
}

Response::Status
HyPerLayer::respondLayerUpdateState(std::shared_ptr<LayerUpdateStateMessage const> message) {
   Response::Status status = Response::SUCCESS;
   if (message->mPhase != getPhase()) {
      return status;
   }
#ifdef PV_USE_CUDA
   if (message->mRecvOnGpuFlag != mRecvGpu) {
      return status;
   }
   if (message->mUpdateOnGpuFlag != mUpdateGpu) {
      return status;
   }
#endif // PV_USE_CUDA
   if (mHasUpdated) {
      return status;
   }
   if (*(message->mSomeLayerHasActed) or !mHasReceived) {
      *(message->mSomeLayerIsPending) = true;
      return status;
   }
   status = callUpdateState(message->mTime, message->mDeltaT);

   mHasUpdated                    = true;
   *(message->mSomeLayerHasActed) = true;
   return status;
}

#ifdef PV_USE_CUDA
Response::Status
HyPerLayer::respondLayerCopyFromGpu(std::shared_ptr<LayerCopyFromGpuMessage const> message) {
   Response::Status status = Response::SUCCESS;
   if (message->mPhase != getPhase()) {
      return status;
   }
   message->mTimer->start();
   copyAllActivityFromDevice();
   copyAllVFromDevice();
   copyAllGSynFromDevice();
   addGpuTimers();
   message->mTimer->stop();
   return status;
}
#endif // PV_USE_CUDA

Response::Status HyPerLayer::respondLayerAdvanceDataStore(
      std::shared_ptr<LayerAdvanceDataStoreMessage const> message) {
   if (message->mPhase < 0 || message->mPhase == getPhase()) {
      publisher->increaseTimeLevel();
   }
   return Response::SUCCESS;
}

Response::Status
HyPerLayer::respondLayerPublish(std::shared_ptr<LayerPublishMessage const> message) {
   if (message->mPhase != getPhase()) {
      return Response::NO_ACTION;
   }
   publish(parent->getCommunicator(), message->mTime);
   return Response::SUCCESS;
}
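// A NaN is the only floating-point value that compares unequal to itself, so the
// check below (a != a) is a self-contained NaN test that needs no isnan() call.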
Response::Status HyPerLayer::respondLayerCheckNotANumber(
      std::shared_ptr<LayerCheckNotANumberMessage const> message) {
   Response::Status status = Response::SUCCESS;
   if (message->mPhase != getPhase()) {
      return status;
   }
   auto layerData = getLayerData();
   int const N    = getNumExtendedAllBatches();
   for (int n = 0; n < N; n++) {
      float a = layerData[n];
      FatalIf(
            a != a,
            "%s has not-a-number values in the activity buffer. Exiting.\n",
            getDescription_c());
   }
   return status;
}

Response::Status
HyPerLayer::respondLayerOutputState(std::shared_ptr<LayerOutputStateMessage const> message) {
   Response::Status status = Response::SUCCESS;
   if (message->mPhase != getPhase()) {
      return status;
   }
   status = outputState(message->mTime);
   return status;
}

void HyPerLayer::clearProgressFlags() {
   mHasReceived = false;
   mHasUpdated  = false;
}

#ifdef PV_USE_CUDA
int HyPerLayer::allocateUpdateKernel() {
   Fatal() << "Layer \"" << name << "\" of type " << mObjectType
           << " does not support updating on gpus yet\n";
   return PV_FAILURE;
}

int HyPerLayer::allocateDeviceBuffers() {
   int status = 0;

   const size_t size    = getNumNeuronsAllBatches() * sizeof(float);
   const size_t size_ex = getNumExtendedAllBatches() * sizeof(float);

   PVCuda::CudaDevice *device = parent->getDevice();

   if (allocDeviceV) {
      d_V = device->createBuffer(size, &description);
   }

   if (allocDeviceDatastore) {
      d_Datastore = device->createBuffer(size_ex, &description);
      assert(d_Datastore);
#ifdef PV_USE_CUDNN
      cudnn_Datastore = device->createBuffer(size_ex, &description);
      assert(cudnn_Datastore);
#endif // PV_USE_CUDNN
   }

   if (allocDeviceActiveIndices) {
      d_numActive     = device->createBuffer(parent->getNBatch() * sizeof(long), &description);
      d_ActiveIndices = device->createBuffer(
            getNumExtendedAllBatches() * sizeof(SparseList<float>::Entry), &description);
      assert(d_ActiveIndices);
   }

   if (allocDeviceActivity) {
      d_Activity = device->createBuffer(size_ex, &description);
   }

   // d_GSyn holds all channels in one block, mirroring the host-side layout.
   if (allocDeviceGSyn) {
      d_GSyn = device->createBuffer(size * numChannels, &description);
#ifdef PV_USE_CUDNN
      cudnn_GSyn = device->createBuffer(size, &description);
#endif // PV_USE_CUDNN
   }

   return status;
}
#endif // PV_USE_CUDA

Response::Status
HyPerLayer::communicateInitInfo(std::shared_ptr<CommunicateInitInfoMessage const> message) {
   // ...
   if (triggerFlag) {
      triggerLayer = message->lookup<HyPerLayer>(std::string(triggerLayerName));
      if (triggerLayer == NULL) {
         if (parent->columnId() == 0) {
            ErrorLog().printf(
                  "%s: triggerLayerName \"%s\" is not a layer in the HyPerCol.\n",
                  getDescription_c(),
                  triggerLayerName);
         }
         MPI_Barrier(parent->getCommunicator()->communicator());
         exit(EXIT_FAILURE);
      }

      if (triggerBehaviorType == RESETSTATE_TRIGGER) {
         char const *resetLayerName = NULL; // name of the layer we actually reset from,
                                            // whether triggerResetLayerName is blank or not
         if (triggerResetLayerName == NULL || triggerResetLayerName[0] == '\0') {
            resetLayerName    = triggerLayerName;
            triggerResetLayer = triggerLayer;
         }
         else {
            resetLayerName    = triggerResetLayerName;
            triggerResetLayer = message->lookup<HyPerLayer>(std::string(triggerResetLayerName));
            if (triggerResetLayer == NULL) {
               if (parent->columnId() == 0) {
                  ErrorLog().printf(
                        "%s: triggerResetLayerName \"%s\" is not a layer in the HyPerCol.\n",
                        getDescription_c(),
                        triggerResetLayerName);
               }
               MPI_Barrier(parent->getCommunicator()->communicator());
               exit(EXIT_FAILURE);
            }
         }

         PVLayerLoc const *triggerLoc = triggerResetLayer->getLayerLoc();
         PVLayerLoc const *localLoc   = this->getLayerLoc();
         if (triggerLoc->nxGlobal != localLoc->nxGlobal
             || triggerLoc->nyGlobal != localLoc->nyGlobal
             || triggerLoc->nf != localLoc->nf) {
            if (parent->columnId() == 0) {
               Fatal(errorMessage);
               errorMessage.printf(
                     "%s: triggerResetLayer \"%s\" has incompatible dimensions.\n",
                     getDescription_c(),
                     resetLayerName);
               errorMessage.printf(
                     "    \"%s\" is %d-by-%d-by-%d and \"%s\" is %d-by-%d-by-%d.\n",
                     getName(),
                     localLoc->nxGlobal,
                     localLoc->nyGlobal,
                     localLoc->nf,
                     resetLayerName,
                     triggerLoc->nxGlobal,
                     triggerLoc->nyGlobal,
                     triggerLoc->nf);
            }
         }
      }
   }

#ifdef PV_USE_CUDA
   if (mUpdateGpu) {
      this->setAllocDeviceGSyn();
      this->setAllocDeviceV();
      this->setAllocDeviceActivity();
   }
#endif // PV_USE_CUDA

   return Response::SUCCESS;
}

Response::Status HyPerLayer::setMaxPhase(int *maxPhase) {
   if (*maxPhase < phase) {
      *maxPhase = phase;
   }
   return Response::SUCCESS;
}

void HyPerLayer::addRecvConn(BaseConnection *conn) {
   FatalIf(
         conn->getPost() != this,
         "%s called addRecvConn for %s, but \"%s\" is not the post-synaptic layer for \"%s\".\n",
         conn->getDescription_c(),
         getDescription_c(),
         getName(),
         conn->getName());
#ifdef PV_USE_CUDA
   // CPU connections must be received first, so they go to the front of the list.
   if (!conn->getReceiveGpu()) {
      recvConns.insert(recvConns.begin(), conn);
   }
   else
#endif // PV_USE_CUDA
   {
      recvConns.push_back(conn);
#ifdef PV_USE_CUDA
      mRecvGpu = true;
#endif // PV_USE_CUDA
   }
}

int HyPerLayer::openOutputStateFile(Checkpointer *checkpointer) {
   pvAssert(writeStep >= 0);

   if (checkpointer->getMPIBlock()->getRank() == 0) {
      std::string outputStatePath(getName());
      outputStatePath.append(".pvp");

      std::string checkpointLabel(getName());
      checkpointLabel.append("_filepos");

      bool createFlag    = checkpointer->getCheckpointReadDirectory().empty();
      mOutputStateStream = new CheckpointableFileStream(
            outputStatePath.c_str(), createFlag, checkpointer, checkpointLabel);
   }
   return PV_SUCCESS;
}
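// Layers whose margins are synchronized must keep identical halo widths: whenever
// one layer's margin grows, requireMarginWidth() propagates the new width to every
// layer in its synchronizedMarginWidthLayers list, and equalizeMargins() widens
// both layers to the larger of the two margins in each axis.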
void HyPerLayer::synchronizeMarginWidth(HyPerLayer *layer) {
   if (layer == this) {
      return;
   }
   assert(layer->getLayerLoc() != NULL && this->getLayerLoc() != NULL);
   HyPerLayer **newSynchronizedMarginWidthLayers =
         (HyPerLayer **)calloc(numSynchronizedMarginWidthLayers + 1, sizeof(HyPerLayer *));
   assert(newSynchronizedMarginWidthLayers);
   if (numSynchronizedMarginWidthLayers > 0) {
      for (int k = 0; k < numSynchronizedMarginWidthLayers; k++) {
         newSynchronizedMarginWidthLayers[k] = synchronizedMarginWidthLayers[k];
      }
      free(synchronizedMarginWidthLayers);
   }
   else {
      assert(synchronizedMarginWidthLayers == NULL);
   }
   synchronizedMarginWidthLayers = newSynchronizedMarginWidthLayers;
   synchronizedMarginWidthLayers[numSynchronizedMarginWidthLayers] = layer;
   numSynchronizedMarginWidthLayers++;

   equalizeMargins(this, layer);
}

int HyPerLayer::equalizeMargins(HyPerLayer *layer1, HyPerLayer *layer2) {
   int border1, border2, maxborder, result;
   int status = PV_SUCCESS;

   border1   = layer1->getLayerLoc()->halo.lt;
   border2   = layer2->getLayerLoc()->halo.lt;
   maxborder = border1 > border2 ? border1 : border2;
   layer1->requireMarginWidth(maxborder, &result, 'x');
   if (result != maxborder) {
      status = PV_FAILURE;
   }
   layer2->requireMarginWidth(maxborder, &result, 'x');
   if (result != maxborder) {
      status = PV_FAILURE;
   }
   if (status != PV_SUCCESS) {
      Fatal().printf(
            "Error in rank %d process: unable to synchronize x-margin widths of layers \"%s\" and "
            "\"%s\".\n",
            layer1->parent->columnId(),
            layer1->getName(),
            layer2->getName());
   }
   assert(
         layer1->getLayerLoc()->halo.lt == layer2->getLayerLoc()->halo.lt
         && layer1->getLayerLoc()->halo.rt == layer2->getLayerLoc()->halo.rt
         && layer1->getLayerLoc()->halo.lt == layer1->getLayerLoc()->halo.rt
         && layer1->getLayerLoc()->halo.lt == maxborder);

   border1   = layer1->getLayerLoc()->halo.dn;
   border2   = layer2->getLayerLoc()->halo.dn;
   maxborder = border1 > border2 ? border1 : border2;
   layer1->requireMarginWidth(maxborder, &result, 'y');
   if (result != maxborder) {
      status = PV_FAILURE;
   }
   layer2->requireMarginWidth(maxborder, &result, 'y');
   if (result != maxborder) {
      status = PV_FAILURE;
   }
   if (status != PV_SUCCESS) {
      Fatal().printf(
            "Error in rank %d process: unable to synchronize y-margin widths of layers \"%s\" and "
            "\"%s\".\n",
            layer1->parent->columnId(),
            layer1->getName(),
            layer2->getName());
   }
   assert(
         layer1->getLayerLoc()->halo.dn == layer2->getLayerLoc()->halo.dn
         && layer1->getLayerLoc()->halo.up == layer2->getLayerLoc()->halo.up
         && layer1->getLayerLoc()->halo.dn == layer1->getLayerLoc()->halo.up
         && layer1->getLayerLoc()->halo.dn == maxborder);

   return status;
}

Response::Status HyPerLayer::allocateDataStructures() {
   auto status = Response::SUCCESS;

   double deltaUpdateTime = getDeltaUpdateTime();
   if (deltaUpdateTime != -1 && triggerOffset >= deltaUpdateTime) {
      Fatal().printf(
            "%s error in rank %d process: TriggerOffset (%f) must be lower than the change in "
            "update time (%f) \n",
            getDescription_c(),
            parent->columnId(),
            triggerOffset,
            deltaUpdateTime);
   }

   allocateClayerBuffers();

   PVLayerLoc const *loc = getLayerLoc();
   int const nx          = loc->nx;
   int const ny          = loc->ny;
   int const nf          = loc->nf;
   PVHalo const *halo    = &loc->halo;

   // If the layer uses a fixed boundary value instead of mirror boundary
   // conditions, fill the halo with that value once, here.
   if (!useMirrorBCs() && getValueBC() != 0.0f) {
      int idx = 0;
      for (int batch = 0; batch < loc->nbatch; batch++) {
         for (int b = 0; b < halo->up; b++) {
            for (int k = 0; k < (nx + halo->lt + halo->rt) * nf; k++) {
               clayer->activity->data[idx] = getValueBC();
               idx++;
            }
         }
         for (int y = 0; y < ny; y++) {
            for (int k = 0; k < halo->lt * nf; k++) {
               clayer->activity->data[idx] = getValueBC();
               idx++;
            }
            idx += nx * nf; // skip the restricted interior of this row
            for (int k = 0; k < halo->rt * nf; k++) {
               clayer->activity->data[idx] = getValueBC();
               idx++;
            }
         }
         for (int b = 0; b < halo->dn; b++) {
            for (int k = 0; k < (nx + halo->lt + halo->rt) * nf; k++) {
               clayer->activity->data[idx] = getValueBC();
               idx++;
            }
         }
      }
      assert(idx == getNumExtendedAllBatches());
   }

   // Allocate one temporary GSyn accumulation buffer per thread.
   if (parent->getNumThreads() > 1) {
      thread_gSyn = (float **)malloc(sizeof(float *) * parent->getNumThreads());
      assert(thread_gSyn);

      for (int i = 0; i < parent->getNumThreads(); i++) {
         float *tempMem = (float *)malloc(sizeof(float) * getNumNeuronsAllBatches());
         if (!tempMem) {
            Fatal().printf(
                  "HyPerLayer \"%s\" error: rank %d unable to allocate %zu memory for thread_gSyn: "
                  "%s\n",
                  name,
                  parent->columnId(),
                  sizeof(float) * getNumNeuronsAllBatches(),
                  strerror(errno));
         }
         thread_gSyn[i] = tempMem;
      }
   }

#ifdef PV_USE_CUDA
   int deviceStatus = allocateDeviceBuffers();
   if (deviceStatus == 0) {
      status = Response::SUCCESS;
   }
   else {
      Fatal().printf(
            "%s unable to allocate device memory in rank %d process: %s\n",
            getDescription_c(),
            parent->columnId(),
            strerror(errno));
   }
   if (mUpdateGpu) {
      deviceStatus = allocateUpdateKernel();
      if (deviceStatus == 0) {
         status = Response::SUCCESS;
      }
   }
#endif // PV_USE_CUDA

   return status;
}

int HyPerLayer::increaseDelayLevels(int neededDelay) {
   if (numDelayLevels < neededDelay + 1)
      numDelayLevels = neededDelay + 1;
   if (numDelayLevels > MAX_F_DELAY)
      numDelayLevels = MAX_F_DELAY;
   return numDelayLevels;
}

int HyPerLayer::requireMarginWidth(int marginWidthNeeded, int *marginWidthResult, char axis) {
   PVLayerLoc *loc = &clayer->loc;
   PVHalo *halo    = &loc->halo;
   switch (axis) {
      case 'x': {
         int xmargin        = halo->lt;
         *marginWidthResult = xmargin;
         if (xmargin < marginWidthNeeded) {
            if (parent->columnId() == 0) {
               InfoLog().printf(
                     "%s: adjusting x-margin width from %d to %d\n",
                     getDescription_c(),
                     xmargin,
                     marginWidthNeeded);
            }
            xmargin  = marginWidthNeeded;
            halo->lt = xmargin;
            halo->rt = xmargin;
            calcNumExtended();
            assert(axis == 'x' && getLayerLoc()->halo.lt == getLayerLoc()->halo.rt);
            *marginWidthResult = xmargin;
            if (synchronizedMarginWidthLayers != NULL) {
               for (int k = 0; k < numSynchronizedMarginWidthLayers; k++) {
                  HyPerLayer *l = synchronizedMarginWidthLayers[k];
                  if (l->getLayerLoc()->halo.lt < marginWidthNeeded) {
                     synchronizedMarginWidthLayers[k]->requireMarginWidth(
                           marginWidthNeeded, marginWidthResult, axis);
                  }
                  assert(l->getLayerLoc()->halo.lt == getLayerLoc()->halo.lt);
                  assert(l->getLayerLoc()->halo.rt == getLayerLoc()->halo.rt);
               }
            }
         }
      } break;
      case 'y': {
         int ymargin        = halo->up;
         *marginWidthResult = ymargin;
         if (ymargin < marginWidthNeeded) {
            if (parent->columnId() == 0) {
               InfoLog().printf(
                     "%s: adjusting y-margin width from %d to %d\n",
                     getDescription_c(),
                     ymargin,
                     marginWidthNeeded);
            }
            ymargin  = marginWidthNeeded;
            halo->up = ymargin;
            halo->dn = ymargin;
            calcNumExtended();
            assert(axis == 'y' && getLayerLoc()->halo.dn == getLayerLoc()->halo.up);
            *marginWidthResult = ymargin;
            if (synchronizedMarginWidthLayers != NULL) {
               for (int k = 0; k < numSynchronizedMarginWidthLayers; k++) {
                  HyPerLayer *l = synchronizedMarginWidthLayers[k];
                  if (l->getLayerLoc()->halo.up < marginWidthNeeded) {
                     synchronizedMarginWidthLayers[k]->requireMarginWidth(
                           marginWidthNeeded, marginWidthResult, axis);
                  }
                  assert(l->getLayerLoc()->halo.dn == getLayerLoc()->halo.dn);
                  assert(l->getLayerLoc()->halo.up == getLayerLoc()->halo.up);
               }
            }
         }
      } break;
      default: assert(0); break;
   }
   return PV_SUCCESS;
}

int HyPerLayer::requireChannel(int channelNeeded, int *numChannelsResult) {
   if (channelNeeded >= numChannels) {
      numChannels = channelNeeded + 1;
   }
   *numChannelsResult = numChannels;
   return PV_SUCCESS;
}
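// Mirror boundary conditions: the eight mirrorTo* methods (defined further below)
// reflect the restricted interior of the cube into the halo, one method per border
// region (four edges plus four corners). In restricted coordinates, with a left
// margin of 2, halo column -1 receives interior column 0 and halo column -2
// receives interior column 1: the image is reflected about the restricted edge.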
int HyPerLayer::mirrorInteriorToBorder(PVLayerCube *cube, PVLayerCube *border) {
   assert(cube->numItems == border->numItems);
   assert(localDimensionsEqual(&cube->loc, &border->loc));

   mirrorToNorthWest(border, cube);
   mirrorToNorth(border, cube);
   mirrorToNorthEast(border, cube);
   mirrorToWest(border, cube);
   mirrorToEast(border, cube);
   mirrorToSouthWest(border, cube);
   mirrorToSouth(border, cube);
   mirrorToSouthEast(border, cube);
   return PV_SUCCESS;
}

Response::Status HyPerLayer::registerData(Checkpointer *checkpointer) {
   auto status = BaseLayer::registerData(checkpointer);
   if (!Response::completed(status)) {
      return status;
   }

   checkpointPvpActivityFloat(checkpointer, "A", getActivity(), true /*extended*/);
   if (getV() != nullptr) {
      checkpointPvpActivityFloat(checkpointer, "V", getV(), false /*not extended*/);
   }
   publisher->checkpointDataStore(checkpointer, getName(), "Delays");
   checkpointer->registerCheckpointData(
         std::string(getName()),
         std::string("lastUpdateTime"),
         &mLastUpdateTime,
         (std::size_t)1,
         true /*broadcast*/,
         false /*not constant*/);

   if (writeStep >= 0.0) {
      checkpointer->registerCheckpointData(
            std::string(getName()),
            std::string("nextWrite"),
            &writeTime,
            (std::size_t)1,
            true /*broadcast*/,
            false /*not constant*/);

      openOutputStateFile(checkpointer);

      if (sparseLayer) {
         checkpointer->registerCheckpointData(
               std::string(getName()),
               std::string("numframes_sparse"),
               &writeActivitySparseCalls,
               (std::size_t)1,
               true /*broadcast*/,
               false /*not constant*/);
      }
      else {
         checkpointer->registerCheckpointData(
               std::string(getName()),
               std::string("numframes"),
               &writeActivityCalls,
               (std::size_t)1,
               true /*broadcast*/,
               false /*not constant*/);
      }
   }

   update_timer = new Timer(getName(), "layer", "update ");
   checkpointer->registerTimer(update_timer);

   recvsyn_timer = new Timer(getName(), "layer", "recvsyn");
   checkpointer->registerTimer(recvsyn_timer);

#ifdef PV_USE_CUDA
   auto cudaDevice = parent->getDevice();
   if (cudaDevice) {
      gpu_update_timer = new PVCuda::CudaTimer(getName(), "layer", "gpuupdate");
      gpu_update_timer->setStream(cudaDevice->getStream());
      checkpointer->registerTimer(gpu_update_timer);

      gpu_recvsyn_timer = new PVCuda::CudaTimer(getName(), "layer", "gpurecvsyn");
      gpu_recvsyn_timer->setStream(cudaDevice->getStream());
      checkpointer->registerTimer(gpu_recvsyn_timer);
   }
#endif // PV_USE_CUDA

   publish_timer = new Timer(getName(), "layer", "publish");
   checkpointer->registerTimer(publish_timer);

   timescale_timer = new Timer(getName(), "layer", "timescale");
   checkpointer->registerTimer(timescale_timer);

   io_timer = new Timer(getName(), "layer", "io     ");
   checkpointer->registerTimer(io_timer);

   if (mInitVObject != nullptr) {
      auto message = std::make_shared<RegisterDataMessage<Checkpointer>>(checkpointer);
      mInitVObject->respond(message);
   }

   return Response::SUCCESS;
}

double HyPerLayer::getDeltaUpdateTime() {
   if (triggerLayer != NULL && triggerBehaviorType == UPDATEONLY_TRIGGER) {
      return getDeltaTriggerTime();
   }
   else {
      return parent->getDeltaTime();
   }
}

double HyPerLayer::getDeltaTriggerTime() {
   if (triggerLayer != NULL) {
      return triggerLayer->getDeltaUpdateTime();
   }
   else {
      return -1;
   }
}

bool HyPerLayer::needUpdate(double simTime, double dt) {
   if (mLastUpdateTime == simTime + triggerOffset) {
      return true;
   }
   double timeToCheck = mLastUpdateTime;
   if (triggerLayer != nullptr && triggerBehaviorType == UPDATEONLY_TRIGGER) {
      timeToCheck = triggerLayer->getLastUpdateTime();

      // If the triggering layer updates on this timestep, so do we.
      if (timeToCheck == simTime && triggerOffset == 0) {
         return true;
      }
   }
   if (simTime + triggerOffset >= timeToCheck + getDeltaUpdateTime()) {
      return true;
   }
   return false;
}

bool HyPerLayer::needReset(double simTime, double dt) {
   if (triggerLayer == nullptr) {
      return false;
   }
   if (triggerBehaviorType != RESETSTATE_TRIGGER) {
      return false;
   }
   if (getDeltaTriggerTime() <= 0) {
      return false;
   }
   if (simTime >= mLastTriggerTime + getDeltaTriggerTime()) {
      return true;
   }
   return false;
}
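// Trigger semantics: with updateOnlyOnTrigger, the layer updates exactly when its
// triggering layer updates (shifted triggerOffset timesteps earlier); with
// resetStateOnTrigger, the layer updates on every timestep but copies its state
// from triggerResetLayer whenever the trigger fires.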
Response::Status HyPerLayer::callUpdateState(double simTime, double dt) {
   auto status = Response::NO_ACTION;
   if (needUpdate(simTime, dt)) {
      if (needReset(simTime, dt)) {
         resetStateOnTrigger();
         mLastTriggerTime = simTime;
      }

      update_timer->start();
#ifdef PV_USE_CUDA
      if (mUpdateGpu) {
         gpu_update_timer->start();
         float *gSynHead = GSyn == NULL ? NULL : GSyn[0];
         status          = updateStateGpu(simTime, dt);
         gpu_update_timer->stop();
      }
      else {
#endif // PV_USE_CUDA
         status = updateState(simTime, dt);
#ifdef PV_USE_CUDA
      }
      // The copies of activity and datastore on the device are now out of date.
      updatedDeviceActivity  = true;
      updatedDeviceDatastore = true;
#endif // PV_USE_CUDA
      update_timer->stop();
      mNeedToPublish  = true;
      mLastUpdateTime = simTime;
   }
   return status;
}

void HyPerLayer::resetStateOnTrigger() {
   assert(triggerResetLayer != NULL);
   float *V = getV();
   if (V == NULL) {
      if (parent->columnId() == 0) {
         ErrorLog().printf(
               "%s: triggerBehavior is \"resetStateOnTrigger\" but layer does not have a membrane "
               "potential.\n",
               getDescription_c());
      }
      MPI_Barrier(parent->getCommunicator()->communicator());
      exit(EXIT_FAILURE);
   }
   float const *resetV = triggerResetLayer->getV();
   if (resetV != NULL) {
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for
#endif // PV_USE_OPENMP_THREADS
      for (int k = 0; k < getNumNeuronsAllBatches(); k++) {
         V[k] = resetV[k];
      }
   }
   else {
      float const *resetA   = triggerResetLayer->getActivity();
      PVLayerLoc const *loc = triggerResetLayer->getLayerLoc();
      PVHalo const *halo    = &loc->halo;
      for (int b = 0; b < parent->getNBatch(); b++) {
         float const *resetABatch = resetA + (b * triggerResetLayer->getNumExtended());
         float *VBatch            = V + (b * triggerResetLayer->getNumNeurons());
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for
#endif // PV_USE_OPENMP_THREADS
         for (int k = 0; k < getNumNeurons(); k++) {
            int kex = kIndexExtended(
                  k, loc->nx, loc->ny, loc->nf, halo->lt, halo->rt, halo->dn, halo->up);
            VBatch[k] = resetABatch[kex];
         }
      }
   }

   setActivity();

#ifdef PV_USE_CUDA
   if (mUpdateGpu) {
      getDeviceV()->copyToDevice(V);
      getDeviceActivity()->copyToDevice(clayer->activity->data);
      updatedDeviceActivity  = true;
      updatedDeviceDatastore = true;
   }
#endif // PV_USE_CUDA
}

int HyPerLayer::resetGSynBuffers(double timef, double dt) {
   int status = PV_SUCCESS;
   if (GSyn == NULL) {
      return PV_SUCCESS;
   }
   resetGSynBuffers_HyPerLayer(
         parent->getNBatch(), this->getNumNeurons(), getNumChannels(), GSyn[0]);
   return status;
}

#ifdef PV_USE_CUDA
int HyPerLayer::runUpdateKernel() {
   assert(mUpdateGpu);
   if (updatedDeviceGSyn) {
      copyAllGSynToDevice();
      updatedDeviceGSyn = false;
   }
   // ...
   return PV_SUCCESS;
}

Response::Status HyPerLayer::updateStateGpu(double timef, double dt) {
   Fatal() << "Update state for layer " << name << " is not implemented\n";
   return Response::NO_ACTION; // not reached; Fatal() exits
}
#endif // PV_USE_CUDA
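// The base-class dynamics below are deliberately simple. In sketch form (assuming
// the usual behavior of the applyGSyn_HyPerLayer and setActivity_HyPerLayer
// kernels), for each neuron k of one batch element:
//
//    V[k] = gSynExc[k] - gSynInh[k];      // or V[k] = gSynExc[k] with one channel
//    A[kIndexExtended(k, ...)] = V[k];    // copy V into the restricted interior
//
// Subclasses override updateState to implement richer dynamics.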
Response::Status HyPerLayer::updateState(double timef, double dt) {
   const PVLayerLoc *loc = getLayerLoc();
   float *A              = getCLayer()->activity->data;
   float *V              = getV();
   int num_channels      = getNumChannels();
   float *gSynHead       = GSyn == NULL ? NULL : GSyn[0];

   int nx          = loc->nx;
   int ny          = loc->ny;
   int nf          = loc->nf;
   int nbatch      = loc->nbatch;
   int num_neurons = nx * ny * nf;
   if (num_channels == 1) {
      applyGSyn_HyPerLayer1Channel(nbatch, num_neurons, V, gSynHead);
   }
   else {
      applyGSyn_HyPerLayer(nbatch, num_neurons, V, gSynHead);
   }
   setActivity_HyPerLayer(
         nbatch,
         num_neurons,
         A,
         V,
         nx,
         ny,
         nf,
         loc->halo.lt,
         loc->halo.rt,
         loc->halo.dn,
         loc->halo.up);

   return Response::SUCCESS;
}

int HyPerLayer::setActivity() {
   const PVLayerLoc *loc = getLayerLoc();
   return setActivity_HyPerLayer(
         loc->nbatch,
         getNumNeurons(),
         clayer->activity->data,
         getV(),
         loc->nx,
         loc->ny,
         loc->nf,
         loc->halo.lt,
         loc->halo.rt,
         loc->halo.dn,
         loc->halo.up);
}

void HyPerLayer::updateAllActiveIndices() { publisher->updateAllActiveIndices(); }

void HyPerLayer::updateActiveIndices() { publisher->updateActiveIndices(0); }

bool HyPerLayer::isAllInputReady() {
   bool isReady = true;
   for (auto &c : recvConns) {
      isReady &= c->isAllInputReady();
   }
   return isReady;
}

int HyPerLayer::recvAllSynapticInput() {
   int status = PV_SUCCESS;
   // Receiving synaptic input is only needed when the layer is about to update.
   if (needUpdate(parent->simulationTime(), parent->getDeltaTime())) {
      bool switchGpu = false;
      recvsyn_timer->start();

      for (auto &conn : recvConns) {
         pvAssert(conn != NULL);
#ifdef PV_USE_CUDA
         // CPU connections are at the front of recvConns; at the first GPU
         // connection, push GSyn to the device and switch timers.
         if (!switchGpu && conn->getReceiveGpu()) {
            copyAllGSynToDevice();
            gpu_recvsyn_timer->start();
            switchGpu = true;
         }
#endif // PV_USE_CUDA
         conn->deliver();
      }
#ifdef PV_USE_CUDA
      if (switchGpu) {
         gpu_recvsyn_timer->stop();
      }
#endif // PV_USE_CUDA
      recvsyn_timer->stop();
   }
   return status;
}

#ifdef PV_USE_CUDA
double HyPerLayer::addGpuTimers() {
   double simTime    = 0;
   bool updateNeeded = needUpdate(parent->simulationTime(), parent->getDeltaTime());
   if (mRecvGpu && updateNeeded) {
      simTime += gpu_recvsyn_timer->accumulateTime();
   }
   if (mUpdateGpu && updateNeeded) {
      simTime += gpu_update_timer->accumulateTime();
   }
   return simTime;
}

void HyPerLayer::syncGpu() {
   if (mRecvGpu || mUpdateGpu) {
      parent->getDevice()->syncDevice();
   }
}

void HyPerLayer::copyAllGSynToDevice() {
   if (mRecvGpu || mUpdateGpu) {
      float *h_postGSyn              = GSyn[0];
      PVCuda::CudaBuffer *d_postGSyn = this->getDeviceGSyn();
      assert(d_postGSyn);
      d_postGSyn->copyToDevice(h_postGSyn);
   }
}

void HyPerLayer::copyAllGSynFromDevice() {
   // Only copy back if this layer receives on the GPU.
   if (mRecvGpu) {
      float *h_postGSyn              = GSyn[0];
      PVCuda::CudaBuffer *d_postGSyn = this->getDeviceGSyn();
      assert(d_postGSyn);
      d_postGSyn->copyFromDevice(h_postGSyn);
   }
}

void HyPerLayer::copyAllVFromDevice() {
   // Only copy back if this layer updates on the GPU.
   if (mUpdateGpu) {
      float *h_V              = getV();
      PVCuda::CudaBuffer *d_V = this->getDeviceV();
      assert(d_V);
      d_V->copyFromDevice(h_V);
   }
}

void HyPerLayer::copyAllActivityFromDevice() {
   // Only copy back if this layer updates on the GPU.
   if (mUpdateGpu) {
      float *h_activity              = getCLayer()->activity->data;
      PVCuda::CudaBuffer *d_activity = this->getDeviceActivity();
      assert(d_activity);
      d_activity->copyFromDevice(h_activity);
   }
}
#endif // PV_USE_CUDA

int HyPerLayer::publish(Communicator *comm, double simTime) {
   publish_timer->start();

   int status = PV_SUCCESS;
   if (mNeedToPublish) {
      if (useMirrorBCs()) {
         mirrorInteriorToBorder(clayer->activity, clayer->activity);
      }
      status         = publisher->publish(mLastUpdateTime);
      mNeedToPublish = false;
   }
   else {
      publisher->copyForward(mLastUpdateTime);
   }
   publish_timer->stop();
   return status;
}

int HyPerLayer::waitOnPublish(Communicator *comm) {
   publish_timer->start();
   // Wait for the border-exchange messages to complete.
   int status = publisher->wait();
   publish_timer->stop();
   return status;
}

int HyPerLayer::insertProbe(LayerProbe *p) {
   if (p->getTargetLayer() != this) {
      WarnLog().printf(
            "HyPerLayer \"%s\": insertProbe called with probe %p, whose targetLayer is not this "
            "layer. Probe was not inserted.\n",
            name,
            p);
      return numProbes;
   }
   for (int i = 0; i < numProbes; i++) {
      if (p == probes[i]) {
         WarnLog().printf(
               "HyPerLayer \"%s\": insertProbe called with probe %p, which has already been "
               "inserted as probe %d.\n",
               name,
               p,
               i);
         return numProbes;
      }
   }

   LayerProbe **tmp = (LayerProbe **)malloc((numProbes + 1) * sizeof(LayerProbe *));
   assert(tmp != NULL);

   for (int i = 0; i < numProbes; i++) {
      tmp[i] = probes[i];
   }
   free(probes);

   probes            = tmp;
   probes[numProbes] = p;
   numProbes++;

   return numProbes;
}

Response::Status HyPerLayer::outputProbeParams() {
   for (int p = 0; p < numProbes; p++) {
      probes[p]->writeParams();
   }
   return Response::SUCCESS;
}

Response::Status HyPerLayer::outputState(double timef) {
   io_timer->start();

   for (int i = 0; i < numProbes; i++) {
      probes[i]->outputStateWrapper(timef, parent->getDeltaTime());
   }

   if (timef >= (writeTime - (parent->getDeltaTime() / 2)) && writeStep >= 0) {
      int writeStatus = PV_SUCCESS;
      writeTime += writeStep;
      if (sparseLayer) {
         writeStatus = writeActivitySparse(timef);
      }
      else {
         writeStatus = writeActivity(timef);
      }
      FatalIf(
            writeStatus != PV_SUCCESS,
            "%s: outputState failed on rank %d process.\n",
            getDescription_c(),
            parent->columnId());
   }

   io_timer->stop();
   return Response::SUCCESS;
}

Response::Status HyPerLayer::readStateFromCheckpoint(Checkpointer *checkpointer) {
   if (initializeFromCheckpointFlag) {
      readActivityFromCheckpoint(checkpointer);
      readVFromCheckpoint(checkpointer);
      readDelaysFromCheckpoint(checkpointer);
      updateAllActiveIndices();
      return Response::SUCCESS;
   }
   else {
      return Response::NO_ACTION;
   }
}

void HyPerLayer::readActivityFromCheckpoint(Checkpointer *checkpointer) {
   checkpointer->readNamedCheckpointEntry(std::string(name), std::string("A"), false);
}

void HyPerLayer::readVFromCheckpoint(Checkpointer *checkpointer) {
   if (getV() != nullptr) {
      checkpointer->readNamedCheckpointEntry(std::string(name), std::string("V"), false);
   }
}

void HyPerLayer::readDelaysFromCheckpoint(Checkpointer *checkpointer) {
   checkpointer->readNamedCheckpointEntry(std::string(name), std::string("Delays"), false);
}

Response::Status HyPerLayer::processCheckpointRead() {
   updateAllActiveIndices();
   return Response::SUCCESS;
}
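// Sparse frames store global restricted indices, but the active-index lists are
// kept in local extended coordinates. The loop below therefore converts each
// index: subtract the halo to get local restricted x/y (dropping entries that fall
// in the halo), add the layer's global offsets kx0/ky0, and re-linearize with the
// global nxGlobal/nyGlobal dimensions. For example, with nx=4, halo lt=1, and
// kx0=8, extended x index 1 becomes local x 0 and global x 8.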
int HyPerLayer::writeActivitySparse(double timed) {
   PVLayerCube cube      = publisher->createCube(0 /*delay*/);
   PVLayerLoc const *loc = getLayerLoc();
   pvAssert(cube.numItems == loc->nbatch * getNumExtended());

   int const mpiBatchDimension = getMPIBlock()->getBatchDimension();
   int const numFrames         = mpiBatchDimension * loc->nbatch;
   for (int frame = 0; frame < numFrames; frame++) {
      int const localBatchIndex = frame % loc->nbatch;
      int const mpiBatchIndex   = frame / loc->nbatch; // integer division
      pvAssert(mpiBatchIndex * loc->nbatch + localBatchIndex == frame);

      SparseList<float> list;
      auto const *activeIndicesBatch = (SparseList<float>::Entry const *)cube.activeIndices;
      auto *activeIndicesElement     = &activeIndicesBatch[localBatchIndex * getNumExtended()];
      int nxExt = loc->nx + loc->halo.lt + loc->halo.rt;
      int nyExt = loc->ny + loc->halo.dn + loc->halo.up;
      int nf    = loc->nf;
      for (long int k = 0; k < cube.numActive[localBatchIndex]; k++) {
         SparseList<float>::Entry entry = activeIndicesElement[k];
         int index                      = (int)entry.index;

         // The index is local extended; the pvp file needs global restricted.
         int x = kxPos(index, nxExt, nyExt, nf) - loc->halo.lt;
         if (x < 0 or x >= loc->nx) {
            continue;
         }
         int y = kyPos(index, nxExt, nyExt, nf) - loc->halo.up;
         if (y < 0 or y >= loc->ny) {
            continue;
         }
         x += loc->kx0;
         y += loc->ky0;
         int f = featureIndex(index, nxExt, nyExt, nf);

         entry.index = (uint32_t)kIndex(x, y, f, loc->nxGlobal, loc->nyGlobal, nf);
         list.addEntry(entry);
      }
      auto gatheredList =
            BufferUtils::gatherSparse(getMPIBlock(), list, mpiBatchIndex, 0 /*root process*/);
      if (getMPIBlock()->getRank() == 0) {
         long fpos = mOutputStateStream->getOutPos();
         if (fpos == 0L) {
            BufferUtils::ActivityHeader header = BufferUtils::buildSparseActivityHeader<float>(
                  loc->nx * getMPIBlock()->getNumColumns(),
                  loc->ny * getMPIBlock()->getNumRows(),
                  loc->nf,
                  0 /*numBands; rewritten by updateNBands()*/);
            header.timestamp = timed;
            BufferUtils::writeActivityHeader(*mOutputStateStream, header);
         }
         BufferUtils::writeSparseFrame(*mOutputStateStream, &gatheredList, timed);
      }
   }
   writeActivitySparseCalls += numFrames;
   updateNBands(writeActivitySparseCalls);
   return PV_SUCCESS;
}

int HyPerLayer::writeActivity(double timed) {
   PVLayerCube cube      = publisher->createCube(0 /*delay*/);
   PVLayerLoc const *loc = getLayerLoc();
   pvAssert(cube.numItems == loc->nbatch * getNumExtended());

   PVHalo const &halo   = loc->halo;
   int const nxExtLocal = loc->nx + halo.lt + halo.rt;
   int const nyExtLocal = loc->ny + halo.dn + halo.up;
   int const nf         = loc->nf;

   int const mpiBatchDimension = getMPIBlock()->getBatchDimension();
   int const numFrames         = mpiBatchDimension * loc->nbatch;
   for (int frame = 0; frame < numFrames; frame++) {
      int const localBatchIndex = frame % loc->nbatch;
      int const mpiBatchIndex   = frame / loc->nbatch; // integer division
      pvAssert(mpiBatchIndex * loc->nbatch + localBatchIndex == frame);

      float *data = &cube.data[localBatchIndex * getNumExtended()];
      Buffer<float> localBuffer(data, nxExtLocal, nyExtLocal, nf);
      localBuffer.crop(loc->nx, loc->ny, Buffer<float>::CENTER);
      Buffer<float> blockBuffer = BufferUtils::gather<float>(
            getMPIBlock(), localBuffer, loc->nx, loc->ny, mpiBatchIndex, 0 /*root process*/);

      // The root process of the MPI block writes the frame.
      if (getMPIBlock()->getRank() == 0) {
         long fpos = mOutputStateStream->getOutPos();
         if (fpos == 0L) {
            BufferUtils::ActivityHeader header = BufferUtils::buildActivityHeader<float>(
                  loc->nx * getMPIBlock()->getNumColumns(),
                  loc->ny * getMPIBlock()->getNumRows(),
                  loc->nf,
                  0 /*numBands; rewritten by updateNBands()*/);
            header.timestamp = timed;
            BufferUtils::writeActivityHeader(*mOutputStateStream, header);
         }
         BufferUtils::writeFrame<float>(*mOutputStateStream, &blockBuffer, timed);
      }
   }
   writeActivityCalls += numFrames;
   updateNBands(writeActivityCalls);
   return PV_SUCCESS;
}

void HyPerLayer::updateNBands(int const numCalls) {
   // Rewrite the nBands entry of the pvp header in place, then restore the file
   // position so that subsequent frames continue to append.
   if (mOutputStateStream != nullptr) {
      long int fpos = mOutputStateStream->getOutPos();
      mOutputStateStream->setOutPos(sizeof(int) * INDEX_NBANDS, true /*fromBeginning*/);
      mOutputStateStream->write(&numCalls, (long)sizeof(numCalls));
      mOutputStateStream->setOutPos(fpos, true /*fromBeginning*/);
   }
}

bool HyPerLayer::localDimensionsEqual(PVLayerLoc const *loc1, PVLayerLoc const *loc2) {
   return loc1->nbatch == loc2->nbatch && loc1->nx == loc2->nx && loc1->ny == loc2->ny
          && loc1->nf == loc2->nf && loc1->halo.lt == loc2->halo.lt
          && loc1->halo.rt == loc2->halo.rt && loc1->halo.dn == loc2->halo.dn
          && loc1->halo.up == loc2->halo.up;
}
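// In the extended (halo-included) frame used by the mirror methods, the
// stride*Extended helpers effectively give, for an nf-feature layer of extended
// width nxExt = nx + lt + rt and extended height nyExt = ny + dn + up:
//
//    sf = 1                      (features are contiguous)
//    sx = nf                     (one pixel step in x)
//    sy = nf * nxExt             (one row step in y)
//    sb = nf * nxExt * nyExt     (one batch element)
//
// so the src0/dst0 pointers below are just the corner pixels of the interior
// region being reflected and of the halo region being filled.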
int HyPerLayer::mirrorToNorthWest(PVLayerCube *dest, PVLayerCube *src) {
   if (!localDimensionsEqual(&dest->loc, &src->loc)) {
      return PV_FAILURE;
   }
   int nbatch     = dest->loc.nbatch;
   int nf         = dest->loc.nf;
   int leftBorder = dest->loc.halo.lt;
   int topBorder  = dest->loc.halo.up;
   size_t sb      = strideBExtended(&dest->loc);
   size_t sf      = strideFExtended(&dest->loc);
   size_t sx      = strideXExtended(&dest->loc);
   size_t sy      = strideYExtended(&dest->loc);

   for (int b = 0; b < nbatch; b++) {
      float *srcData  = src->data + b * sb;
      float *destData = dest->data + b * sb;

      float *src0 = srcData + topBorder * sy + leftBorder * sx;
      float *dst0 = destData + (topBorder - 1) * sy + (leftBorder - 1) * sx;

      for (int ky = 0; ky < topBorder; ky++) {
         float *to   = dst0 - ky * sy;
         float *from = src0 + ky * sy;
         for (int kx = 0; kx < leftBorder; kx++) {
            for (int kf = 0; kf < nf; kf++) {
               to[kf * sf] = from[kf * sf];
            }
            to -= sx;
            from += sx;
         }
      }
   }
   return PV_SUCCESS;
}

int HyPerLayer::mirrorToNorth(PVLayerCube *dest, PVLayerCube *src) {
   if (!localDimensionsEqual(&dest->loc, &src->loc)) {
      return PV_FAILURE;
   }
   int nx         = clayer->loc.nx;
   int nf         = clayer->loc.nf;
   int leftBorder = dest->loc.halo.lt;
   int topBorder  = dest->loc.halo.up;
   int nbatch     = dest->loc.nbatch;
   size_t sb      = strideBExtended(&dest->loc);
   size_t sf      = strideFExtended(&dest->loc);
   size_t sx      = strideXExtended(&dest->loc);
   size_t sy      = strideYExtended(&dest->loc);

   for (int b = 0; b < nbatch; b++) {
      float *srcData  = src->data + b * sb;
      float *destData = dest->data + b * sb;
      float *src0     = srcData + topBorder * sy + leftBorder * sx;
      float *dst0     = destData + (topBorder - 1) * sy + leftBorder * sx;

      for (int ky = 0; ky < topBorder; ky++) {
         float *to   = dst0 - ky * sy;
         float *from = src0 + ky * sy;
         for (int kx = 0; kx < nx; kx++) {
            for (int kf = 0; kf < nf; kf++) {
               to[kf * sf] = from[kf * sf];
            }
            to += sx;
            from += sx;
         }
      }
   }
   return PV_SUCCESS;
}

int HyPerLayer::mirrorToNorthEast(PVLayerCube *dest, PVLayerCube *src) {
   if (!localDimensionsEqual(&dest->loc, &src->loc)) {
      return PV_FAILURE;
   }
   int nx          = dest->loc.nx;
   int nf          = dest->loc.nf;
   int leftBorder  = dest->loc.halo.lt;
   int rightBorder = dest->loc.halo.rt;
   int topBorder   = dest->loc.halo.up;
   int nbatch      = dest->loc.nbatch;
   size_t sb       = strideBExtended(&dest->loc);
   size_t sf       = strideFExtended(&dest->loc);
   size_t sx       = strideXExtended(&dest->loc);
   size_t sy       = strideYExtended(&dest->loc);

   for (int b = 0; b < nbatch; b++) {
      float *srcData  = src->data + b * sb;
      float *destData = dest->data + b * sb;
      float *src0     = srcData + topBorder * sy + (nx + leftBorder - 1) * sx;
      float *dst0     = destData + (topBorder - 1) * sy + (nx + leftBorder) * sx;

      for (int ky = 0; ky < topBorder; ky++) {
         float *to   = dst0 - ky * sy;
         float *from = src0 + ky * sy;
         for (int kx = 0; kx < rightBorder; kx++) {
            for (int kf = 0; kf < nf; kf++) {
               to[kf * sf] = from[kf * sf];
            }
            to += sx;
            from -= sx;
         }
      }
   }
   return PV_SUCCESS;
}

int HyPerLayer::mirrorToWest(PVLayerCube *dest, PVLayerCube *src) {
   if (!localDimensionsEqual(&dest->loc, &src->loc)) {
      return PV_FAILURE;
   }
   int ny         = dest->loc.ny;
   int nf         = dest->loc.nf;
   int leftBorder = dest->loc.halo.lt;
   int topBorder  = dest->loc.halo.up;
   int nbatch     = dest->loc.nbatch;
   size_t sb      = strideBExtended(&dest->loc);
   size_t sf      = strideFExtended(&dest->loc);
   size_t sx      = strideXExtended(&dest->loc);
   size_t sy      = strideYExtended(&dest->loc);

   for (int b = 0; b < nbatch; b++) {
      float *srcData  = src->data + b * sb;
      float *destData = dest->data + b * sb;
      float *src0     = srcData + topBorder * sy + leftBorder * sx;
      float *dst0     = destData + topBorder * sy + (leftBorder - 1) * sx;

      for (int ky = 0; ky < ny; ky++) {
         float *to   = dst0 + ky * sy;
         float *from = src0 + ky * sy;
         for (int kx = 0; kx < leftBorder; kx++) {
            for (int kf = 0; kf < nf; kf++) {
               to[kf * sf] = from[kf * sf];
            }
            to -= sx;
            from += sx;
         }
      }
   }
   return PV_SUCCESS;
}

int HyPerLayer::mirrorToEast(PVLayerCube *dest, PVLayerCube *src) {
   if (!localDimensionsEqual(&dest->loc, &src->loc)) {
      return PV_FAILURE;
   }
   int nx          = clayer->loc.nx;
   int ny          = clayer->loc.ny;
   int nf          = clayer->loc.nf;
   int leftBorder  = dest->loc.halo.lt;
   int rightBorder = dest->loc.halo.rt;
   int topBorder   = dest->loc.halo.up;
   int nbatch      = dest->loc.nbatch;
   size_t sb       = strideBExtended(&dest->loc);
   size_t sf       = strideFExtended(&dest->loc);
   size_t sx       = strideXExtended(&dest->loc);
   size_t sy       = strideYExtended(&dest->loc);

   for (int b = 0; b < nbatch; b++) {
      float *srcData  = src->data + b * sb;
      float *destData = dest->data + b * sb;
      float *src0     = srcData + topBorder * sy + (nx + leftBorder - 1) * sx;
      float *dst0     = destData + topBorder * sy + (nx + leftBorder) * sx;

      for (int ky = 0; ky < ny; ky++) {
         float *to   = dst0 + ky * sy;
         float *from = src0 + ky * sy;
         for (int kx = 0; kx < rightBorder; kx++) {
            for (int kf = 0; kf < nf; kf++) {
               to[kf * sf] = from[kf * sf];
            }
            to += sx;
            from -= sx;
         }
      }
   }
   return PV_SUCCESS;
}

int HyPerLayer::mirrorToSouthWest(PVLayerCube *dest, PVLayerCube *src) {
   if (!localDimensionsEqual(&dest->loc, &src->loc)) {
      return PV_FAILURE;
   }
   int ny           = dest->loc.ny;
   int nf           = dest->loc.nf;
   int leftBorder   = dest->loc.halo.lt;
   int topBorder    = dest->loc.halo.up;
   int bottomBorder = dest->loc.halo.dn;
   int nbatch       = dest->loc.nbatch;
   size_t sb        = strideBExtended(&dest->loc);
   size_t sf        = strideFExtended(&dest->loc);
   size_t sx        = strideXExtended(&dest->loc);
   size_t sy        = strideYExtended(&dest->loc);

   for (int b = 0; b < nbatch; b++) {
      float *srcData  = src->data + b * sb;
      float *destData = dest->data + b * sb;
      float *src0     = srcData + (ny + topBorder - 1) * sy + leftBorder * sx;
      float *dst0     = destData + (ny + topBorder) * sy + (leftBorder - 1) * sx;

      for (int ky = 0; ky < bottomBorder; ky++) {
         float *to   = dst0 + ky * sy;
         float *from = src0 - ky * sy;
         for (int kx = 0; kx < leftBorder; kx++) {
            for (int kf = 0; kf < nf; kf++) {
               to[kf * sf] = from[kf * sf];
            }
            to -= sx;
            from += sx;
         }
      }
   }
   return PV_SUCCESS;
}

int HyPerLayer::mirrorToSouth(PVLayerCube *dest, PVLayerCube *src) {
   if (!localDimensionsEqual(&dest->loc, &src->loc)) {
      return PV_FAILURE;
   }
   int nx           = dest->loc.nx;
   int ny           = dest->loc.ny;
   int nf           = dest->loc.nf;
   int leftBorder   = dest->loc.halo.lt;
   int rightBorder  = dest->loc.halo.rt;
   int topBorder    = dest->loc.halo.up;
   int bottomBorder = dest->loc.halo.dn;
   int nbatch       = dest->loc.nbatch;
   size_t sb        = strideBExtended(&dest->loc);
   size_t sf        = strideFExtended(&dest->loc);
   size_t sx        = strideXExtended(&dest->loc);
   size_t sy        = strideYExtended(&dest->loc);

   for (int b = 0; b < nbatch; b++) {
      float *srcData  = src->data + b * sb;
      float *destData = dest->data + b * sb;
      float *src0     = srcData + (ny + topBorder - 1) * sy + leftBorder * sx;
      float *dst0     = destData + (ny + topBorder) * sy + leftBorder * sx;

      for (int ky = 0; ky < bottomBorder; ky++) {
         float *to   = dst0 + ky * sy;
         float *from = src0 - ky * sy;
         for (int kx = 0; kx < nx; kx++) {
            for (int kf = 0; kf < nf; kf++) {
               to[kf * sf] = from[kf * sf];
            }
            to += sx;
            from += sx;
         }
      }
   }
   return PV_SUCCESS;
}

int HyPerLayer::mirrorToSouthEast(PVLayerCube *dest, PVLayerCube *src) {
   if (!localDimensionsEqual(&dest->loc, &src->loc)) {
      return PV_FAILURE;
   }
   int nx           = dest->loc.nx;
   int ny           = dest->loc.ny;
   int nf           = dest->loc.nf;
   int leftBorder   = dest->loc.halo.lt;
   int rightBorder  = dest->loc.halo.rt;
   int topBorder    = dest->loc.halo.up;
   int bottomBorder = dest->loc.halo.dn;
   int nbatch       = dest->loc.nbatch;
   size_t sb        = strideBExtended(&dest->loc);
   size_t sf        = strideFExtended(&dest->loc);
   size_t sx        = strideXExtended(&dest->loc);
   size_t sy        = strideYExtended(&dest->loc);

   for (int b = 0; b < nbatch; b++) {
      float *srcData  = src->data + b * sb;
      float *destData = dest->data + b * sb;
      float *src0     = srcData + (ny + topBorder - 1) * sy + (nx + leftBorder - 1) * sx;
      float *dst0     = destData + (ny + topBorder) * sy + (nx + leftBorder) * sx;

      for (int ky = 0; ky < bottomBorder; ky++) {
         float *to   = dst0 + ky * sy;
         float *from = src0 - ky * sy;
         for (int kx = 0; kx < rightBorder; kx++) {
            for (int kf = 0; kf < nf; kf++) {
               to[kf * sf] = from[kf * sf];
            }
            to += sx;
            from -= sx;
         }
      }
   }
   return PV_SUCCESS;
}
/*
 * Parameter documentation (for the ioParam_* methods above):
 *
 *   nxScale, nyScale: Define the relationship between the column size and the
 *      layer size in x and y.
 *   nf: Defines how many features this layer has.
 *   phase: Defines the ordering in which each layer is updated.
 *   mirrorBCflag: If set to true, the margin will mirror the data.
 *   valueBC: If mirrorBCflag is false, uses the specified value for the margin area.
 *   initializeFromCheckpointFlag: If set to true, initialize using the checkpoint
 *      directory set in HyPerCol.
 *   initVType: Specifies how to initialize the V buffer.
 *   triggerLayerName: Specifies the name of the layer that this layer triggers off
 *      of. If set to NULL or the empty string, the layer does not trigger but
 *      updates its state on every timestep.
 *   triggerOffset: If triggerLayer is set, triggers this many timesteps before the
 *      target trigger.
 *   triggerBehavior: If triggerLayerName is set, specifies how the trigger is
 *      handled ("updateOnlyOnTrigger", "resetStateOnTrigger", or "ignore").
 *   triggerResetLayerName: If triggerBehavior is "resetStateOnTrigger", specifies
 *      the layer to use for resetting the state.
 *   triggerFlag: (Deprecated) Specifies if this layer is being triggered.
 *   writeStep: Specifies how often to output a pvp file for this layer.
 *   initialWriteTime: Specifies the first timestep to start outputting pvp files.
 *   sparseLayer: Specifies if the layer should be considered sparse for
 *      optimization and output.
 *   writeSparseValues: No longer used; the sparse-values format is used for all
 *      sparse layers.
 */