#include "HyPerLayer.hpp"
#include "checkpointing/CheckpointEntryPvpBuffer.hpp"
#include "checkpointing/CheckpointEntryRandState.hpp"
#include "columns/HyPerCol.hpp"
#include "connections/BaseConnection.hpp"
#include "include/default_params.h"
#include "include/pv_common.h"
#include "io/FileStream.hpp"

HyPerLayer::HyPerLayer() { initialize_base(); }

HyPerLayer::HyPerLayer(const char *name, HyPerCol *hc) {

int HyPerLayer::initialize_base() {
   triggerLayerName             = NULL;
   triggerBehavior              = NULL;
   triggerBehaviorType          = NO_TRIGGER;
   triggerResetLayerName        = NULL;
   initializeFromCheckpointFlag = false;

   mLastUpdateTime  = 0.0;
   mLastTriggerTime = 0.0;

   numSynchronizedMarginWidthLayers = 0;
   synchronizedMarginWidthLayers    = NULL;

   dataTypeString = NULL;

   allocDeviceGSyn          = false;
   allocDeviceActivity      = false;
   allocDeviceDatastore     = false;
   allocDeviceActiveIndices = false;
   d_ActiveIndices          = NULL;
   updatedDeviceActivity    = true;
   updatedDeviceDatastore   = true;
   updatedDeviceGSyn        = true;
   cudnn_Datastore          = NULL;
#endif // PV_USE_CUDNN
#endif // PV_USE_CUDA

   recvsyn_timer   = NULL;
   publish_timer   = NULL;
   timescale_timer = NULL;
   gpu_recvsyn_timer = NULL;
   gpu_update_timer  = NULL;

   int status = BaseLayer::initialize(name, hc);
   if (status != PV_SUCCESS) {

   writeTime                = initialWriteTime;
   writeActivityCalls       = 0;
   writeActivitySparseCalls = 0;

   mLastUpdateTime  = parent->getDeltaTime();
   mLastTriggerTime = parent->getDeltaTime();
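// initClayer sets up the PVLayer struct: restricted and extended neuron counts, per batch
// element and across the whole batch, plus the log2 scale factors derived from
// nxScale/nyScale (e.g. nxScale = 0.5 gives xScale = -log2(0.5) = 1).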
int HyPerLayer::initClayer() {
   int status = PV_SUCCESS;
   if (clayer == NULL) {
            "HyPerLayer \"%s\" error in rank %d process: unable to allocate memory for Clayer.\n",

   setLayerLoc(loc, nxScale, nyScale, numFeatures, parent->getNBatch());
   assert(loc->halo.lt == 0 && loc->halo.rt == 0 && loc->halo.dn == 0 && loc->halo.up == 0);

   int nBatch = parent->getNBatch();

   clayer->numNeurons  = loc->nx * loc->ny * loc->nf;
   clayer->numExtended = clayer->numNeurons;

   clayer->numNeuronsAllBatches  = nBatch * loc->nx * loc->ny * loc->nf;
   clayer->numExtendedAllBatches = clayer->numNeuronsAllBatches;

   double xScaled = -log2((double)nxScale);
   double yScaled = -log2((double)nyScale);

   int xScale = (int)nearbyint(xScaled);
   int yScale = (int)nearbyint(yScaled);

   clayer->xScale = xScale;
   clayer->yScale = yScale;
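// The destructor releases the timers, the output-state stream, the CUDA/cuDNN buffers,
// the trigger-related strings, and the per-thread GSyn scratch buffers.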
HyPerLayer::~HyPerLayer() {
   delete recvsyn_timer;
   delete publish_timer;
   delete timescale_timer;
   delete gpu_recvsyn_timer;
   delete gpu_update_timer;
   delete mOutputStateStream;
   if (cudnn_Datastore) {
      delete cudnn_Datastore;
#endif // PV_USE_CUDNN
#endif // PV_USE_CUDA
   free(synchronizedMarginWidthLayers);
   free(triggerLayerName);
   free(triggerBehavior);
   free(triggerResetLayerName);
   free(initVTypeString);
      for (int i = 0; i < parent->getNumThreads(); i++) {
         free(thread_gSyn[i]);
template <typename T>
int HyPerLayer::freeBuffer(T **buf) {

template int HyPerLayer::freeBuffer<float>(float **buf);
template int HyPerLayer::freeBuffer<int>(int **buf);

int HyPerLayer::freeClayer() {
   pvcube_delete(clayer->activity);
   freeBuffer(&clayer->prevActivity);
   freeBuffer(&clayer->V);

void HyPerLayer::freeChannels() {
   if (d_GSyn != NULL) {
   if (cudnn_GSyn != NULL) {
#endif // PV_USE_CUDNN
#endif // PV_USE_CUDA
      assert(numChannels > 0);

int HyPerLayer::allocateClayerBuffers() {
   allocatePrevActivity();
   for (int k = 0; k < getNumExtendedAllBatches(); k++) {
      clayer->prevActivity[k] = -10 * REFRACTORY_PERIOD;
template <typename T>
void HyPerLayer::allocateBuffer(T **buf, int bufsize, const char *bufname) {
   *buf = (T *)calloc(bufsize, sizeof(T));
            "%s: rank %d process unable to allocate memory for %s: %s.\n",

template void HyPerLayer::allocateBuffer<float>(float **buf, int bufsize, const char *bufname);
template void HyPerLayer::allocateBuffer<int>(int **buf, int bufsize, const char *bufname);

   allocateBuffer(buf, getNumNeuronsAllBatches(), bufname);

   allocateBuffer(buf, getNumExtendedAllBatches(), bufname);

void HyPerLayer::allocateV() {

void HyPerLayer::allocateActivity() {
   clayer->activity = pvcube_new(&clayer->loc, getNumExtendedAllBatches());
         clayer->activity == nullptr, "%s failed to allocate activity cube.\n", getDescription_c());

void HyPerLayer::allocatePrevActivity() {
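// setLayerLoc fills in the PVLayerLoc: the global size is the column size multiplied by
// the layer's scale factor and must come out an integer that divides evenly across the
// MPI grid. For example, a column with nx = 64 and nxScale = 0.5 gives nxGlobal = 32;
// with 4 MPI columns each rank then holds nx = 8.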
int HyPerLayer::setLayerLoc(
   int status = PV_SUCCESS;

   float nxglobalfloat = nxScale * parent->getNxGlobal();
   layerLoc->nxGlobal  = (int)nearbyintf(nxglobalfloat);
   if (std::fabs(nxglobalfloat - layerLoc->nxGlobal) > 0.0001f) {
      if (parent->columnId() == 0) {
         ErrorLog(errorMessage);
               "nxScale of layer \"%s\" is incompatible with size of column.\n", getName());
               "Column nx %d multiplied by nxScale %f must be an integer.\n",
               (double)parent->getNxGlobal(),

   float nyglobalfloat = nyScale * parent->getNyGlobal();
   layerLoc->nyGlobal  = (int)nearbyintf(nyglobalfloat);
   if (std::fabs(nyglobalfloat - layerLoc->nyGlobal) > 0.0001f) {
      if (parent->columnId() == 0) {
         ErrorLog(errorMessage);
               "nyScale of layer \"%s\" is incompatible with size of column.\n", getName());
               "Column ny %d multiplied by nyScale %f must be an integer.\n",
               (double)parent->getNyGlobal(),

   if (layerLoc->nxGlobal % icComm->numCommColumns() != 0) {
      if (parent->columnId() == 0) {
         ErrorLog(errorMessage);
               "Size of HyPerLayer \"%s\" is not compatible with the mpi configuration.\n", name);
               "The layer has %d pixels horizontally, and there are %d mpi processes in a row, but "
               "%d does not divide %d.\n",
               icComm->numCommColumns(),
               icComm->numCommColumns(),

   if (layerLoc->nyGlobal % icComm->numCommRows() != 0) {
      if (parent->columnId() == 0) {
         ErrorLog(errorMessage);
               "Size of HyPerLayer \"%s\" is not compatible with the mpi configuration.\n", name);
               "The layer has %d pixels vertically, and there are %d mpi processes in a column, "
               "but %d does not divide %d.\n",
               icComm->numCommRows(),
               icComm->numCommRows(),

   MPI_Barrier(icComm->communicator());

   if (status != PV_SUCCESS) {
      if (parent->columnId() == 0) {
         ErrorLog().printf("setLayerLoc failed for %s.\n", getDescription_c());

   layerLoc->nx = layerLoc->nxGlobal / icComm->numCommColumns();
   layerLoc->ny = layerLoc->nyGlobal / icComm->numCommRows();
   assert(layerLoc->nxGlobal == layerLoc->nx * icComm->numCommColumns());
   assert(layerLoc->nyGlobal == layerLoc->ny * icComm->numCommRows());

   layerLoc->kx0 = layerLoc->nx * icComm->commColumn();
   layerLoc->ky0 = layerLoc->ny * icComm->commRow();

   layerLoc->nbatch = numBatches;

   layerLoc->kb0          = parent->commBatch() * numBatches;
   layerLoc->nbatchGlobal = parent->numCommBatches() * numBatches;

   layerLoc->halo.lt = 0;
   layerLoc->halo.rt = 0;
   layerLoc->halo.dn = 0;
   layerLoc->halo.up = 0;

void HyPerLayer::calcNumExtended() {
   clayer->numExtended = (loc->nx + loc->halo.lt + loc->halo.rt)
                         * (loc->ny + loc->halo.dn + loc->halo.up) * loc->nf;
   clayer->numExtendedAllBatches = clayer->numExtended * loc->nbatch;
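// allocateGSyn lays out all GSyn channels in one contiguous block: channel m starts at
// GSyn[0] + m * getNumNeuronsAllBatches(), so a single calloc covers every channel.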
void HyPerLayer::allocateBuffers() {

void HyPerLayer::allocateGSyn() {
   if (numChannels > 0) {
      GSyn = (float **)malloc(numChannels * sizeof(float *));
      FatalIf(GSyn == nullptr, "%s unable to allocate GSyn pointers.\n", getDescription_c());

      GSyn[0] = (float *)calloc(getNumNeuronsAllBatches() * numChannels, sizeof(float));
      FatalIf(GSyn[0] == nullptr, "%s unable to allocate GSyn buffer.\n", getDescription_c());

      for (int m = 1; m < numChannels; m++) {
         GSyn[m] = GSyn[0] + m * getNumNeuronsAllBatches();

void HyPerLayer::addPublisher() {
   MPIBlock const *mpiBlock = parent->getCommunicator()->getLocalMPIBlock();
   publisher = new Publisher(*mpiBlock, clayer->activity, getNumDelayLevels(), getSparseFlag());
void HyPerLayer::checkpointPvpActivityFloat(
      char const *bufferName,
   bool registerSucceeded = checkpointer->registerCheckpointEntry(
               checkpointer->getMPIBlock(),
         "%s failed to register %s for checkpointing.\n",

void HyPerLayer::checkpointRandState(
      char const *bufferName,
   bool registerSucceeded = checkpointer->registerCheckpointEntry(
         std::make_shared<CheckpointEntryRandState>(
               checkpointer->getMPIBlock(),
               randState->getRNG(0),
         "%s failed to register %s for checkpointing.\n",

Response::Status HyPerLayer::initializeState() {
   initializeActivity();
   return Response::SUCCESS;

Response::Status HyPerLayer::copyInitialStateToGPU() {
         PVCuda::CudaBuffer *d_V = getDeviceV();
         d_V->copyToDevice(h_V);
      PVCuda::CudaBuffer *d_activity = getDeviceActivity();
      float *h_activity = getCLayer()->activity->data;
      d_activity->copyToDevice(h_activity);
   return Response::SUCCESS;
#endif // PV_USE_CUDA

void HyPerLayer::initializeV() {
   if (getV() != nullptr && mInitVObject != nullptr) {
      mInitVObject->calcV(getV(), getLayerLoc());

void HyPerLayer::initializeActivity() {
   int status = setActivity();
   FatalIf(status != PV_SUCCESS, "%s failed to initialize activity.\n", getDescription_c());

   ioParam_dataType(ioFlag);
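// The ioParam_* methods below read (or write) one layer parameter each from the params
// file via parent->parameters(); warnings and Fatal errors handle obsolete or
// inconsistent settings.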
void HyPerLayer::ioParam_dataType(enum ParamsIOFlag ioFlag) {
   this->parent->parameters()->ioParamString(
         ioFlag, this->getName(), "dataType", &dataTypeString, NULL, false);
   if (dataTypeString == NULL) {
   if (!strcmp(dataTypeString, "float")) {
   else if (!strcmp(dataTypeString, "int")) {
      Fatal() << "BaseLayer \"" << name
              << "\": dataType not recognized, can be \"float\" or \"int\"\n";

   parent->parameters()->ioParamValue(
         ioFlag, name, "updateGpu", &mUpdateGpu, mUpdateGpu, true);
   mUsingGPUFlag = mUpdateGpu;

   bool mUpdateGpu = false;
   parent->parameters()->ioParamValue(
         ioFlag, name, "updateGpu", &mUpdateGpu, mUpdateGpu, false);
   if (parent->columnId() == 0) {
            "%s: updateGpu is set to true, but PetaVision was compiled without GPU acceleration.\n",
#endif // PV_USE_CUDA

   parent->parameters()->ioParamValue(ioFlag, name, "nxScale", &nxScale, nxScale);

   parent->parameters()->ioParamValue(ioFlag, name, "nyScale", &nyScale, nyScale);

   parent->parameters()->ioParamValue(ioFlag, name, "nf", &numFeatures, numFeatures);

   parent->parameters()->ioParamValue(ioFlag, name, "phase", &phase, phase);
   if (ioFlag == PARAMS_IO_READ && phase < 0) {
      if (parent->columnId() == 0)
               "%s: phase must be >= 0 (given value was %d).\n", getDescription_c(), phase);

   parent->parameters()->ioParamValue(ioFlag, name, "mirrorBCflag", &mirrorBCflag, mirrorBCflag);

   assert(!parent->parameters()->presentAndNotBeenRead(name, "mirrorBCflag"));
      parent->parameters()->ioParamValue(ioFlag, name, "valueBC", &valueBC, (float)0);
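// For reference, a minimal sketch of how a HyPerLayer's parameters might appear in a
// .params file (the group name "SampleLayer" and the values are illustrative only; the
// parameter names are the ones read by the ioParam_ calls in this class):
//
//    HyPerLayer "SampleLayer" = {
//       nxScale          = 1.0;
//       nyScale          = 1.0;
//       nf               = 16;
//       phase            = 0;
//       mirrorBCflag     = true;
//       triggerLayerName = NULL;
//       writeStep        = 1.0;
//       initialWriteTime = 0.0;
//       sparseLayer      = false;
//    };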
   parent->parameters()->ioParamValue(
         "initializeFromCheckpointFlag",
         &initializeFromCheckpointFlag,
         initializeFromCheckpointFlag,

   parent->parameters()->ioParamString(
         BaseInitV::mDefaultInitV.data(),
   if (ioFlag == PARAMS_IO_READ) {
      BaseObject *object = Factory::instance()->createByKeyword(initVTypeString, name, parent);
      mInitVObject       = dynamic_cast<BaseInitV *>(object);
      if (mInitVObject == nullptr) {
         ErrorLog().printf("%s: unable to create InitV object\n", getDescription_c());
   if (mInitVObject != nullptr) {

   parent->parameters()->ioParamString(
         ioFlag, name, "triggerLayerName", &triggerLayerName, NULL, false);
   if (ioFlag == PARAMS_IO_READ) {
      if (triggerLayerName && !strcmp(name, triggerLayerName)) {
         if (parent->columnId() == 0) {
                  "%s: triggerLayerName cannot be the same as the name of the layer itself.\n",
         MPI_Barrier(parent->getCommunicator()->communicator());
      triggerFlag = (triggerLayerName != NULL && triggerLayerName[0] != '\0');
   pvAssert(!parent->parameters()->presentAndNotBeenRead(name, "triggerLayerName"));
   if (ioFlag == PARAMS_IO_READ && parent->parameters()->present(name, "triggerFlag")) {
      bool flagFromParams = false;
      parent->parameters()->ioParamValue(
            ioFlag, name, "triggerFlag", &flagFromParams, flagFromParams);
      if (parent->columnId() == 0) {
         WarnLog(triggerFlagMessage);
         triggerFlagMessage.printf("%s: triggerFlag has been deprecated.\n", getDescription_c());
         triggerFlagMessage.printf(
               "   If triggerLayerName is a nonempty string, triggering will be on;\n");
         triggerFlagMessage.printf(
               "   if triggerLayerName is empty or null, triggering will be off.\n");
         if (parent->columnId() == 0) {
            if (flagFromParams != triggerFlag) {
               ErrorLog(errorMessage);
               errorMessage.printf("%s: triggerLayerName=", name);
               if (triggerLayerName) {
                  errorMessage.printf("\"%s\"", triggerLayerName);
                  errorMessage.printf("NULL");
                     " implies triggerFlag=%s but triggerFlag was set in params to %s\n",
                     triggerFlag ? "true" : "false",
                     flagFromParams ? "true" : "false");
      if (flagFromParams != triggerFlag) {
         MPI_Barrier(parent->getCommunicator()->communicator());
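// triggerOffset must be nonnegative. triggerBehavior accepts three values:
// "updateOnlyOnTrigger" (the default when the string is empty or missing),
// "resetStateOnTrigger", and "ignore"; anything else is a fatal configuration error.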
   assert(!parent->parameters()->presentAndNotBeenRead(name, "triggerLayerName"));
      parent->parameters()->ioParamValue(
            ioFlag, name, "triggerOffset", &triggerOffset, triggerOffset);
      if (triggerOffset < 0) {
         if (parent->columnId() == 0) {
                  "%s: TriggerOffset (%f) must be positive\n", getDescription_c(), triggerOffset);

   assert(!parent->parameters()->presentAndNotBeenRead(name, "triggerLayerName"));
      parent->parameters()->ioParamString(
            "updateOnlyOnTrigger",
      if (triggerBehavior == NULL || !strcmp(triggerBehavior, "")) {
         free(triggerBehavior);
         triggerBehavior     = strdup("updateOnlyOnTrigger");
         triggerBehaviorType = UPDATEONLY_TRIGGER;
      else if (!strcmp(triggerBehavior, "updateOnlyOnTrigger")) {
         triggerBehaviorType = UPDATEONLY_TRIGGER;
      else if (!strcmp(triggerBehavior, "resetStateOnTrigger")) {
         triggerBehaviorType = RESETSTATE_TRIGGER;
      else if (!strcmp(triggerBehavior, "ignore")) {
         triggerBehaviorType = NO_TRIGGER;
         if (parent->columnId() == 0) {
                  "%s: triggerBehavior=\"%s\" is unrecognized.\n",
         MPI_Barrier(parent->getCommunicator()->communicator());
      triggerBehaviorType = NO_TRIGGER;

   assert(!parent->parameters()->presentAndNotBeenRead(name, "triggerLayerName"));
      assert(!parent->parameters()->presentAndNotBeenRead(name, "triggerBehavior"));
      if (!strcmp(triggerBehavior, "resetStateOnTrigger")) {
         parent->parameters()->ioParamStringRequired(
               ioFlag, name, "triggerResetLayerName", &triggerResetLayerName);
   parent->parameters()->ioParamValue(
         ioFlag, name, "writeStep", &writeStep, parent->getDeltaTime());

   assert(!parent->parameters()->presentAndNotBeenRead(name, "writeStep"));
   if (writeStep >= 0.0) {
      parent->parameters()->ioParamValue(ioFlag, name, "initialWriteTime", &initialWriteTime, 0.0);
      if (ioFlag == PARAMS_IO_READ && writeStep > 0.0 && initialWriteTime < 0.0) {
         double storeInitialWriteTime = initialWriteTime;
         while (initialWriteTime < 0.0) {
            initialWriteTime += writeStep;
         if (parent->columnId() == 0) {
            WarnLog(warningMessage);
            warningMessage.printf(
                  "%s: initialWriteTime %f is negative.  Adjusting initialWriteTime:\n",
            warningMessage.printf("    initialWriteTime adjusted to %f\n", initialWriteTime);

   if (ioFlag == PARAMS_IO_READ && !parent->parameters()->present(name, "sparseLayer")
       && parent->parameters()->present(name, "writeSparseActivity")) {
      Fatal().printf("writeSparseActivity is obsolete. Use sparseLayer instead.\n");
   parent->parameters()->ioParamValue(ioFlag, name, "sparseLayer", &sparseLayer, false);

   if (ioFlag == PARAMS_IO_READ) {
      assert(!parent->parameters()->presentAndNotBeenRead(name, "sparseLayer"));
      if (sparseLayer && parent->parameters()->present(name, "writeSparseValues")) {
         WarnLog() << "writeSparseValues parameter, defined in " << getDescription()
                   << ", is obsolete.\n";
         bool writeSparseValues;
         parent->parameters()->ioParamValue(
               ioFlag, name, "writeSparseValues", &writeSparseValues, true);
         if (!writeSparseValues) {
            WarnLog() << "The sparse-values format is used for all sparse layers.\n";
Response::Status HyPerLayer::respond(std::shared_ptr<BaseMessage const> message) {
   Response::Status status = BaseLayer::respond(message);
   if (status != Response::SUCCESS) {
   else if (auto castMessage = std::dynamic_pointer_cast<LayerSetMaxPhaseMessage const>(message)) {
      return respondLayerSetMaxPhase(castMessage);
   else if (auto castMessage = std::dynamic_pointer_cast<LayerWriteParamsMessage const>(message)) {
      return respondLayerWriteParams(castMessage);
               std::dynamic_pointer_cast<LayerProbeWriteParamsMessage const>(message)) {
      return respondLayerProbeWriteParams(castMessage);
               std::dynamic_pointer_cast<LayerClearProgressFlagsMessage const>(message)) {
      return respondLayerClearProgressFlags(castMessage);
   else if (auto castMessage = std::dynamic_pointer_cast<LayerUpdateStateMessage const>(message)) {
      return respondLayerUpdateState(castMessage);
               std::dynamic_pointer_cast<LayerRecvSynapticInputMessage const>(message)) {
      return respondLayerRecvSynapticInput(castMessage);
   else if (auto castMessage = std::dynamic_pointer_cast<LayerCopyFromGpuMessage const>(message)) {
      return respondLayerCopyFromGpu(castMessage);
#endif // PV_USE_CUDA
               std::dynamic_pointer_cast<LayerAdvanceDataStoreMessage const>(message)) {
      return respondLayerAdvanceDataStore(castMessage);
   else if (auto castMessage = std::dynamic_pointer_cast<LayerPublishMessage const>(message)) {
      return respondLayerPublish(castMessage);
   else if (auto castMessage = std::dynamic_pointer_cast<LayerOutputStateMessage const>(message)) {
      return respondLayerOutputState(castMessage);
         auto castMessage = std::dynamic_pointer_cast<LayerCheckNotANumberMessage const>(message)) {
      return respondLayerCheckNotANumber(castMessage);
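// Each respondLayer* handler below checks the message's phase (and, where relevant, the
// GPU flags) before acting, so a message aimed at a different phase leaves the layer
// untouched.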
HyPerLayer::respondLayerSetMaxPhase(std::shared_ptr<LayerSetMaxPhaseMessage const> message) {
   return setMaxPhase(message->mMaxPhase);

HyPerLayer::respondLayerWriteParams(std::shared_ptr<LayerWriteParamsMessage const> message) {
   return Response::SUCCESS;

Response::Status HyPerLayer::respondLayerProbeWriteParams(
      std::shared_ptr<LayerProbeWriteParamsMessage const> message) {
   return outputProbeParams();

Response::Status HyPerLayer::respondLayerClearProgressFlags(
      std::shared_ptr<LayerClearProgressFlagsMessage const> message) {
   clearProgressFlags();
   return Response::SUCCESS;

Response::Status HyPerLayer::respondLayerRecvSynapticInput(
      std::shared_ptr<LayerRecvSynapticInputMessage const> message) {
   Response::Status status = Response::SUCCESS;
   if (message->mPhase != getPhase()) {
   if (message->mRecvOnGpuFlag != mRecvGpu) {
#endif // PV_USE_CUDA
      *(message->mSomeLayerIsPending) = true;
   resetGSynBuffers(message->mTime, message->mDeltaT);
   message->mTimer->start();
   recvAllSynapticInput();
   *(message->mSomeLayerHasActed) = true;
   message->mTimer->stop();

HyPerLayer::respondLayerUpdateState(std::shared_ptr<LayerUpdateStateMessage const> message) {
   Response::Status status = Response::SUCCESS;
   if (message->mPhase != getPhase()) {
   if (message->mRecvOnGpuFlag != mRecvGpu) {
   if (message->mUpdateOnGpuFlag != mUpdateGpu) {
#endif // PV_USE_CUDA
   if (*(message->mSomeLayerHasActed) or !mHasReceived) {
      *(message->mSomeLayerIsPending) = true;
   status = callUpdateState(message->mTime, message->mDeltaT);
   *(message->mSomeLayerHasActed) = true;

HyPerLayer::respondLayerCopyFromGpu(std::shared_ptr<LayerCopyFromGpuMessage const> message) {
   Response::Status status = Response::SUCCESS;
   if (message->mPhase != getPhase()) {
   message->mTimer->start();
   copyAllActivityFromDevice();
   copyAllVFromDevice();
   copyAllGSynFromDevice();
   message->mTimer->stop();
#endif // PV_USE_CUDA

Response::Status HyPerLayer::respondLayerAdvanceDataStore(
      std::shared_ptr<LayerAdvanceDataStoreMessage const> message) {
   if (message->mPhase < 0 || message->mPhase == getPhase()) {
      publisher->increaseTimeLevel();
   return Response::SUCCESS;

HyPerLayer::respondLayerPublish(std::shared_ptr<LayerPublishMessage const> message) {
   if (message->mPhase != getPhase()) {
      return Response::NO_ACTION;
   publish(parent->getCommunicator(), message->mTime);
   return Response::SUCCESS;

Response::Status HyPerLayer::respondLayerCheckNotANumber(
      std::shared_ptr<LayerCheckNotANumberMessage const> message) {
   Response::Status status = Response::SUCCESS;
   if (message->mPhase != getPhase()) {
   int const N = getNumExtendedAllBatches();
   for (int n = 0; n < N; n++) {
      float a = layerData[n];
            "%s has not-a-number values in the activity buffer.  Exiting.\n",
            getDescription_c());

HyPerLayer::respondLayerOutputState(std::shared_ptr<LayerOutputStateMessage const> message) {
   Response::Status status = Response::SUCCESS;
   if (message->mPhase != getPhase()) {
   status = outputState(message->mTime);
void HyPerLayer::clearProgressFlags() {
   mHasReceived = false;
   mHasUpdated  = false;

int HyPerLayer::allocateUpdateKernel() {
   Fatal() << "Layer \"" << name << "\" of type " << mObjectType
           << " does not support updating on gpus yet\n";

   const size_t size    = getNumNeuronsAllBatches() * sizeof(float);
   const size_t size_ex = getNumExtendedAllBatches() * sizeof(float);

   PVCuda::CudaDevice *device = parent->getDevice();

      d_V = device->createBuffer(size, &description);

   if (allocDeviceDatastore) {
      d_Datastore = device->createBuffer(size_ex, &description);
      assert(d_Datastore);
      cudnn_Datastore = device->createBuffer(size_ex, &description);
      assert(cudnn_Datastore);

   if (allocDeviceActiveIndices) {
      d_numActive     = device->createBuffer(parent->getNBatch() * sizeof(long), &description);
      d_ActiveIndices = device->createBuffer(
      assert(d_ActiveIndices);

   if (allocDeviceActivity) {
      d_Activity = device->createBuffer(size_ex, &description);

   if (allocDeviceGSyn) {
      d_GSyn = device->createBuffer(size * numChannels, &description);
      cudnn_GSyn = device->createBuffer(size, &description);
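// The device buffers above are sized from the restricted (size) and extended (size_ex)
// neuron counts; the GSyn device buffer gets one restricted-size block per channel.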
#endif // PV_USE_CUDA

HyPerLayer::communicateInitInfo(std::shared_ptr<CommunicateInitInfoMessage const> message) {
      triggerLayer = message->lookup<HyPerLayer>(std::string(triggerLayerName));
      if (triggerLayer == NULL) {
         if (parent->columnId() == 0) {
                  "%s: triggerLayerName \"%s\" is not a layer in the HyPerCol.\n",
         MPI_Barrier(parent->getCommunicator()->communicator());
      if (triggerBehaviorType == RESETSTATE_TRIGGER) {
         char const *resetLayerName = NULL;
         if (triggerResetLayerName == NULL || triggerResetLayerName[0] == '\0') {
            resetLayerName    = triggerLayerName;
            triggerResetLayer = triggerLayer;
            resetLayerName    = triggerResetLayerName;
            triggerResetLayer = message->lookup<HyPerLayer>(std::string(triggerResetLayerName));
            if (triggerResetLayer == NULL) {
               if (parent->columnId() == 0) {
                        "%s: triggerResetLayerName \"%s\" is not a layer in the HyPerCol.\n",
                        triggerResetLayerName);
               MPI_Barrier(parent->getCommunicator()->communicator());
         PVLayerLoc const *triggerLoc = triggerResetLayer->getLayerLoc();
         PVLayerLoc const *localLoc   = this->getLayerLoc();
         if (triggerLoc->nxGlobal != localLoc->nxGlobal
             || triggerLoc->nyGlobal != localLoc->nyGlobal
             || triggerLoc->nf != localLoc->nf) {
            if (parent->columnId() == 0) {
               Fatal(errorMessage);
               errorMessage.printf(
                     "%s: triggerResetLayer \"%s\" has incompatible dimensions.\n",
               errorMessage.printf(
                     "    \"%s\" is %d-by-%d-by-%d and \"%s\" is %d-by-%d-by-%d.\n",
                     triggerLoc->nxGlobal,
                     triggerLoc->nyGlobal,
      this->setAllocDeviceGSyn();
      this->setAllocDeviceV();
      this->setAllocDeviceActivity();
   return Response::SUCCESS;

Response::Status HyPerLayer::setMaxPhase(int *maxPhase) {
   if (*maxPhase < phase) {
   return Response::SUCCESS;
         conn->getPost() != this,
         "%s called addRecvConn for %s, but \"%s\" is not the post-synaptic layer for \"%s\".\n",
         conn->getDescription_c(),
   if (!conn->getReceiveGpu()) {
      recvConns.insert(recvConns.begin(), conn);
      recvConns.push_back(conn);

int HyPerLayer::openOutputStateFile(Checkpointer *checkpointer) {
   pvAssert(writeStep >= 0);

   if (checkpointer->getMPIBlock()->getRank() == 0) {
      std::string outputStatePath(getName());
      outputStatePath.append(".pvp");

      std::string checkpointLabel(getName());
      checkpointLabel.append("_filepos");

      bool createFlag = checkpointer->getCheckpointReadDirectory().empty();
            outputStatePath.c_str(), createFlag, checkpointer, checkpointLabel);
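// synchronizeMarginWidth keeps a list of layers whose halos must stay in lock-step with
// this one; equalizeMargins then widens both layers' x- and y-margins to the larger of
// the two and verifies the result.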
void HyPerLayer::synchronizeMarginWidth(HyPerLayer *layer) {
   if (layer == this) {
   assert(layer->getLayerLoc() != NULL && this->getLayerLoc() != NULL);
   HyPerLayer **newSynchronizedMarginWidthLayers =
   assert(newSynchronizedMarginWidthLayers);
   if (numSynchronizedMarginWidthLayers > 0) {
      for (int k = 0; k < numSynchronizedMarginWidthLayers; k++) {
         newSynchronizedMarginWidthLayers[k] = synchronizedMarginWidthLayers[k];
      free(synchronizedMarginWidthLayers);
      assert(synchronizedMarginWidthLayers == NULL);
   synchronizedMarginWidthLayers = newSynchronizedMarginWidthLayers;
   synchronizedMarginWidthLayers[numSynchronizedMarginWidthLayers] = layer;
   numSynchronizedMarginWidthLayers++;

   equalizeMargins(this, layer);

   int border1, border2, maxborder, result;
   int status = PV_SUCCESS;

   border1   = layer1->getLayerLoc()->halo.lt;
   border2   = layer2->getLayerLoc()->halo.lt;
   maxborder = border1 > border2 ? border1 : border2;
   layer1->requireMarginWidth(maxborder, &result, 'x');
   if (result != maxborder) {
      status = PV_FAILURE;
   layer2->requireMarginWidth(maxborder, &result, 'x');
   if (result != maxborder) {
      status = PV_FAILURE;
   if (status != PV_SUCCESS) {
            "Error in rank %d process: unable to synchronize x-margin widths of layers \"%s\" and "
            layer1->parent->columnId(),
         layer1->getLayerLoc()->halo.lt == layer2->getLayerLoc()->halo.lt
         && layer1->getLayerLoc()->halo.rt == layer2->getLayerLoc()->halo.rt
         && layer1->getLayerLoc()->halo.lt == layer1->getLayerLoc()->halo.rt
         && layer1->getLayerLoc()->halo.lt == maxborder);

   border1   = layer1->getLayerLoc()->halo.dn;
   border2   = layer2->getLayerLoc()->halo.dn;
   maxborder = border1 > border2 ? border1 : border2;
   layer1->requireMarginWidth(maxborder, &result, 'y');
   if (result != maxborder) {
      status = PV_FAILURE;
   layer2->requireMarginWidth(maxborder, &result, 'y');
   if (result != maxborder) {
      status = PV_FAILURE;
   if (status != PV_SUCCESS) {
            "Error in rank %d process: unable to synchronize y-margin widths of layers \"%s\" and "
            layer1->parent->columnId(),
         layer1->getLayerLoc()->halo.dn == layer2->getLayerLoc()->halo.dn
         && layer1->getLayerLoc()->halo.up == layer2->getLayerLoc()->halo.up
         && layer1->getLayerLoc()->halo.dn == layer1->getLayerLoc()->halo.up
         && layer1->getLayerLoc()->halo.dn == maxborder);
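// allocateDataStructures checks the trigger configuration, allocates the PVLayer
// buffers, fills the halo with valueBC when mirror boundary conditions are off, and sets
// up per-thread GSyn scratch buffers and (when needed) the device buffers.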
Response::Status HyPerLayer::allocateDataStructures() {
   auto status = Response::SUCCESS;

      if (deltaUpdateTime != -1 && triggerOffset >= deltaUpdateTime) {
               "%s error in rank %d process: TriggerOffset (%f) must be lower than the change in "
               "update time (%f) \n",

   allocateClayerBuffers();

   PVHalo const *halo = &loc->halo;

   if (!useMirrorBCs() && getValueBC() != 0.0f) {
      for (int batch = 0; batch < loc->nbatch; batch++) {
         for (int b = 0; b < halo->up; b++) {
            for (int k = 0; k < (nx + halo->lt + halo->rt) * nf; k++) {
               clayer->activity->data[idx] = getValueBC();
         for (int y = 0; y < ny; y++) {
            for (int k = 0; k < halo->lt * nf; k++) {
               clayer->activity->data[idx] = getValueBC();
            for (int k = 0; k < halo->rt * nf; k++) {
               clayer->activity->data[idx] = getValueBC();
         for (int b = 0; b < halo->dn; b++) {
            for (int k = 0; k < (nx + halo->lt + halo->rt) * nf; k++) {
               clayer->activity->data[idx] = getValueBC();
      assert(idx == getNumExtendedAllBatches());

   if (parent->getNumThreads() > 1) {
      thread_gSyn = (float **)malloc(sizeof(float *) * parent->getNumThreads());
      assert(thread_gSyn);
      for (int i = 0; i < parent->getNumThreads(); i++) {
         float *tempMem = (float *)malloc(sizeof(float) * getNumNeuronsAllBatches());
                  "HyPerLayer \"%s\" error: rank %d unable to allocate %zu memory for thread_gSyn: "
                  sizeof(float) * getNumNeuronsAllBatches(),
         thread_gSyn[i] = tempMem;

   if (deviceStatus == 0) {
      status = Response::SUCCESS;
            "%s unable to allocate device memory in rank %d process: %s\n",
      deviceStatus = allocateUpdateKernel();
      if (deviceStatus == 0) {
         status = Response::SUCCESS;
int HyPerLayer::increaseDelayLevels(int neededDelay) {
   if (numDelayLevels < neededDelay + 1)
      numDelayLevels = neededDelay + 1;
   if (numDelayLevels > MAX_F_DELAY)
      numDelayLevels = MAX_F_DELAY;
   return numDelayLevels;

int HyPerLayer::requireMarginWidth(int marginWidthNeeded, int *marginWidthResult, char axis) {
   PVHalo *halo = &loc->halo;
         *marginWidthResult = xmargin;
         if (xmargin < marginWidthNeeded) {
            if (parent->columnId() == 0) {
                     "%s: adjusting x-margin width from %d to %d\n",
            xmargin = marginWidthNeeded;
            assert(axis == 'x' && getLayerLoc()->halo.lt == getLayerLoc()->halo.rt);
            *marginWidthResult = xmargin;
            if (synchronizedMarginWidthLayers != NULL) {
               for (int k = 0; k < numSynchronizedMarginWidthLayers; k++) {
                  HyPerLayer *l = synchronizedMarginWidthLayers[k];
                  if (l->getLayerLoc()->halo.lt < marginWidthNeeded) {
                     synchronizedMarginWidthLayers[k]->requireMarginWidth(
                           marginWidthNeeded, marginWidthResult, axis);
                  assert(l->getLayerLoc()->halo.lt == getLayerLoc()->halo.lt);
                  assert(l->getLayerLoc()->halo.rt == getLayerLoc()->halo.rt);
         *marginWidthResult = ymargin;
         if (ymargin < marginWidthNeeded) {
            if (parent->columnId() == 0) {
                     "%s: adjusting y-margin width from %d to %d\n",
            ymargin = marginWidthNeeded;
            assert(axis == 'y' && getLayerLoc()->halo.dn == getLayerLoc()->halo.up);
            *marginWidthResult = ymargin;
            if (synchronizedMarginWidthLayers != NULL) {
               for (int k = 0; k < numSynchronizedMarginWidthLayers; k++) {
                  HyPerLayer *l = synchronizedMarginWidthLayers[k];
                  if (l->getLayerLoc()->halo.up < marginWidthNeeded) {
                     synchronizedMarginWidthLayers[k]->requireMarginWidth(
                           marginWidthNeeded, marginWidthResult, axis);
                  assert(l->getLayerLoc()->halo.dn == getLayerLoc()->halo.dn);
                  assert(l->getLayerLoc()->halo.up == getLayerLoc()->halo.up);
      default: assert(0); break;

int HyPerLayer::requireChannel(int channelNeeded, int *numChannelsResult) {
   if (channelNeeded >= numChannels) {
      int numOldChannels = numChannels;
      numChannels        = channelNeeded + 1;
   *numChannelsResult = numChannels;
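// mirrorInteriorToBorder applies mirror boundary conditions by copying interior activity
// into all eight border regions (the four edges and the four corners).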
   assert(cube->numItems == border->numItems);
   assert(localDimensionsEqual(&cube->loc, &border->loc));

   mirrorToNorthWest(border, cube);
   mirrorToNorth(border, cube);
   mirrorToNorthEast(border, cube);
   mirrorToWest(border, cube);
   mirrorToEast(border, cube);
   mirrorToSouthWest(border, cube);
   mirrorToSouth(border, cube);
   mirrorToSouthEast(border, cube);
Response::Status HyPerLayer::registerData(Checkpointer *checkpointer) {
   auto status = BaseLayer::registerData(checkpointer);
   checkpointPvpActivityFloat(checkpointer, "A", getActivity(), true);
   if (getV() != nullptr) {
      checkpointPvpActivityFloat(checkpointer, "V", getV(), false);
   publisher->checkpointDataStore(checkpointer, getName(), "Delays");
   checkpointer->registerCheckpointData(
         std::string(getName()),
         std::string("lastUpdateTime"),
   checkpointer->registerCheckpointData(
         std::string(getName()),
         std::string("nextWrite"),

   if (writeStep >= 0.0) {
      openOutputStateFile(checkpointer);
         checkpointer->registerCheckpointData(
               std::string(getName()),
               std::string("numframes_sparse"),
               &writeActivitySparseCalls,
         checkpointer->registerCheckpointData(
               std::string(getName()),
               std::string("numframes"),
               &writeActivityCalls,

   update_timer = new Timer(getName(), "layer", "update ");
   checkpointer->registerTimer(update_timer);

   recvsyn_timer = new Timer(getName(), "layer", "recvsyn");
   checkpointer->registerTimer(recvsyn_timer);

   auto cudaDevice = parent->getDevice();
      gpu_update_timer = new PVCuda::CudaTimer(getName(), "layer", "gpuupdate");
      gpu_update_timer->setStream(cudaDevice->getStream());
      checkpointer->registerTimer(gpu_update_timer);

      gpu_recvsyn_timer = new PVCuda::CudaTimer(getName(), "layer", "gpurecvsyn");
      gpu_recvsyn_timer->setStream(cudaDevice->getStream());
      checkpointer->registerTimer(gpu_recvsyn_timer);
#endif // PV_USE_CUDA

   publish_timer = new Timer(getName(), "layer", "publish");
   checkpointer->registerTimer(publish_timer);

   timescale_timer = new Timer(getName(), "layer", "timescale");
   checkpointer->registerTimer(timescale_timer);

   io_timer = new Timer(getName(), "layer", "io     ");
   checkpointer->registerTimer(io_timer);

      auto message = std::make_shared<RegisterDataMessage<Checkpointer>>(checkpointer);
      mInitVObject->respond(message);

   return Response::SUCCESS;
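// The fragments below govern update timing: getDeltaUpdateTime defers to the trigger
// layer when triggerBehavior is updateOnlyOnTrigger and otherwise returns the column's
// deltaTime, while needUpdate compares mLastUpdateTime (or the trigger layer's last
// update time) against the current simTime and triggerOffset.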
   if (triggerLayer != NULL && triggerBehaviorType == UPDATEONLY_TRIGGER) {
      return parent->getDeltaTime();

   if (triggerLayer != NULL) {

   if (mLastUpdateTime == simTime + triggerOffset) {
   double timeToCheck = mLastUpdateTime;
   if (triggerLayer != nullptr && triggerBehaviorType == UPDATEONLY_TRIGGER) {
      timeToCheck = triggerLayer->getLastUpdateTime();
      if (timeToCheck == simTime && triggerOffset == 0) {

   if (triggerLayer == nullptr) {
   if (triggerBehaviorType != RESETSTATE_TRIGGER) {

Response::Status HyPerLayer::callUpdateState(double simTime, double dt) {
   auto status = Response::NO_ACTION;
         mLastTriggerTime = simTime;
      update_timer->start();
         gpu_update_timer->start();
         float *gSynHead = GSyn == NULL ? NULL : GSyn[0];
         status = updateStateGpu(simTime, dt);
         gpu_update_timer->stop();
         status = updateState(simTime, dt);
      updatedDeviceActivity  = true;
      updatedDeviceDatastore = true;
      update_timer->stop();
      mNeedToPublish  = true;
      mLastUpdateTime = simTime;
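// resetStateOnTrigger copies the reset layer's membrane potential into V (or, when the
// reset layer has no V, its activity restricted to the non-halo region), then marks the
// device copies of V and activity as needing an update.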
   assert(triggerResetLayer != NULL);
      if (parent->columnId() == 0) {
               "%s: triggerBehavior is \"resetStateOnTrigger\" but layer does not have a membrane "
               getDescription_c());
      MPI_Barrier(parent->getCommunicator()->communicator());
   float const *resetV = triggerResetLayer->getV();
   if (resetV != NULL) {
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for
#endif // PV_USE_OPENMP_THREADS
      for (int k = 0; k < getNumNeuronsAllBatches(); k++) {
      float const *resetA   = triggerResetLayer->getActivity();
      PVLayerLoc const *loc = triggerResetLayer->getLayerLoc();
      PVHalo const *halo    = &loc->halo;
      for (int b = 0; b < parent->getNBatch(); b++) {
         float const *resetABatch = resetA + (b * triggerResetLayer->getNumExtended());
         float *VBatch            = V + (b * triggerResetLayer->getNumNeurons());
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for
#endif // PV_USE_OPENMP_THREADS
         for (int k = 0; k < getNumNeurons(); k++) {
            int kex = kIndexExtended(
                  k, loc->nx, loc->ny, loc->nf, halo->lt, halo->rt, halo->dn, halo->up);
            VBatch[k] = resetABatch[kex];

      getDeviceV()->copyToDevice(V);
      getDeviceActivity()->copyToDevice(clayer->activity->data);
      updatedDeviceActivity  = true;
      updatedDeviceDatastore = true;

int HyPerLayer::resetGSynBuffers(double timef, double dt) {
   int status = PV_SUCCESS;
   resetGSynBuffers_HyPerLayer(
         parent->getNBatch(), this->getNumNeurons(), getNumChannels(), GSyn[0]);
int HyPerLayer::runUpdateKernel() {
   if (updatedDeviceGSyn) {
      copyAllGSynToDevice();
      updatedDeviceGSyn = false;

Response::Status HyPerLayer::updateStateGpu(double timef, double dt) {
   Fatal() << "Update state for layer " << name << " is not implemented\n";
   return Response::NO_ACTION;

Response::Status HyPerLayer::updateState(double timef, double dt) {
   float *A         = getCLayer()->activity->data;
   int num_channels = getNumChannels();
   float *gSynHead  = GSyn == NULL ? NULL : GSyn[0];

   int nbatch      = loc->nbatch;
   int num_neurons = nx * ny * nf;
   if (num_channels == 1) {
      applyGSyn_HyPerLayer1Channel(nbatch, num_neurons, V, gSynHead);
      applyGSyn_HyPerLayer(nbatch, num_neurons, V, gSynHead);
   setActivity_HyPerLayer(

   return Response::SUCCESS;

int HyPerLayer::setActivity() {
   return setActivity_HyPerLayer(
         clayer->activity->data,

void HyPerLayer::updateAllActiveIndices() { publisher->updateAllActiveIndices(); }

void HyPerLayer::updateActiveIndices() { publisher->updateActiveIndices(0); }

   bool isReady = true;
   for (auto &c : recvConns) {
      isReady &= c->isAllInputReady();
int HyPerLayer::recvAllSynapticInput() {
   int status = PV_SUCCESS;
   if (needUpdate(parent->simulationTime(), parent->getDeltaTime())) {
      bool switchGpu = false;
      recvsyn_timer->start();
      for (auto &conn : recvConns) {
         pvAssert(conn != NULL);
         if (!switchGpu && conn->getReceiveGpu()) {
            copyAllGSynToDevice();
            gpu_recvsyn_timer->start();
         gpu_recvsyn_timer->stop();
      recvsyn_timer->stop();

double HyPerLayer::addGpuTimers() {
   bool updateNeeded = needUpdate(parent->simulationTime(), parent->getDeltaTime());
   if (mRecvGpu && updateNeeded) {
      simTime += gpu_recvsyn_timer->accumulateTime();
   if (mUpdateGpu && updateNeeded) {
      simTime += gpu_update_timer->accumulateTime();

void HyPerLayer::syncGpu() {
   if (mRecvGpu || mUpdateGpu) {
      parent->getDevice()->syncDevice();

void HyPerLayer::copyAllGSynToDevice() {
   if (mRecvGpu || mUpdateGpu) {
      float *h_postGSyn              = GSyn[0];
      PVCuda::CudaBuffer *d_postGSyn = this->getDeviceGSyn();
      d_postGSyn->copyToDevice(h_postGSyn);

void HyPerLayer::copyAllGSynFromDevice() {
      float *h_postGSyn              = GSyn[0];
      PVCuda::CudaBuffer *d_postGSyn = this->getDeviceGSyn();
      d_postGSyn->copyFromDevice(h_postGSyn);

void HyPerLayer::copyAllVFromDevice() {
      float *h_V              = getV();
      PVCuda::CudaBuffer *d_V = this->getDeviceV();
      d_V->copyFromDevice(h_V);

void HyPerLayer::copyAllActivityFromDevice() {
      float *h_activity              = getCLayer()->activity->data;
      PVCuda::CudaBuffer *d_activity = this->getDeviceActivity();
      d_activity->copyFromDevice(h_activity);
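// publish re-applies mirror boundary conditions (when enabled), hands the activity cube
// to the Publisher, and clears mNeedToPublish; the fragment after it waits on
// publisher->wait() inside the publish timer.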
int HyPerLayer::publish(Communicator *comm, double simTime) {
   publish_timer->start();

   int status = PV_SUCCESS;
   if (mNeedToPublish) {
      if (useMirrorBCs()) {
         mirrorInteriorToBorder(clayer->activity, clayer->activity);
      status         = publisher->publish(mLastUpdateTime);
      mNeedToPublish = false;
   publish_timer->stop();

   publish_timer->start();
   int status = publisher->wait();
   publish_timer->stop();

   if (p->getTargetLayer() != this) {
            "HyPerLayer \"%s\": insertProbe called with probe %p, whose targetLayer is not this "
            "layer.  Probe was not inserted.\n",
   for (int i = 0; i < numProbes; i++) {
      if (p == probes[i]) {
               "HyPerLayer \"%s\": insertProbe called with probe %p, which has already been "
               "inserted as probe %d.\n",
   assert(tmp != NULL);
   for (int i = 0; i < numProbes; i++) {
   probes[numProbes] = p;
Response::Status HyPerLayer::outputProbeParams() {
   for (int p = 0; p < numProbes; p++) {
   return Response::SUCCESS;

Response::Status HyPerLayer::outputState(double timef) {
   for (int i = 0; i < numProbes; i++) {
   if (timef >= (writeTime - (parent->getDeltaTime() / 2)) && writeStep >= 0) {
      int writeStatus = PV_SUCCESS;
      writeTime += writeStep;
         writeStatus = writeActivitySparse(timef);
         writeStatus = writeActivity(timef);
            writeStatus != PV_SUCCESS,
            "%s: outputState failed on rank %d process.\n",
            parent->columnId());
   return Response::SUCCESS;

Response::Status HyPerLayer::readStateFromCheckpoint(Checkpointer *checkpointer) {
   if (initializeFromCheckpointFlag) {
      readActivityFromCheckpoint(checkpointer);
      readVFromCheckpoint(checkpointer);
      readDelaysFromCheckpoint(checkpointer);
      updateAllActiveIndices();
      return Response::SUCCESS;
      return Response::NO_ACTION;
void HyPerLayer::readActivityFromCheckpoint(Checkpointer *checkpointer) {
   checkpointer->readNamedCheckpointEntry(std::string(name), std::string("A"), false);

void HyPerLayer::readVFromCheckpoint(Checkpointer *checkpointer) {
   if (getV() != nullptr) {
      checkpointer->readNamedCheckpointEntry(std::string(name), std::string("V"), false);

void HyPerLayer::readDelaysFromCheckpoint(Checkpointer *checkpointer) {
   checkpointer->readNamedCheckpointEntry(std::string(name), std::string("Delays"), false);

Response::Status HyPerLayer::processCheckpointRead() {
   updateAllActiveIndices();
   return Response::SUCCESS;
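// writeActivitySparse writes one sparse frame per batch element per MPI batch index:
// each active entry's extended local index is converted to a global restricted index
// (entries falling inside the halo are skipped), the per-process lists are gathered to
// the block root, and the root writes the header and frame to the .pvp stream.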
int HyPerLayer::writeActivitySparse(double timed) {
   pvAssert(cube.numItems == loc->nbatch * getNumExtended());

   int const numFrames = mpiBatchDimension * loc->nbatch;
   for (int frame = 0; frame < numFrames; frame++) {
      int const localBatchIndex = frame % loc->nbatch;
      int const mpiBatchIndex   = frame / loc->nbatch;
      pvAssert(mpiBatchIndex * loc->nbatch + localBatchIndex == frame);

      auto *activeIndicesElement = &activeIndicesBatch[localBatchIndex * getNumExtended()];
      int nxExt                  = loc->nx + loc->halo.lt + loc->halo.rt;
      int nyExt                  = loc->ny + loc->halo.dn + loc->halo.up;
      for (long int k = 0; k < cube.numActive[localBatchIndex]; k++) {
         int index = (int)entry.index;

         int x = kxPos(index, nxExt, nyExt, nf) - loc->halo.lt;
         if (x < 0 or x >= loc->nx) {
         int y = kyPos(index, nxExt, nyExt, nf) - loc->halo.up;
         if (y < 0 or y >= loc->ny) {
         int f = featureIndex(index, nxExt, nyExt, nf);

         entry.index = (uint32_t)kIndex(x, y, f, loc->nxGlobal, loc->nyGlobal, nf);
         list.addEntry(entry);
            BufferUtils::gatherSparse(getMPIBlock(), list, mpiBatchIndex, 0);
      if (getMPIBlock()->getRank() == 0) {
         long fpos = mOutputStateStream->getOutPos();
            header.timestamp = timed;
            BufferUtils::writeActivityHeader(*mOutputStateStream, header);
         BufferUtils::writeSparseFrame(*mOutputStateStream, &gatheredList, timed);
   writeActivitySparseCalls += numFrames;
   updateNBands(writeActivitySparseCalls);

int HyPerLayer::writeActivity(double timed) {
   pvAssert(cube.numItems == loc->nbatch * getNumExtended());

   PVHalo const &halo   = loc->halo;
   int const nxExtLocal = loc->nx + halo.lt + halo.rt;
   int const nyExtLocal = loc->ny + halo.dn + halo.up;
   int const nf         = loc->nf;

   int const numFrames = mpiBatchDimension * loc->nbatch;
   for (int frame = 0; frame < numFrames; frame++) {
      int const localBatchIndex = frame % loc->nbatch;
      int const mpiBatchIndex   = frame / loc->nbatch;
      pvAssert(mpiBatchIndex * loc->nbatch + localBatchIndex == frame);

      float *data = &cube.data[localBatchIndex * getNumExtended()];
      Buffer<float> localBuffer(data, nxExtLocal, nyExtLocal, nf);
            getMPIBlock(), localBuffer, loc->nx, loc->ny, mpiBatchIndex, 0);
      if (getMPIBlock()->getRank() == 0) {
         long fpos = mOutputStateStream->getOutPos();
            header.timestamp = timed;
            BufferUtils::writeActivityHeader(*mOutputStateStream, header);
         BufferUtils::writeFrame<float>(*mOutputStateStream, &blockBuffer, timed);
   writeActivityCalls += numFrames;
   updateNBands(writeActivityCalls);
void HyPerLayer::updateNBands(int const numCalls) {
   if (mOutputStateStream != nullptr) {
      long int fpos = mOutputStateStream->getOutPos();
      mOutputStateStream->setOutPos(sizeof(int) * INDEX_NBANDS, true);
      mOutputStateStream->write(&numCalls, (long)sizeof(numCalls));
      mOutputStateStream->setOutPos(fpos, true);

   return loc1->nbatch == loc2->nbatch && loc1->nx == loc2->nx && loc1->ny == loc2->ny
          && loc1->nf == loc2->nf && loc1->halo.lt == loc2->halo.lt
          && loc1->halo.rt == loc2->halo.rt && loc1->halo.dn == loc2->halo.dn
          && loc1->halo.up == loc2->halo.up;
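// The mirrorTo* functions below each copy a reflected band of interior activity into one
// border region, walking the extended buffer with the batch, feature, x, and y strides
// (sb, sf, sx, sy) of the destination's PVLayerLoc.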
   if (!localDimensionsEqual(&dest->loc, &src->loc)) {
   int nbatch     = dest->loc.nbatch;
   int nf         = dest->loc.nf;
   int leftBorder = dest->loc.halo.lt;
   int topBorder  = dest->loc.halo.up;
   size_t sb      = strideBExtended(&dest->loc);
   size_t sf      = strideFExtended(&dest->loc);
   size_t sx      = strideXExtended(&dest->loc);
   size_t sy      = strideYExtended(&dest->loc);

   for (int b = 0; b < nbatch; b++) {
      float *srcData  = src->data + b * sb;
      float *destData = dest->data + b * sb;

      // dst0 points into the destination buffer, consistent with the other mirror functions.
      float *src0 = srcData + topBorder * sy + leftBorder * sx;
      float *dst0 = destData + (topBorder - 1) * sy + (leftBorder - 1) * sx;

      for (int ky = 0; ky < topBorder; ky++) {
         float *to   = dst0 - ky * sy;
         float *from = src0 + ky * sy;
         for (int kx = 0; kx < leftBorder; kx++) {
            for (int kf = 0; kf < nf; kf++) {
               to[kf * sf] = from[kf * sf];
  2335    if (!localDimensionsEqual(&dest->loc, &src->loc)) {
  2338    int nx         = clayer->loc.nx;
  2339    int nf         = clayer->loc.nf;
  2340    int leftBorder = dest->loc.halo.lt;
  2341    int topBorder  = dest->loc.halo.up;
  2342    int nbatch     = dest->loc.nbatch;
  2343    size_t sb      = strideBExtended(&dest->loc);
  2344    size_t sf      = strideFExtended(&dest->loc);
  2345    size_t sx      = strideXExtended(&dest->loc);
  2346    size_t sy      = strideYExtended(&dest->loc);
  2348    for (
int b = 0; b < nbatch; b++) {
  2349       float *srcData  = src->data + b * sb;
  2350       float *destData = dest->data + b * sb;
  2351       float *src0     = srcData + topBorder * sy + leftBorder * sx;
  2352       float *dst0     = destData + (topBorder - 1) * sy + leftBorder * sx;
  2354       for (
int ky = 0; ky < topBorder; ky++) {
  2355          float *to   = dst0 - ky * sy;
  2356          float *from = src0 + ky * sy;
  2357          for (
int kx = 0; kx < nx; kx++) {
  2358             for (
int kf = 0; kf < nf; kf++) {
  2359                to[kf * sf] = from[kf * sf];
   // Mirror the interior corner into the north-east halo region.
   if (!localDimensionsEqual(&dest->loc, &src->loc)) {
   int nx          = dest->loc.nx;
   int nf          = dest->loc.nf;
   int leftBorder  = dest->loc.halo.lt;
   int rightBorder = dest->loc.halo.rt;
   int topBorder   = dest->loc.halo.up;
   int nbatch      = dest->loc.nbatch;
   size_t sb       = strideBExtended(&dest->loc);
   size_t sf       = strideFExtended(&dest->loc);
   size_t sx       = strideXExtended(&dest->loc);
   size_t sy       = strideYExtended(&dest->loc);

   for (int b = 0; b < nbatch; b++) {
      float *srcData  = src->data + b * sb;
      float *destData = dest->data + b * sb;
      float *src0     = srcData + topBorder * sy + (nx + leftBorder - 1) * sx;
      float *dst0     = destData + (topBorder - 1) * sy + (nx + leftBorder) * sx;

      for (int ky = 0; ky < topBorder; ky++) {
         float *to   = dst0 - ky * sy;
         float *from = src0 + ky * sy;
         for (int kx = 0; kx < rightBorder; kx++) {
            for (int kf = 0; kf < nf; kf++) {
               to[kf * sf] = from[kf * sf];
   // Mirror the left interior columns into the west halo region.
   if (!localDimensionsEqual(&dest->loc, &src->loc)) {
   int ny         = dest->loc.ny;
   int nf         = dest->loc.nf;
   int leftBorder = dest->loc.halo.lt;
   int topBorder  = dest->loc.halo.up;
   int nbatch     = dest->loc.nbatch;
   size_t sb      = strideBExtended(&dest->loc);
   size_t sf      = strideFExtended(&dest->loc);
   size_t sx      = strideXExtended(&dest->loc);
   size_t sy      = strideYExtended(&dest->loc);

   for (int b = 0; b < nbatch; b++) {
      float *srcData  = src->data + b * sb;
      float *destData = dest->data + b * sb;
      float *src0     = srcData + topBorder * sy + leftBorder * sx;
      float *dst0     = destData + topBorder * sy + (leftBorder - 1) * sx;

      for (int ky = 0; ky < ny; ky++) {
         float *to   = dst0 + ky * sy;
         float *from = src0 + ky * sy;
         for (int kx = 0; kx < leftBorder; kx++) {
            for (int kf = 0; kf < nf; kf++) {
               to[kf * sf] = from[kf * sf];
   // Mirror the right interior columns into the east halo region.
   if (!localDimensionsEqual(&dest->loc, &src->loc)) {
   int nx          = clayer->loc.nx;
   int ny          = clayer->loc.ny;
   int nf          = clayer->loc.nf;
   int leftBorder  = dest->loc.halo.lt;
   int rightBorder = dest->loc.halo.rt;
   int topBorder   = dest->loc.halo.up;
   int nbatch      = dest->loc.nbatch;
   size_t sb       = strideBExtended(&dest->loc);
   size_t sf       = strideFExtended(&dest->loc);
   size_t sx       = strideXExtended(&dest->loc);
   size_t sy       = strideYExtended(&dest->loc);

   for (int b = 0; b < nbatch; b++) {
      float *srcData  = src->data + b * sb;
      float *destData = dest->data + b * sb;
      float *src0     = srcData + topBorder * sy + (nx + leftBorder - 1) * sx;
      float *dst0     = destData + topBorder * sy + (nx + leftBorder) * sx;

      for (int ky = 0; ky < ny; ky++) {
         float *to   = dst0 + ky * sy;
         float *from = src0 + ky * sy;
         for (int kx = 0; kx < rightBorder; kx++) {
            for (int kf = 0; kf < nf; kf++) {
               to[kf * sf] = from[kf * sf];
   // Mirror the interior corner into the south-west halo region.
   if (!localDimensionsEqual(&dest->loc, &src->loc)) {
   int ny           = dest->loc.ny;
   int nf           = dest->loc.nf;
   int leftBorder   = dest->loc.halo.lt;
   int topBorder    = dest->loc.halo.up;
   int bottomBorder = dest->loc.halo.dn;
   int nbatch       = dest->loc.nbatch;
   size_t sb        = strideBExtended(&dest->loc);
   size_t sf        = strideFExtended(&dest->loc);
   size_t sx        = strideXExtended(&dest->loc);
   size_t sy        = strideYExtended(&dest->loc);

   for (int b = 0; b < nbatch; b++) {
      float *srcData  = src->data + b * sb;
      float *destData = dest->data + b * sb;
      float *src0     = srcData + (ny + topBorder - 1) * sy + leftBorder * sx;
      float *dst0     = destData + (ny + topBorder) * sy + (leftBorder - 1) * sx;

      for (int ky = 0; ky < bottomBorder; ky++) {
         float *to   = dst0 + ky * sy;
         float *from = src0 - ky * sy;
         for (int kx = 0; kx < leftBorder; kx++) {
            for (int kf = 0; kf < nf; kf++) {
               to[kf * sf] = from[kf * sf];
   // Mirror the bottom interior rows into the south halo region.
   if (!localDimensionsEqual(&dest->loc, &src->loc)) {
   int nx           = dest->loc.nx;
   int ny           = dest->loc.ny;
   int nf           = dest->loc.nf;
   int leftBorder   = dest->loc.halo.lt;
   int rightBorder  = dest->loc.halo.rt;
   int topBorder    = dest->loc.halo.up;
   int bottomBorder = dest->loc.halo.dn;
   int nbatch       = dest->loc.nbatch;
   size_t sb        = strideBExtended(&dest->loc);
   size_t sf        = strideFExtended(&dest->loc);
   size_t sx        = strideXExtended(&dest->loc);
   size_t sy        = strideYExtended(&dest->loc);

   for (int b = 0; b < nbatch; b++) {
      float *srcData  = src->data + b * sb;
      float *destData = dest->data + b * sb;
      float *src0     = srcData + (ny + topBorder - 1) * sy + leftBorder * sx;
      float *dst0     = destData + (ny + topBorder) * sy + leftBorder * sx;

      for (int ky = 0; ky < bottomBorder; ky++) {
         float *to   = dst0 + ky * sy;
         float *from = src0 - ky * sy;
         for (int kx = 0; kx < nx; kx++) {
            for (int kf = 0; kf < nf; kf++) {
               to[kf * sf] = from[kf * sf];
   // Mirror the interior corner into the south-east halo region.
   if (!localDimensionsEqual(&dest->loc, &src->loc)) {
   int nx           = dest->loc.nx;
   int ny           = dest->loc.ny;
   int nf           = dest->loc.nf;
   int leftBorder   = dest->loc.halo.lt;
   int rightBorder  = dest->loc.halo.rt;
   int topBorder    = dest->loc.halo.up;
   int bottomBorder = dest->loc.halo.dn;
   int nbatch       = dest->loc.nbatch;
   size_t sb        = strideBExtended(&dest->loc);
   size_t sf        = strideFExtended(&dest->loc);
   size_t sx        = strideXExtended(&dest->loc);
   size_t sy        = strideYExtended(&dest->loc);

   for (int b = 0; b < nbatch; b++) {
      float *srcData  = src->data + b * sb;
      float *destData = dest->data + b * sb;
      float *src0     = srcData + (ny + topBorder - 1) * sy + (nx + leftBorder - 1) * sx;
      float *dst0     = destData + (ny + topBorder) * sy + (nx + leftBorder) * sx;

      for (int ky = 0; ky < bottomBorder; ky++) {
         float *to   = dst0 + ky * sy;
         float *from = src0 - ky * sy;
         for (int kx = 0; kx < rightBorder; kx++) {
            for (int kf = 0; kf < nf; kf++) {
               to[kf * sf] = from[kf * sf];
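The index arithmetic in these copies is easiest to see on a toy case. The standalone sketch below is not PetaVision code; the main() harness, the buffer sizes, and the sxExt/syExt stride names are illustrative assumptions. It reproduces the north-west corner reflection for a tiny single-batch, single-feature layer with a one-pixel halo, assuming the feature-fastest layout in which the feature stride is 1, the x stride is nf, and the y stride is nf * (nx + lt + rt).

#include <cstdio>
#include <vector>

// Toy stand-in for the north-west mirroring loop: nx = ny = 4, nf = 1,
// nbatch = 1, one-pixel halo on every side.
int main() {
   const int nx = 4, ny = 4, nf = 1;
   const int lt = 1, rt = 1, up = 1, dn = 1;
   const int sxExt = nf;                  // x stride in the extended buffer
   const int syExt = nf * (nx + lt + rt); // y stride in the extended buffer
   std::vector<float> buf((nx + lt + rt) * (ny + up + dn) * nf, 0.0f);

   // Fill the restricted (interior) region with a recognizable pattern.
   for (int ky = 0; ky < ny; ky++) {
      for (int kx = 0; kx < nx; kx++) {
         buf[(ky + up) * syExt + (kx + lt) * sxExt] = 1.0f + 10.0f * ky + kx;
      }
   }

   // Reflect the first `up` interior rows and first `lt` interior columns
   // into the north-west corner of the halo, with the same src0/dst0 offsets
   // as the loop above.
   float *src0 = buf.data() + up * syExt + lt * sxExt;
   float *dst0 = buf.data() + (up - 1) * syExt + (lt - 1) * sxExt;
   for (int ky = 0; ky < up; ky++) {
      float *to   = dst0 - ky * syExt;
      float *from = src0 + ky * syExt;
      for (int kx = 0; kx < lt; kx++) {
         to[-kx * sxExt] = from[kx * sxExt]; // x runs outward in dest, inward in src
      }
   }

   std::printf("corner halo value = %g, nearest interior value = %g\n",
               buf[0], buf[up * syExt + lt * sxExt]);
   return 0;
}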
 
virtual int ioParamsFillGroup(enum ParamsIOFlag ioFlag) override
virtual void ioParam_triggerOffset(enum ParamsIOFlag ioFlag)
triggerOffset: If triggerLayer is set, triggers <triggerOffset> timesteps before target trigger ...
virtual void resetStateOnTrigger()
virtual void ioParam_triggerBehavior(enum ParamsIOFlag ioFlag)
triggerBehavior: If triggerLayerName is set, this parameter specifies how the trigger is handled...
virtual int allocateDeviceBuffers()
int getNumColumns() const 
int present(const char *groupName, const char *paramName)
virtual void ioParam_writeStep(enum ParamsIOFlag ioFlag)
writeStep: Specifies how often to output a pvp file for this layer 
virtual void ioParam_phase(enum ParamsIOFlag ioFlag)
phase: Defines the ordering in which each layer is updated 
void allocateRestrictedBuffer(float **buf, const char *bufname)
PVLayerCube createCube(int delay=0)
virtual void ioParam_initializeFromCheckpointFlag(enum ParamsIOFlag ioFlag)
initializeFromCheckpointFlag: If set to true, initialize using checkpoint directory set in HyPerCol...
bool isExchangeFinished(int delay=0)
virtual void ioParam_sparseLayer(enum ParamsIOFlag ioFlag)
sparseLayer: Specifies whether the layer should be considered sparse for optimization and output ...
virtual void ioParam_nf(enum ParamsIOFlag ioFlag)
nf: Defines how many features this layer has 
virtual int ioParamsFillGroup(enum ParamsIOFlag ioFlag) override
virtual void ioParam_triggerLayerName(enum ParamsIOFlag ioFlag)
triggerLayerName: Specifies the name of the layer that this layer triggers off of. If set to NULL or the empty string, the layer does not trigger but updates its state on every timestep. 
virtual double getDeltaUpdateTime()
static bool completed(Status &a)
int initialize(const char *name, HyPerCol *hc)
virtual void ioParam_valueBC(enum ParamsIOFlag ioFlag)
valueBC: If mirrorBCflag is set to false, uses the specified value for the margin area ...
void allocateExtendedBuffer(float **buf, const char *bufname)
int getBatchDimension() const 
virtual bool needUpdate(double simTime, double dt)
virtual void ioParam_mirrorBCflag(enum ParamsIOFlag ioFlag)
mirrorBCflag: If set to true, the margin will mirror the data 
virtual void ioParam_triggerResetLayerName(enum ParamsIOFlag ioFlag)
triggerResetLayerName: If triggerLayerName is set, this parameter specifies the layer to use for upda...
void addRecvConn(BaseConnection *conn)
virtual void ioParam_InitVType(enum ParamsIOFlag ioFlag)
initVType: Specifies how to initialize the V buffer. 
virtual Response::Status outputStateWrapper(double timef, double dt)
int freeExtendedBuffer(float **buf)
int freeRestrictedBuffer(float **buf)
virtual void ioParam_triggerFlag(enum ParamsIOFlag ioFlag)
triggerFlag: (Deprecated) Specifies if this layer is being triggered 
virtual double getDeltaTriggerTime()
int publish(double lastUpdateTime)
void copyForward(double lastUpdateTime)
const float * getLayerData(int delay=0)
virtual void ioParam_writeSparseValues(enum ParamsIOFlag ioFlag)
writeSparseValues: No longer used. 
virtual void ioParam_updateGpu(enum ParamsIOFlag ioFlag)
updateGpu: When compiled using CUDA or OpenCL GPU acceleration, this flag tells whether this layer's ...
virtual void ioParam_nxScale(enum ParamsIOFlag ioFlag)
nxScale: Defines the relationship between the x column size and the layer size. 
virtual bool needReset(double timed, double dt)
virtual void ioParam_nyScale(enum ParamsIOFlag ioFlag)
nyScale: Defines the relationship between the y column size and the layer size. 
virtual void ioParam_initialWriteTime(enum ParamsIOFlag ioFlag)
initialWriteTime: Specifies the first timestep to start outputting pvp files (an illustrative params fragment using these parameters follows below) 
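The parameters documented above are supplied per layer in a params file. The fragment below is a hypothetical illustration only: the ANNLayer class name, the layer name, and every value are placeholders chosen for the example, not recommended defaults.

ANNLayer "ExampleLayer" = {
    nxScale                      = 1.0;     // same width as the column
    nyScale                      = 1.0;     // same height as the column
    nf                           = 16;      // number of features
    phase                        = 1;       // update ordering relative to other layers
    mirrorBCflag                 = false;   // fill the margin with valueBC instead of mirroring
    valueBC                      = 0.0;
    InitVType                    = "ZeroV"; // how to initialize the V buffer
    initializeFromCheckpointFlag = false;
    triggerLayerName             = NULL;    // no trigger layer; update every timestep
    sparseLayer                  = false;
    updateGpu                    = false;
    writeStep                    = 1.0;     // how often to write pvp output
    initialWriteTime             = 0.0;     // first timestep to write
};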