#define DEFAULT_DELTA_T 1.0 // time step size (msec)

#include "HyPerCol.hpp"
#include "columns/Communicator.hpp"
#include "columns/Factory.hpp"
#include "columns/RandomSeed.hpp"
#include "io/PrintStream.hpp"
#include "pvGitRevision.h"
// ...
#include <sys/types.h>
// ...
#ifdef PV_USE_OPENMP_THREADS
#include <omp.h> // needed for omp_set_num_threads() below
#endif // PV_USE_OPENMP_THREADS
// ...

HyPerCol::HyPerCol(PV_Init *initObj) {
   initialize_base();
   initialize(initObj);
}
HyPerCol::~HyPerCol() {
   // ...
   if (getCommunicator()->globalCommRank() == 0) {
      PrintStream pStream(getOutputStream());
      mCheckpointer->writeTimers(pStream);
   }
   // ...
   mObjectHierarchy.clear(true);
   for (auto iterator = mPhaseRecvTimers.begin(); iterator != mPhaseRecvTimers.end();) {
      delete *iterator;
      iterator = mPhaseRecvTimers.erase(iterator);
   }
   // ...
   free(mPrintParamsFilename);
}
int HyPerCol::initialize_base() {
   // ...
   mParamsProcessedFlag      = false;
   // ...
   mCheckpointReadFlag       = false;
   // ...
   mDeltaTime                = DEFAULT_DELTA_T;
   mWriteTimeScaleFieldnames = true;
   mProgressInterval         = 1.0;
   mWriteProgressToErr       = false;
   // ...
   mLayerStatus              = nullptr;
   mConnectionStatus         = nullptr;
   mPrintParamsFilename      = nullptr;
   mPrintParamsStream        = nullptr;
   mLuaPrintParamsStream     = nullptr;
   // ...
   mOwnsCommunicator         = true;
   // ...
   mCommunicator             = nullptr;
   // ...
   mPhaseRecvTimers.clear();
   // ...
   mErrorOnNotANumber        = false;
   // ...
   mCudaDevice               = nullptr;
   // ...
   return PV_SUCCESS;
}
int HyPerCol::initialize(PV_Init *initObj) {
   mPVInitObj    = initObj;
   mCommunicator = mPVInitObj->getCommunicator();
   // ...
   if (mParams == nullptr) {
      if (mCommunicator->globalCommRank() == 0) {
         ErrorLog() << "HyPerCol::initialize: params have not been set." << std::endl;
         MPI_Barrier(mCommunicator->communicator());
      }
      // ...
   }

   std::string working_dir = mPVInitObj->getStringArgument("WorkingDirectory");
   working_dir             = expandLeadingTilde(working_dir);

   int numGroups          = mParams->numberOfGroups();
   std::string paramsFile = initObj->getStringArgument("ParamsFile");
   if (numGroups == 0) {
      ErrorLog() << "Params \"" << paramsFile << "\" does not define any groups.\n";
      // ...
   }
   if (strcmp(mParams->groupKeywordFromIndex(0), "HyPerCol")) {
      std::string paramsFile = initObj->getStringArgument("ParamsFile");
      ErrorLog() << "First group in the params file \"" << paramsFile
                 << "\" does not define a HyPerCol.\n";
      // ...
   }
   mName = strdup(mParams->groupNameFromIndex(0));
   // ...

   if (columnId() == 0 && !working_dir.empty()) {
      int status = chdir(working_dir.c_str());
      if (status) {
         Fatal chdirMessage;
         chdirMessage.printf("Unable to switch directory to \"%s\"\n", working_dir.c_str());
         chdirMessage.printf("chdir error: %s\n", strerror(errno));
      }
   }
   // ...

#ifdef PV_USE_MPI // Fail if there was a parsing error, but make sure nonroot ...
   int parsedStatus;
   int rootproc = 0;
   if (globalRank() == rootproc) {
      parsedStatus = this->mParams->getParseStatus();
   }
   MPI_Bcast(&parsedStatus, 1, MPI_INT, rootproc, getCommunicator()->globalCommunicator());
#else
   int parsedStatus = this->mParams->getParseStatus();
#endif // PV_USE_MPI
   if (parsedStatus != 0) {
      // ...
   }

   mRandomSeed = mPVInitObj->getUnsignedIntArgument("RandomSeed");
   // ...
   mCheckpointer = new Checkpointer(
         std::string(mName), mCommunicator->getGlobalMPIBlock(), mPVInitObj->getArguments());
   // ...
   ioParams(PARAMS_IO_READ);
   // ...
   mFinalStep = (long int)nearbyint(mStopTime / mDeltaTime);
   mCheckpointer->provideFinalStep(mFinalStep);
   mNextProgressTime = 0.0;
   // ...
   RandomSeed::instance()->initialize(mRandomSeed);
   if (getCommunicator()->globalCommRank() == 0) {
      InfoLog() << "RandomSeed initialized to " << mRandomSeed << ".\n";
   }

   mRunTimer = new Timer(mName, "column", "run    ");
   mCheckpointer->registerTimer(mRunTimer);
   mCheckpointer->registerCheckpointData(/* ... */);
   // ...
   mCheckpointReadFlag = !mCheckpointer->getCheckpointReadDirectory().empty();
   // ...
   for (int k = 1; k < numGroups; k++) {
      const char *kw   = mParams->groupKeywordFromIndex(k);
      const char *name = mParams->groupNameFromIndex(k);
      if (!strcmp(kw, "HyPerCol")) {
         if (globalRank() == 0) {
            std::string paramsFile = initObj->getStringArgument("ParamsFile");
            ErrorLog() << "Group " << k + 1 << " in params file (\"" << paramsFile
                       << "\") is a HyPerCol; only the first group can be a HyPerCol.\n";
            // ...
         }
      }
      else {
         BaseObject *addedObject = nullptr;
         try {
            addedObject = Factory::instance()->createByKeyword(kw, name, this);
         } catch (std::exception const &e) {
            Fatal() << e.what() << std::endl;
         }
         if (addedObject == nullptr) {
            ErrorLog().printf("Unable to create %s \"%s\".\n", kw, name);
            // ...
         }
         addObject(addedObject);
      }
   }
   // ...
   return PV_SUCCESS;
}
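For reference, the group ordering this loop enforces looks like the following in a params file. This is an illustrative sketch, not taken from the listing: the group names and parameter values are hypothetical, and keywords other than HyPerCol (e.g. HyPerLayer) are only examples of Factory-registered keywords.

   HyPerCol "column" = {      // the first group must be a HyPerCol
      nx       = 64;
      ny       = 64;
      dt       = 1.0;
      stopTime = 100.0;
   };
   HyPerLayer "input" = {     // later groups go through Factory::createByKeyword
      ...
   };
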
void HyPerCol::setDescription() {
   description = "HyPerCol \"";
   description.append(getName()).append("\"");
}
void HyPerCol::ioParams(enum ParamsIOFlag ioFlag) {
   ioParamsStartGroup(ioFlag, mName);
   ioParamsFillGroup(ioFlag);
   ioParamsFinishGroup(ioFlag);
}
int HyPerCol::ioParamsStartGroup(enum ParamsIOFlag ioFlag, const char *group_name) {
   if (ioFlag == PARAMS_IO_WRITE && mCheckpointer->getMPIBlock()->getRank() == 0) {
      pvAssert(mPrintParamsStream);
      pvAssert(mLuaPrintParamsStream);
      const char *keyword = mParams->groupKeywordFromName(group_name);
      mPrintParamsStream->printf("\n");
      mPrintParamsStream->printf("%s \"%s\" = {\n", keyword, group_name);
      mLuaPrintParamsStream->printf("%s = {\n", group_name);
      mLuaPrintParamsStream->printf("groupType = \"%s\";\n", keyword);
   }
   return PV_SUCCESS;
}
int HyPerCol::ioParamsFillGroup(enum ParamsIOFlag ioFlag) {
   // ...
   mCheckpointer->ioParams(ioFlag, parameters());
   // ...
}
int HyPerCol::ioParamsFinishGroup(enum ParamsIOFlag ioFlag) {
   if (ioFlag == PARAMS_IO_WRITE && mPrintParamsStream != nullptr) {
      pvAssert(mLuaPrintParamsStream);
      mPrintParamsStream->printf("};\n");
      mLuaPrintParamsStream->printf("};\n\n");
   }
   return PV_SUCCESS;
}
void HyPerCol::ioParam_dt(enum ParamsIOFlag ioFlag) {
   parameters()->ioParamValue(ioFlag, mName, "dt", &mDeltaTime, mDeltaTime);
}

void HyPerCol::ioParam_stopTime(enum ParamsIOFlag ioFlag) {
   parameters()->ioParamValue(ioFlag, mName, "stopTime", &mStopTime, mStopTime);
}

void HyPerCol::ioParam_progressInterval(enum ParamsIOFlag ioFlag) {
   parameters()->ioParamValue(
         ioFlag, mName, "progressInterval", &mProgressInterval, mProgressInterval);
}

void HyPerCol::ioParam_writeProgressToErr(enum ParamsIOFlag ioFlag) {
   parameters()->ioParamValue(
         ioFlag, mName, "writeProgressToErr", &mWriteProgressToErr, mWriteProgressToErr);
}

void HyPerCol::ioParam_printParamsFilename(enum ParamsIOFlag ioFlag) {
   parameters()->ioParamString(
         ioFlag, mName, "printParamsFilename", &mPrintParamsFilename, "pv.params");
   if (mPrintParamsFilename == nullptr || mPrintParamsFilename[0] == '\0') {
      if (mCheckpointer->getMPIBlock()->getRank() == 0) {
         ErrorLog().printf("printParamsFilename cannot be null or the empty string.\n");
      }
      MPI_Barrier(mCheckpointer->getMPIBlock()->getComm());
      // ...
   }
}

void HyPerCol::ioParam_randomSeed(enum ParamsIOFlag ioFlag) {
   switch (ioFlag) {
      case PARAMS_IO_READ:
         // ...
         if (mParams->present(mName, "randomSeed")) {
            mRandomSeed = (unsigned long)mParams->value(mName, "randomSeed");
         }
         else {
            mRandomSeed = seedRandomFromWallClock();
         }
         // ...
         if (mRandomSeed < RandomSeed::minSeed) {
            Fatal().printf(
                  "Error: random seed %u is too small. Use a seed of at "
                  /* ... */);
         }
         break;
      case PARAMS_IO_WRITE: parameters()->writeParam("randomSeed", mRandomSeed); break;
      default: assert(0); break;
   }
}

void HyPerCol::ioParam_nx(enum ParamsIOFlag ioFlag) {
   parameters()->ioParamValueRequired(ioFlag, mName, "nx", &mNumXGlobal);
}

void HyPerCol::ioParam_ny(enum ParamsIOFlag ioFlag) {
   parameters()->ioParamValueRequired(ioFlag, mName, "ny", &mNumYGlobal);
}

void HyPerCol::ioParam_nBatch(enum ParamsIOFlag ioFlag) {
   parameters()->ioParamValue(ioFlag, mName, "nbatch", &mNumBatchGlobal, mNumBatchGlobal);
   // ...
   FatalIf(
         mNumBatchGlobal % mCommunicator->numCommBatches() != 0,
         "The total number of batches (%d) must be a multiple of the batch "
         /* ... */,
         mCommunicator->numCommBatches());
   mNumBatch = mNumBatchGlobal / mCommunicator->numCommBatches();
}

void HyPerCol::ioParam_errorOnNotANumber(enum ParamsIOFlag ioFlag) {
   parameters()->ioParamValue(
         ioFlag, mName, "errorOnNotANumber", &mErrorOnNotANumber, mErrorOnNotANumber);
}
   // ...
   pvAssert(mPrintParamsFilename && mPrintParamsFilename[0]);
   if (mPrintParamsFilename[0] != '/') {
      std::string printParamsFilename(mPrintParamsFilename);
      // ...
      processParams(printParamsPath.c_str());
   }
   else {
      // ...
      processParams(mPrintParamsFilename);
   }
   // ...
   std::string const &gpu_devices = mPVInitObj->getStringArgument("GPUDevices");
   initializeCUDA(gpu_devices);
   // ...
   MPI_Barrier(mCommunicator->globalCommunicator());
   if (thread_status != PV_SUCCESS) {
      // ...
   }
#ifdef PV_USE_OPENMP_THREADS
   pvAssert(mNumThreads > 0);
   // ...
   omp_set_num_threads(mNumThreads);
#endif // PV_USE_OPENMP_THREADS

   notifyLoop(std::make_shared<AllocateDataMessage>());
   // ...
   notifyLoop(std::make_shared<LayerSetMaxPhaseMessage>(&mNumPhases));
   // ...
   mPhaseRecvTimers.clear();
   for (int phase = 0; phase < mNumPhases; phase++) {
      std::string timerTypeString("phRecv");
      timerTypeString.append(std::to_string(phase));
      Timer *phaseRecvTimer = new Timer(mName, "column", timerTypeString.c_str());
      mPhaseRecvTimers.push_back(phaseRecvTimer);
      mCheckpointer->registerTimer(phaseRecvTimer);
   }
   // ...
   InfoLog().printf("[%d]: HyPerCol: running...\n", mCommunicator->globalCommRank());
   // ...
   notifyLoop(std::make_shared<InitializeStateMessage>());
   if (mCheckpointReadFlag) {
      mCheckpointer->checkpointRead(&mSimTime, &mCurrentStep);
   }
   else {
      // ...
      mCheckpointer->readStateFromCheckpoint();
      // ...
   }
   // ...
#ifdef PV_USE_CUDA
   notifyLoop(std::make_shared<CopyInitialStateToGPUMessage>());
#endif // PV_USE_CUDA
   // ...
   notifyLoop(std::make_shared<ConnectionNormalizeMessage>());
   notifyLoop(std::make_shared<ConnectionFinalizeUpdateMessage>(mSimTime, mDeltaTime));
   // ...
   for (int phase = 0; phase < mNumPhases; phase++) {
      notifyLoop(std::make_shared<LayerPublishMessage>(phase, mSimTime));
   }
   // ...
   if (!mCheckpointReadFlag) {
      notifyLoop(std::make_shared<ConnectionOutputMessage>(mSimTime, mDeltaTime));
      for (int phase = 0; phase < mNumPhases; phase++) {
         notifyLoop(std::make_shared<LayerOutputStateMessage>(phase, mSimTime));
      }
   }
   // ...
int HyPerCol::run(double stopTime, double dt) {
   mStopTime  = stopTime;
   mDeltaTime = dt;
   // ...
   getOutputStream().flush();

   bool dryRunFlag = mPVInitObj->getBooleanArgument("DryRun");
   // ...
   Clock runClock;
   runClock.start_clock();
   // ...
   advanceTimeLoop(runClock, 10 /*runClockStartingStep*/);

   notifyLoop(std::make_shared<CleanupMessage>());
   // ...
   InfoLog().printf("[%d]: HyPerCol: done...\n", mCommunicator->globalCommRank());
   // ...
   mCheckpointer->finalCheckpoint(mSimTime);
   // ...
   runClock.stop_clock();
   if (getCommunicator()->globalCommRank() == 0) {
      runClock.print_elapsed(getOutputStream());
   }
   // ...
}
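For context, a typical PetaVision driver constructs the column and calls run() indirectly. A minimal sketch, assuming the standard buildandrun entry point from columns/buildandrun.hpp (which is not shown in this listing):

   #include <columns/buildandrun.hpp>
   #include <cstdlib>

   int main(int argc, char *argv[]) {
      // buildandrun parses the command line (params file, MPI layout, -t, etc.),
      // builds the HyPerCol and its hierarchy, runs it, and cleans up.
      int status = buildandrun(argc, argv);
      return status == PV_SUCCESS ? EXIT_SUCCESS : EXIT_FAILURE;
   }
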
int HyPerCol::setNumThreads(bool printMessagesFlag) {
   bool printMsgs0   = printMessagesFlag && globalRank() == 0;
   int thread_status = PV_SUCCESS;
   int num_threads   = 0;
#ifdef PV_USE_OPENMP_THREADS
   int max_threads = getMaxThreads();
   int comm_size   = mCommunicator->globalCommSize();
   if (printMsgs0) {
      InfoLog().printf(
            "Maximum number of OpenMP threads%s is %d\n"
            "Number of MPI processes is %d.\n",
            comm_size == 1 ? "" : " (over all processes)",
            max_threads,
            comm_size);
   }
   // ...
   if (numThreadsArg.mUseDefault) {
      num_threads = max_threads / comm_size;
      if (num_threads == 0) {
         num_threads = 1;
         if (printMsgs0) {
            WarnLog().printf(
                  "Warning: more MPI processes than available threads.  "
                  "Processors may be oversubscribed.\n");
         }
      }
   }
   else {
      num_threads = numThreadsArg.mValue;
   }
   if (num_threads > 0) {
      if (printMsgs0) {
         InfoLog().printf("Number of threads used is %d\n", num_threads);
      }
   }
   else if (num_threads == 0) {
      thread_status = PV_FAILURE;
      if (printMsgs0) {
         ErrorLog().printf(
               "%s: number of threads must be positive (was set to zero)\n",
               /* ... */);
      }
   }
   else {
      assert(num_threads < 0);
      thread_status = PV_FAILURE;
      if (printMsgs0) {
         ErrorLog().printf(
               "%s was compiled with PV_USE_OPENMP_THREADS; "
               "therefore the \"-t\" argument is "
               /* ... */);
      }
   }
#else // PV_USE_OPENMP_THREADS
   if (numThreadsArg.mUseDefault) {
      num_threads = 1;
      if (printMsgs0) {
         InfoLog().printf("Number of threads used is 1 (compiled without OpenMP).\n");
      }
   }
   else {
      num_threads = numThreadsArg.mValue;
      if (num_threads < 0) {
         // ...
      }
      if (num_threads != 1) {
         thread_status = PV_FAILURE;
      }
   }
   if (printMsgs0) {
      if (thread_status != PV_SUCCESS) {
         ErrorLog().printf(
               "%s error: PetaVision must be compiled with "
               "OpenMP to run with threads.\n",
               /* ... */);
      }
   }
#endif // PV_USE_OPENMP_THREADS
   mNumThreads = num_threads;
   return thread_status;
}
int HyPerCol::processParams(char const *path) {
   if (!mParamsProcessedFlag) {
      auto const &objectMap = mObjectHierarchy.getObjectMap();
      notifyLoop(std::make_shared<CommunicateInitInfoMessage>(objectMap));
   }
   // ...
   parameters()->warnUnread();
   if (path != nullptr && path[0] != '\0') {
      outputParams(path);
   }
   else {
      if (globalRank() == 0) {
         InfoLog().printf(
               "HyPerCol \"%s\": path for printing parameters file was "
               /* ... */);
      }
   }
   mParamsProcessedFlag = true;
   return PV_SUCCESS;
}
void HyPerCol::advanceTimeLoop(Clock &runClock, int const runClockStartingStep) {
   // ...
   long int step = 0;
   while (mSimTime < mStopTime - mDeltaTime / 2.0) {
      mCheckpointer->checkpointWrite(mSimTime);
      advanceTime(mSimTime);
      // ...
      step++;
      if (step == runClockStartingStep) {
         runClock.start_clock();
      }
      // ...
   }
}
int HyPerCol::advanceTime(double sim_time) {
   if (mSimTime >= mNextProgressTime) {
      mNextProgressTime += mProgressInterval;
      if (mCommunicator->globalCommRank() == 0) {
         std::ostream &progressStream = mWriteProgressToErr ? getErrorStream() : getOutputStream();
         time_t current_time;
         time(&current_time);
         progressStream << "   time==" << sim_time << "  "
                        << ctime(&current_time); // ctime() output ends with a newline
         progressStream.flush();
      }
   }
   // ...
   mSimTime = sim_time + mDeltaTime;

   notifyLoop(std::make_shared<AdaptTimestepMessage>());
   // ...
   int status = PV_SUCCESS;
   // ...
   for (int phase = 0; phase < mNumPhases; phase++) {
      notifyLoop(std::make_shared<LayerClearProgressFlagsMessage>());
      // ...
      bool someLayerIsPending = false;
      bool someLayerHasActed  = false;
#ifdef PV_USE_CUDA
      // ...
      auto recvMessage = std::make_shared<LayerRecvSynapticInputMessage>(
            /* ... */, mPhaseRecvTimers.at(phase), /* ... */);
      auto updateMessage = std::make_shared<LayerUpdateStateMessage>(/* ... */);
      nonblockingLayerUpdate(recvMessage, updateMessage);
      // ...
      recvMessage = std::make_shared<LayerRecvSynapticInputMessage>(
            /* ... */, mPhaseRecvTimers.at(phase), /* ... */);
      updateMessage = std::make_shared<LayerUpdateStateMessage>(/* ... */);
      nonblockingLayerUpdate(recvMessage, updateMessage);
      // ...
      if (getDevice() != nullptr) {
         getDevice()->syncDevice();
      }
      // ...
      nonblockingLayerUpdate(
            std::make_shared<LayerUpdateStateMessage>(/* ... */, &someLayerHasActed));
      // ...
      if (getDevice() != nullptr) {
         getDevice()->syncDevice();
         notifyLoop(std::make_shared<LayerCopyFromGpuMessage>(phase, mPhaseRecvTimers.at(phase)));
      }
      // ...
      nonblockingLayerUpdate(
            std::make_shared<LayerUpdateStateMessage>(/* ... */, &someLayerHasActed));
#else // PV_USE_CUDA
      auto recvMessage = std::make_shared<LayerRecvSynapticInputMessage>(
            /* ... */, mPhaseRecvTimers.at(phase), /* ... */);
      auto updateMessage = std::make_shared<LayerUpdateStateMessage>(
            phase, mSimTime, mDeltaTime, &someLayerIsPending, &someLayerHasActed);
      nonblockingLayerUpdate(recvMessage, updateMessage);
#endif // PV_USE_CUDA
      // ...
      notifyLoop(std::make_shared<LayerAdvanceDataStoreMessage>(phase));
      // ...
      notifyLoop(std::make_shared<LayerPublishMessage>(phase, mSimTime));
      // ...
      notifyLoop(std::make_shared<LayerOutputStateMessage>(phase, mSimTime));
      if (mErrorOnNotANumber) {
         notifyLoop(std::make_shared<LayerCheckNotANumberMessage>(phase));
      }
   }
   // ...
   notifyLoop(std::make_shared<ConnectionUpdateMessage>(mSimTime, mDeltaTime));
   notifyLoop(std::make_shared<ConnectionNormalizeMessage>());
   notifyLoop(std::make_shared<ConnectionFinalizeUpdateMessage>(mSimTime, mDeltaTime));
   notifyLoop(std::make_shared<ConnectionOutputMessage>(mSimTime, mDeltaTime));
   // ...
   notifyLoop(std::make_shared<ColProbeOutputStateMessage>(mSimTime, mDeltaTime));
   // ...
   return status;
}
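All of the make_shared/notifyLoop calls above follow one observer-pattern idiom: broadcast a typed message to every object in the hierarchy and let each object respond to the types it cares about. A self-contained sketch of that idiom with hypothetical stand-in types (the real classes are BaseMessage, Observer, and Response::Status):

   #include <memory>
   #include <vector>

   struct MessageSketch {
      virtual ~MessageSketch() = default;
   };
   struct PublishSketch : MessageSketch {
      double simTime;
      explicit PublishSketch(double t) : simTime(t) {}
   };

   struct ObserverSketch {
      virtual ~ObserverSketch() = default;
      virtual void respond(std::shared_ptr<MessageSketch const> msg) {
         // Objects ignore message types they don't recognize.
         if (auto cast = std::dynamic_pointer_cast<PublishSketch const>(msg)) {
            // ... act on the publish message ...
         }
      }
   };

   void notifyLoopSketch(
         std::vector<ObserverSketch *> const &hierarchy,
         std::shared_ptr<MessageSketch const> msg) {
      for (auto *obj : hierarchy) {
         obj->respond(msg);
      }
   }
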
void HyPerCol::nonblockingLayerUpdate(
      std::shared_ptr<LayerUpdateStateMessage const> updateMessage) {
   *(updateMessage->mSomeLayerIsPending) = true;
   *(updateMessage->mSomeLayerHasActed)  = false;

   long int idleCounter = 0;
   while (*(updateMessage->mSomeLayerIsPending)) {
      *(updateMessage->mSomeLayerIsPending) = false;
      *(updateMessage->mSomeLayerHasActed)  = false;
      notifyLoop(updateMessage);
      // ...
      if (!*(updateMessage->mSomeLayerHasActed)) {
         idleCounter++;
      }
   }
   // ...
   if (idleCounter > 1L) {
      InfoLog() << "t = " << mSimTime << ", phase " << updateMessage->mPhase
#ifdef PV_USE_CUDA
                << ", recvGpu" << updateMessage->mRecvOnGpuFlag << ", updateGpu"
                << updateMessage->mUpdateOnGpuFlag
#endif // PV_USE_CUDA
                << ", idle count " << idleCounter << "\n";
   }
}
void HyPerCol::nonblockingLayerUpdate(
      std::shared_ptr<LayerRecvSynapticInputMessage const> recvMessage,
      std::shared_ptr<LayerUpdateStateMessage const> updateMessage) {
   pvAssert(recvMessage->mSomeLayerIsPending == updateMessage->mSomeLayerIsPending);
   pvAssert(recvMessage->mSomeLayerHasActed == updateMessage->mSomeLayerHasActed);

   *(updateMessage->mSomeLayerIsPending) = true;
   *(updateMessage->mSomeLayerHasActed)  = false;

   long int idleCounter = 0;
   while (*(recvMessage->mSomeLayerIsPending)) {
      *(updateMessage->mSomeLayerIsPending) = false;
      *(updateMessage->mSomeLayerHasActed)  = false;
      notifyLoop(recvMessage);
      notifyLoop(updateMessage);
      // ...
      if (!*(updateMessage->mSomeLayerHasActed)) {
         idleCounter++;
      }
   }
   // ...
   if (idleCounter > 1L) {
      InfoLog() << "t = " << mSimTime << ", phase " << updateMessage->mPhase
#ifdef PV_USE_CUDA
                << ", recvGpu" << updateMessage->mRecvOnGpuFlag << ", updateGpu"
                << updateMessage->mUpdateOnGpuFlag
#endif // PV_USE_CUDA
                << ", idle count " << idleCounter << "\n";
   }
}
Response::Status HyPerCol::respond(std::shared_ptr<BaseMessage const> message) {
   if (auto castMessage = std::dynamic_pointer_cast<PrepareCheckpointWriteMessage const>(message)) {
      return respondPrepareCheckpointWrite(castMessage);
   }
   else {
      return Response::SUCCESS;
   }
}
Response::Status HyPerCol::respondPrepareCheckpointWrite(
      std::shared_ptr<PrepareCheckpointWriteMessage const> message) {
   std::string path(message->mDirectory);
   path.append("/").append("pv.params");
   outputParams(path.c_str());
   return Response::SUCCESS;
}
void HyPerCol::outputParams(char const *path) {
   assert(path != nullptr && path[0] != '\0');
   int rank = mCheckpointer->getMPIBlock()->getRank();
   assert(mPrintParamsStream == nullptr);
   char *tmp = strdup(path); // duplicate the path, since dirname() may modify its argument
   if (tmp == nullptr) {
      Fatal().printf("HyPerCol::outputParams unable to allocate memory: %s\n", strerror(errno));
   }
   char *containingdir = dirname(tmp);
   ensureDirExists(mCheckpointer->getMPIBlock(), containingdir);
   free(tmp);
   if (rank == 0) {
      mPrintParamsStream = new FileStream(path, std::ios_base::out, getVerifyWrites());
      // ...
      std::string luaPath(path);
      luaPath.append(".lua");
      char luapath[PV_PATH_MAX];
      mLuaPrintParamsStream =
            new FileStream(luaPath.c_str(), std::ios_base::out, getVerifyWrites());
      parameters()->setPrintParamsStream(mPrintParamsStream);
      parameters()->setPrintLuaStream(mLuaPrintParamsStream);
      // ...
      outputParamsHeadComments(mPrintParamsStream, "//");
      // ...
      outputParamsHeadComments(mLuaPrintParamsStream, "--");
      // ...
      mLuaPrintParamsStream->printf(
            "package.path = package.path .. \";\" .. \"" PV_DIR "/../parameterWrapper/?.lua\"\n");
      mLuaPrintParamsStream->printf("local pv = require \"PVModule\"\n\n");
      mLuaPrintParamsStream->printf(
            "NULL = function() end; -- to allow string parameters to be set to NULL\n\n");
      mLuaPrintParamsStream->printf("-- Base table variable to store\n");
      mLuaPrintParamsStream->printf("local pvParameters = {\n");
   }
   // ...
   ioParams(PARAMS_IO_WRITE);
   // ...
   notifyLoop(std::make_shared<LayerWriteParamsMessage>());
   notifyLoop(std::make_shared<ConnectionWriteParamsMessage>());
   notifyLoop(std::make_shared<ColProbeWriteParamsMessage>());
   notifyLoop(std::make_shared<LayerProbeWriteParamsMessage>());
   notifyLoop(std::make_shared<ConnectionProbeWriteParamsMessage>());
   // ...
   if (rank == 0) {
      mLuaPrintParamsStream->printf("} --End of pvParameters\n");
      mLuaPrintParamsStream->printf(
            "\n-- Print out PetaVision approved parameter file to the console\n");
      mLuaPrintParamsStream->printf(
            "paramsFileString = pv.createParamsFileString(pvParameters)\n");
      mLuaPrintParamsStream->printf("io.write(paramsFileString)\n");
   }
   // ...
   if (mPrintParamsStream) {
      delete mPrintParamsStream;
      mPrintParamsStream = nullptr;
      parameters()->setPrintParamsStream(mPrintParamsStream);
   }
   if (mLuaPrintParamsStream) {
      delete mLuaPrintParamsStream;
      mLuaPrintParamsStream = nullptr;
      parameters()->setPrintLuaStream(mLuaPrintParamsStream);
   }
}
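Pieced together from the printf calls above, the generated .lua sidecar file has roughly this skeleton (PV_DIR is the build-time source path; the body of pvParameters is filled in by ioParams and the WriteParams messages):

   package.path = package.path .. ";" .. "<PV_DIR>/../parameterWrapper/?.lua"
   local pv = require "PVModule"

   NULL = function() end; -- to allow string parameters to be set to NULL

   -- Base table variable to store
   local pvParameters = {
      ...
   } --End of pvParameters

   -- Print out PetaVision approved parameter file to the console
   paramsFileString = pv.createParamsFileString(pvParameters)
   io.write(paramsFileString)
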
void HyPerCol::outputParamsHeadComments(FileStream *fileStream, char const *commentToken) {
   time_t t = time(nullptr);
   fileStream->printf("%s PetaVision, " PV_GIT_REVISION "\n", commentToken);
   fileStream->printf("%s Run time %s", commentToken, ctime(&t)); // ctime() output ends with a newline
#ifdef PV_USE_MPI
   MPIBlock const *mpiBlock = mCheckpointer->getMPIBlock();
   fileStream->printf(
         "%s Compiled with Open MPI %d.%d.%d (MPI Standard %d.%d).\n",
         commentToken,
         OMPI_MAJOR_VERSION,
         OMPI_MINOR_VERSION,
         OMPI_RELEASE_VERSION,
         MPI_VERSION,
         MPI_SUBVERSION);
   fileStream->printf(
         "%s MPI configuration has %d rows, %d columns, and batch dimension %d.\n",
         /* ... */);
   // ...
   fileStream->printf(
         "%s CheckpointCells have %d rows, %d columns, and batch dimension %d.\n",
         /* ... */);
#else
   fileStream->printf("%s Compiled without MPI.\n", commentToken);
#endif // PV_USE_MPI
#ifdef PV_USE_CUDA
   int const cudaMajor  = CUDA_VERSION / 1000;
   int const cudaMinor  = (CUDA_VERSION % 1000) / 10;
   int const cudnnMajor = CUDNN_MAJOR;
   int const cudnnMinor = CUDNN_MINOR;
   int const cudnnPatch = CUDNN_PATCHLEVEL;
   fileStream->printf(
         "%s Compiled with CUDA version %d.%d; cuDNN version %d.%d.%d\n",
         commentToken,
         cudaMajor,
         cudaMinor,
         cudnnMajor,
         cudnnMinor,
         cudnnPatch);
#else
   fileStream->printf("%s Compiled without CUDA.\n", commentToken);
#endif // PV_USE_CUDA
#ifdef PV_USE_OPENMP_THREADS
   std::string openmpVersion;
   switch (_OPENMP) {
      case 201511: openmpVersion = "4.5"; break;
      case 201307: openmpVersion = "4.0"; break;
      case 201107: openmpVersion = "3.1"; break;
      case 200805: openmpVersion = "3.0"; break;
      default: openmpVersion     = "is unrecognized"; break;
   }
   fileStream->printf(
         "%s Compiled with OpenMP parallel code, API version %s (%06d) ",
         commentToken,
         openmpVersion.c_str(),
         _OPENMP);
   if (mNumThreads > 0) {
      fileStream->printf("and run using %d threads.\n", mNumThreads);
   }
   else if (mNumThreads == 0) {
      fileStream->printf("but number of threads was set to zero (error).\n");
   }
   else {
      fileStream->printf("but the -t option was not specified.\n");
   }
#else
   fileStream->printf("%s Compiled without OpenMP parallel code ", commentToken);
   if (mNumThreads == 1) {
      fileStream->printf(".\n");
   }
   else if (mNumThreads == 0) {
      fileStream->printf("but number of threads was set to zero (error).\n");
   }
   else {
      fileStream->printf(
            "but number of threads specified was %d instead of 1. (error).\n", mNumThreads);
   }
#endif // PV_USE_OPENMP_THREADS
   if (mCheckpointReadFlag) {
      fileStream->printf(
            "%s Started from checkpoint \"%s\"\n",
            commentToken,
            mCheckpointer->getCheckpointReadDirectory().c_str());
   }
}
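Assembled from the format strings above, the head comment of a generated pv.params file begins along these lines (all version numbers, dimensions, and the timestamp here are illustrative):

   // PetaVision, <git revision>
   // Run time Tue Jan  1 00:00:00 2019
   // Compiled with Open MPI 3.1.2 (MPI Standard 3.1).
   // MPI configuration has 1 rows, 1 columns, and batch dimension 1.
   // Compiled with CUDA version 10.0; cuDNN version 7.4.2
   // Compiled with OpenMP parallel code, API version 4.5 (201511) and run using 4 threads.
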
#ifdef PV_USE_CUDA
int HyPerCol::getAutoGPUDevice() {
   int returnGpuIdx = -1;
   // ...
   int mpiRank = mCommunicator->globalCommRank();
   int numMpi  = mCommunicator->globalCommSize();
   char hostNameStr[PV_PATH_MAX];
   gethostname(hostNameStr, PV_PATH_MAX);
   size_t hostNameLen = strlen(hostNameStr) + 1; // include the null terminator
   // ...
   if (mpiRank == 0) {
      // Rank 0 gathers each rank's host name and GPU count, then assigns devices.
      char rankToHost[numMpi][PV_PATH_MAX];
      // ...
      int rankToMaxGpu[numMpi];
      // ...
      int rankToGpu[numMpi];
      // ...
      for (int rank = 0; rank < numMpi; rank++) {
         if (rank == 0) {
            strcpy(rankToHost[rank], hostNameStr);
            rankToMaxGpu[rank] = PVCuda::CudaDevice::getNumDevices();
         }
         else {
            MPI_Recv(
                  rankToHost[rank],
                  PV_PATH_MAX,
                  MPI_CHAR,
                  rank,
                  0,
                  mCommunicator->globalCommunicator(),
                  MPI_STATUS_IGNORE);
            MPI_Recv(
                  &(rankToMaxGpu[rank]),
                  1,
                  MPI_INT,
                  rank,
                  0,
                  mCommunicator->globalCommunicator(),
                  MPI_STATUS_IGNORE);
         }
      }
      // ...
      // Map each host name to the list of ranks running on it.
      std::map<std::string, std::vector<int>> hostMap;
      for (int rank = 0; rank < numMpi; rank++) {
         hostMap[std::string(rankToHost[rank])].push_back(rank);
      }
      // ...
      for (auto &host : hostMap) {
         std::vector<int> rankVec = host.second;
         int numRanksPerHost      = rankVec.size();
         assert(numRanksPerHost > 0);
         // ...
         int maxGpus = rankToMaxGpu[rankVec[0]];
         // ...
         if (numRanksPerHost != maxGpus) {
            WarnLog(assignGpuWarning);
            assignGpuWarning.printf(
                  "HyPerCol::getAutoGPUDevice: Host \"%s\" (rank[s] ", host.first.c_str());
            for (int v_i = 0; v_i < numRanksPerHost; v_i++) {
               if (v_i != numRanksPerHost - 1) {
                  assignGpuWarning.printf("%d, ", rankVec[v_i]);
               }
               else {
                  assignGpuWarning.printf("%d", rankVec[v_i]);
               }
            }
            assignGpuWarning.printf(
                  ") is being %s, with %d mpi processes mapped to %d total GPU[s]\n",
                  numRanksPerHost < maxGpus ? "underloaded" : "overloaded",
                  numRanksPerHost,
                  maxGpus);
         }
         // Assign GPUs to the ranks on this host round-robin.
         for (int v_i = 0; v_i < numRanksPerHost; v_i++) {
            rankToGpu[rankVec[v_i]] = v_i % maxGpus;
         }
      }
      // ...
      for (int rank = 0; rank < numMpi; rank++) {
         InfoLog() << "Rank " << rank << " on host \"" << rankToHost[rank] << "\" ("
                   << rankToMaxGpu[rank] << " GPU[s]) using GPU index " << rankToGpu[rank] << "\n";
         if (rank == 0) {
            returnGpuIdx = rankToGpu[rank];
         }
         else {
            MPI_Send(&(rankToGpu[rank]), 1, MPI_INT, rank, 0, mCommunicator->globalCommunicator());
         }
      }
   }
   else {
      // Nonroot ranks send their host name and GPU count, then receive their assignment.
      MPI_Send(hostNameStr, hostNameLen, MPI_CHAR, 0, 0, mCommunicator->globalCommunicator());
      int maxGpu = PVCuda::CudaDevice::getNumDevices();
      MPI_Send(&maxGpu, 1, MPI_INT, 0, 0, mCommunicator->globalCommunicator());
      MPI_Recv(
            &returnGpuIdx,
            1,
            MPI_INT,
            0,
            0,
            mCommunicator->globalCommunicator(),
            MPI_STATUS_IGNORE);
   }
   assert(returnGpuIdx >= 0 && returnGpuIdx < PVCuda::CudaDevice::getNumDevices());
   // ...
   return returnGpuIdx;
}
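The assignment rule above is a per-host round-robin: the i-th rank on a host with maxGpus devices gets device i % maxGpus. A worked example with hypothetical numbers:

   // Hypothetical: one host runs global ranks {4, 5, 6} and reports 2 GPUs.
   // Round-robin assignment, as in the loop above:
   //   v_i = 0 (rank 4) -> 0 % 2 = GPU 0
   //   v_i = 1 (rank 5) -> 1 % 2 = GPU 1
   //   v_i = 2 (rank 6) -> 2 % 2 = GPU 0
   // Since 3 ranks != 2 GPUs, the host would also be reported as overloaded.
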
void HyPerCol::initializeCUDA(std::string const &in_device) {
   // Only needed if at least one object in the hierarchy uses the GPU.
   bool needGPU    = false;
   auto &objectMap = mObjectHierarchy.getObjectMap();
   for (auto &obj : objectMap) {
      // ...
   }
   // ...
   int numMpi = mCommunicator->globalCommSize();
   int device;
   // ...
   if (in_device.empty()) {
      if (getCommunicator()->globalCommRank() == 0) {
         InfoLog() << "Auto-assigning GPUs\n";
      }
      device = getAutoGPUDevice();
   }
   else {
      std::vector<int> deviceVec;
      std::stringstream ss(in_device);
      std::string stoken;
      // ...
      while (std::getline(ss, stoken, ',')) {
         for (auto &ch : stoken) {
            if (!isdigit(ch)) {
               Fatal().printf(
                     "Device specification error: %s contains "
                     "unrecognized characters. Must be "
                     "comma separated integers greater or equal to 0 "
                     "with no other characters "
                     "allowed (including spaces).\n",
                     in_device.c_str());
            }
         }
         deviceVec.push_back(atoi(stoken.c_str()));
      }
      // ...
      if (deviceVec.size() == 1) {
         device = deviceVec[0];
      }
      else if (deviceVec.size() >= numMpi) {
         device = deviceVec[mCommunicator->globalCommRank()];
      }
      else {
         Fatal().printf(
               "Device specification error: Number of devices "
               "specified (%zu) must be either 1 or "
               ">= than number of mpi processes (%d).\n",
               deviceVec.size(),
               numMpi);
      }
      // ...
      InfoLog() << "Global MPI Process " << mCommunicator->globalCommRank() << " using device "
                << device << "\n";
   }
   // ...
   // Each rank constructs its CudaDevice in turn.
   int globalSize = mCommunicator->globalCommSize();
   for (int r = 0; r < globalSize; r++) {
      if (r == globalRank()) {
         mCudaDevice = new PVCuda::CudaDevice(device);
      }
      MPI_Barrier(mCommunicator->globalCommunicator());
   }
   // ...
   if (globalRank() == 0) {
      mCudaDevice->query_device_info();
   }
   // ...
   notifyLoop(std::make_shared<SetCudaDeviceMessage>(mCudaDevice));
}
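From the parsing above, the GPUDevices argument is a comma-separated list of nonnegative device indices: a single entry is shared by all processes, and otherwise the list must supply at least one entry per global MPI rank. Hypothetical command lines, assuming the conventional -d flag populates GPUDevices:

   -d 0          every process uses device 0
   -d 0,1,2,3    rank r uses deviceVec[r]; requires at least 4 entries for 4 ranks
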
int HyPerCol::finalizeCUDA() {
   delete mCudaDevice;
   // ...
}
#endif // PV_USE_CUDA

void HyPerCol::addObject(BaseObject *obj) {
   bool succeeded = mObjectHierarchy.addObject(obj->getName(), obj);
   FatalIf(!succeeded, "Adding %s failed.\n", getDescription_c());
}

Observer *HyPerCol::getObjectFromName(std::string const &objectName) const {
   auto &objectMap = mObjectHierarchy.getObjectMap();
   auto search     = objectMap.find(objectName);
   return search == objectMap.end() ? nullptr : search->second;
}
Observer *HyPerCol::getNextObject(Observer const *currentObject) const {
   if (mObjectHierarchy.getObjectVector().empty()) {
      if (currentObject != nullptr) {
         throw std::domain_error("HyPerCol::getNextObject called with empty hierarchy");
      }
      else {
         return nullptr;
      }
   }
   else {
      auto objectVector = mObjectHierarchy.getObjectVector();
      if (currentObject == nullptr) {
         return objectVector[0];
      }
      else {
         for (auto iterator = objectVector.begin(); iterator != objectVector.end(); iterator++) {
            Observer *object = *iterator;
            if (object == currentObject) {
               iterator++;
               return iterator == objectVector.end() ? nullptr : *iterator;
            }
         }
         throw std::domain_error("HyPerCol::getNextObject argument not in hierarchy");
      }
   }
}
unsigned int HyPerCol::seedRandomFromWallClock() {
   unsigned long t = 0UL;
   int const rootproc = 0;
   if (mCommunicator->globalCommRank() == rootproc) {
      t = time((time_t *)nullptr);
   }
   MPI_Bcast(&t, 1, MPI_UNSIGNED, rootproc, mCommunicator->globalCommunicator());
   return t;
}