#define DEFAULT_DELTA_T 1.0 // time step size (msec)

#include "HyPerCol.hpp"
#include "columns/Communicator.hpp"
#include "columns/Factory.hpp"
#include "columns/RandomSeed.hpp"
#include "io/PrintStream.hpp"
#include "pvGitRevision.h"

#include <sys/types.h>

#ifdef PV_USE_OPENMP_THREADS
#include <omp.h> // omp_set_num_threads() is called below
#endif

HyPerCol::HyPerCol(PV_Init *initObj) {
   // ... (body elided in this listing)
}
HyPerCol::~HyPerCol() {
   // ...
   if (getCommunicator()->globalCommRank() == 0) {
      PrintStream pStream(getOutputStream());
      mCheckpointer->writeTimers(pStream);
   }
   // ...
   mObjectHierarchy.clear(true);
   for (auto iterator = mPhaseRecvTimers.begin(); iterator != mPhaseRecvTimers.end();) {
      delete *iterator; // the timers were allocated with new when the column was built
      iterator = mPhaseRecvTimers.erase(iterator);
   }
   // ...
   free(mPrintParamsFilename);
}
int HyPerCol::initialize_base() {
   // Initialize member variables to safe defaults; real values are set in initialize().
   mParamsProcessedFlag      = false;
   mCheckpointReadFlag       = false;
   mDeltaTime                = DEFAULT_DELTA_T;
   mWriteTimeScaleFieldnames = true;
   mProgressInterval         = 1.0;
   mWriteProgressToErr       = false;
   mLayerStatus              = nullptr;
   mConnectionStatus         = nullptr;
   mPrintParamsFilename      = nullptr;
   mPrintParamsStream        = nullptr;
   mLuaPrintParamsStream     = nullptr;
   mOwnsCommunicator         = true;
   mCommunicator             = nullptr;
   mPhaseRecvTimers.clear();
   mErrorOnNotANumber        = false;
   mCudaDevice               = nullptr;
   // ...
}
int HyPerCol::initialize(PV_Init *initObj) {
   mPVInitObj    = initObj;
   mCommunicator = mPVInitObj->getCommunicator();
   // ...
   if (mParams == nullptr) {
      if (mCommunicator->globalCommRank() == 0) {
         ErrorLog() << "HyPerCol::initialize: params have not been set." << std::endl;
         MPI_Barrier(mCommunicator->communicator());
      }
      // ...
   }

   std::string working_dir = mPVInitObj->getStringArgument("WorkingDirectory");
   working_dir = expandLeadingTilde(working_dir);

   int numGroups = mParams->numberOfGroups();
   std::string paramsFile = initObj->getStringArgument("ParamsFile");
   if (numGroups == 0) {
      ErrorLog() << "Params \"" << paramsFile << "\" does not define any groups.\n";
      // ...
   }
   // The first group in the params file must be the HyPerCol itself.
   if (strcmp(mParams->groupKeywordFromIndex(0), "HyPerCol")) {
      std::string paramsFile = initObj->getStringArgument("ParamsFile");
      ErrorLog() << "First group in the params file \"" << paramsFile
                 << "\" does not define a HyPerCol.\n";
      // ...
   }
   mName = strdup(mParams->groupNameFromIndex(0));
   // ...

   // If the WorkingDirectory argument was given, change to that directory.
   if (columnId() == 0 && !working_dir.empty()) {
      int status = chdir(working_dir.c_str());
      if (status) {
         Fatal chdirMessage;
         chdirMessage.printf("Unable to switch directory to \"%s\"\n", working_dir.c_str());
         chdirMessage.printf("chdir error: %s\n", strerror(errno));
      }
   }

#ifdef PV_USE_MPI
   // Fail if there was a parsing error, but make sure nonroot processes do not exit
   // before the root process has reported the error.
   int rootproc     = 0;
   int parsedStatus = 0;
   if (globalRank() == rootproc) {
      parsedStatus = this->mParams->getParseStatus();
   }
   MPI_Bcast(&parsedStatus, 1, MPI_INT, rootproc, getCommunicator()->globalCommunicator());
#else
   int parsedStatus = this->mParams->getParseStatus();
#endif // PV_USE_MPI
   if (parsedStatus != 0) {
      // ...
   }

   mRandomSeed = mPVInitObj->getUnsignedIntArgument("RandomSeed");

   mCheckpointer = new Checkpointer(
         std::string(mName), mCommunicator->getGlobalMPIBlock(), mPVInitObj->getArguments());
   ioParams(PARAMS_IO_READ);
   // ...
   mFinalStep = (long int)nearbyint(mStopTime / mDeltaTime);
   mCheckpointer->provideFinalStep(mFinalStep);
   mNextProgressTime = 0.0;

   RandomSeed::instance()->initialize(mRandomSeed);
   if (getCommunicator()->globalCommRank() == 0) {
      InfoLog() << "RandomSeed initialized to " << mRandomSeed << ".\n";
   }

   mRunTimer = new Timer(mName, "column", "run    ");
   mCheckpointer->registerTimer(mRunTimer);
   mCheckpointer->registerCheckpointData(
         /* ... */);
   // ...
   mCheckpointReadFlag = !mCheckpointer->getCheckpointReadDirectory().empty();
   // Create the layers, connections, and probes described by the remaining groups.
   for (int k = 1; k < numGroups; k++) {
      const char *kw   = mParams->groupKeywordFromIndex(k);
      const char *name = mParams->groupNameFromIndex(k);
      if (!strcmp(kw, "HyPerCol")) {
         if (globalRank() == 0) {
            std::string paramsFile = initObj->getStringArgument("ParamsFile");
            ErrorLog() << "Group " << k + 1 << " in params file (\"" << paramsFile
                       << "\") is a HyPerCol; only the first group can be a HyPerCol.\n";
            // ...
         }
      }
      else {
         BaseObject *addedObject = nullptr;
         try {
            addedObject = Factory::instance()->createByKeyword(kw, name, this);
         }
         catch (std::exception const &e) {
            Fatal() << e.what() << std::endl;
         }
         if (addedObject == nullptr) {
            ErrorLog().printf("Unable to create %s \"%s\".\n", kw, name);
            // ...
         }
         addObject(addedObject);
      }
   }
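
// createByKeyword above is a string-keyed factory: the group keyword from the params file
// selects which concrete class to construct. A standalone sketch of the pattern (types and
// names here are illustrative, not PetaVision's API):
//
//   #include <functional>
//   #include <map>
//   #include <memory>
//   #include <stdexcept>
//   #include <string>
//
//   struct Base { virtual ~Base() = default; std::string name; };
//   struct Layer : Base {};
//   struct Conn : Base {};
//
//   using Maker = std::function<std::unique_ptr<Base>()>;
//
//   std::unique_ptr<Base> createByKeyword(
//         std::map<std::string, Maker> const &makers,
//         std::string const &kw,
//         std::string const &name) {
//      auto entry = makers.find(kw);
//      if (entry == makers.end()) {
//         throw std::invalid_argument("unknown keyword " + kw);
//      }
//      auto obj  = entry->second(); // invoke the registered maker
//      obj->name = name;
//      return obj;
//   }
//
//   // Usage: makers = {{"Layer", []{ return std::make_unique<Layer>(); }}, ...}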
void HyPerCol::setDescription() {
   description = "HyPerCol \"";
   description.append(getName()).append("\"");
}
void HyPerCol::ioParams(enum ParamsIOFlag ioFlag) {
   ioParamsStartGroup(ioFlag, mName);
   ioParamsFillGroup(ioFlag);
   ioParamsFinishGroup(ioFlag);
}
int HyPerCol::ioParamsStartGroup(enum ParamsIOFlag ioFlag, const char *group_name) {
   if (ioFlag == PARAMS_IO_WRITE && mCheckpointer->getMPIBlock()->getRank() == 0) {
      pvAssert(mPrintParamsStream);
      pvAssert(mLuaPrintParamsStream);
      const char *keyword = mParams->groupKeywordFromName(group_name);
      mPrintParamsStream->printf("\n");
      mPrintParamsStream->printf("%s \"%s\" = {\n", keyword, group_name);
      mLuaPrintParamsStream->printf("%s = {\n", group_name);
      mLuaPrintParamsStream->printf("groupType = \"%s\";\n", keyword);
   }
   // ...
}
int HyPerCol::ioParamsFillGroup(enum ParamsIOFlag ioFlag) {
   // ...
   mCheckpointer->ioParams(ioFlag, parameters());
   // ...
}
int HyPerCol::ioParamsFinishGroup(enum ParamsIOFlag ioFlag) {
   if (ioFlag == PARAMS_IO_WRITE && mPrintParamsStream != nullptr) {
      pvAssert(mLuaPrintParamsStream);
      mPrintParamsStream->printf("};\n");
      mLuaPrintParamsStream->printf("};\n\n");
   }
   // ...
}
// dt: The default delta time to use.
void HyPerCol::ioParam_dt(enum ParamsIOFlag ioFlag) {
   parameters()->ioParamValue(ioFlag, mName, "dt", &mDeltaTime, mDeltaTime);
}

// stopTime: The stopping time for the run.
void HyPerCol::ioParam_stopTime(enum ParamsIOFlag ioFlag) {
   parameters()->ioParamValue(ioFlag, mName, "stopTime", &mStopTime, mStopTime);
}

// progressInterval: Specifies how often a progress report prints out.
void HyPerCol::ioParam_progressInterval(enum ParamsIOFlag ioFlag) {
   parameters()->ioParamValue(
         ioFlag, mName, "progressInterval", &mProgressInterval, mProgressInterval);
}

// writeProgressToErr: Whether to print timestep progress to the error stream instead of
// the output stream.
void HyPerCol::ioParam_writeProgressToErr(enum ParamsIOFlag ioFlag) {
   parameters()->ioParamValue(
         ioFlag, mName, "writeProgressToErr", &mWriteProgressToErr, mWriteProgressToErr);
}

// printParamsFilename: Specifies the output params filename.
void HyPerCol::ioParam_printParamsFilename(enum ParamsIOFlag ioFlag) {
   parameters()->ioParamString(
         ioFlag, mName, "printParamsFilename", &mPrintParamsFilename, "pv.params");
   if (mPrintParamsFilename == nullptr || mPrintParamsFilename[0] == '\0') {
      if (mCheckpointer->getMPIBlock()->getRank() == 0) {
         ErrorLog().printf("printParamsFilename cannot be null or the empty string.\n");
      }
      MPI_Barrier(mCheckpointer->getMPIBlock()->getComm());
      // ...
   }
}

// randomSeed: The seed for the random number generator, for reproducibility.
void HyPerCol::ioParam_randomSeed(enum ParamsIOFlag ioFlag) {
   switch (ioFlag) {
      case PARAMS_IO_READ:
         // Use the seed from the params file if present; otherwise seed from the wall clock.
         if (mParams->present(mName, "randomSeed")) {
            mRandomSeed = (unsigned long)mParams->value(mName, "randomSeed");
         }
         else {
            mRandomSeed = seedRandomFromWallClock();
         }
         if (mRandomSeed < RandomSeed::minSeed) {
            Fatal().printf(
                  "Error: random seed %u is too small. Use a seed of at "
                  /* ... */);
         }
         break;
      case PARAMS_IO_WRITE: parameters()->writeParam("randomSeed", mRandomSeed); break;
      default: assert(0); break;
   }
}

// nx: Specifies the x-size of the column.
void HyPerCol::ioParam_nx(enum ParamsIOFlag ioFlag) {
   parameters()->ioParamValueRequired(ioFlag, mName, "nx", &mNumXGlobal);
}

// ny: Specifies the y-size of the column.
void HyPerCol::ioParam_ny(enum ParamsIOFlag ioFlag) {
   parameters()->ioParamValueRequired(ioFlag, mName, "ny", &mNumYGlobal);
}

// nbatch: Specifies the batch size of the column.
void HyPerCol::ioParam_nBatch(enum ParamsIOFlag ioFlag) {
   parameters()->ioParamValue(ioFlag, mName, "nbatch", &mNumBatchGlobal, mNumBatchGlobal);
   // The global batch size must divide evenly among the MPI blocks in the batch dimension.
   FatalIf(
         mNumBatchGlobal % mCommunicator->numCommBatches() != 0,
         "The total number of batches (%d) must be a multiple of the batch "
         /* ... */
         mCommunicator->numCommBatches());
   mNumBatch = mNumBatchGlobal / mCommunicator->numCommBatches();
}
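
// Example of the division above: nbatch = 32 with a batch MPI dimension of 4 gives each
// block of processes mNumBatch = 32 / 4 = 8 batch elements; nbatch = 30 would trip the
// FatalIf because 30 % 4 != 0.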
// errorOnNotANumber: Specifies whether the run should check each timestep for NaNs in activity.
void HyPerCol::ioParam_errorOnNotANumber(enum ParamsIOFlag ioFlag) {
   parameters()->ioParamValue(
         ioFlag, mName, "errorOnNotANumber", &mErrorOnNotANumber, mErrorOnNotANumber);
}
   // ...
   pvAssert(mPrintParamsFilename && mPrintParamsFilename[0]);
   if (mPrintParamsFilename[0] != '/') {
      // Relative path: printParamsPath is built from the output path in elided lines.
      std::string printParamsFilename(mPrintParamsFilename);
      // ...
      processParams(printParamsPath.c_str());
   }
   else {
      // An absolute path is used as-is.
      processParams(mPrintParamsFilename);
   }
   // ...
   std::string const &gpu_devices = mPVInitObj->getStringArgument("GPUDevices");
   initializeCUDA(gpu_devices);
   MPI_Barrier(mCommunicator->globalCommunicator());
   if (thread_status != PV_SUCCESS) {
      // ...
   }

#ifdef PV_USE_OPENMP_THREADS
   pvAssert(mNumThreads > 0); // setNumThreads() returned PV_SUCCESS, so mNumThreads is positive
   omp_set_num_threads(mNumThreads);
#endif // PV_USE_OPENMP_THREADS

   notifyLoop(std::make_shared<AllocateDataMessage>());
   // ...
   notifyLoop(std::make_shared<LayerSetMaxPhaseMessage>(&mNumPhases));
   // ...
   // Create one receive timer per phase.
   mPhaseRecvTimers.clear();
   for (int phase = 0; phase < mNumPhases; phase++) {
      std::string timerTypeString("phRecv");
      timerTypeString.append(std::to_string(phase));
      Timer *phaseRecvTimer = new Timer(mName, "column", timerTypeString.c_str());
      mPhaseRecvTimers.push_back(phaseRecvTimer);
      mCheckpointer->registerTimer(phaseRecvTimer);
   }
   // ...
   InfoLog().printf("[%d]: HyPerCol: running...\n", mCommunicator->globalCommRank());
   notifyLoop(std::make_shared<InitializeStateMessage>());
   if (mCheckpointReadFlag) {
      mCheckpointer->checkpointRead(&mSimTime, &mCurrentStep);
   }
   // ...
   mCheckpointer->readStateFromCheckpoint();
   // ...
#ifdef PV_USE_CUDA
   notifyLoop(std::make_shared<CopyInitialStateToGPUMessage>());
#endif // PV_USE_CUDA
   // ...
   notifyLoop(std::make_shared<ConnectionNormalizeMessage>());
   notifyLoop(std::make_shared<ConnectionFinalizeUpdateMessage>(mSimTime, mDeltaTime));

   // Publish the initial conditions so the first timestep sees valid activity.
   for (int phase = 0; phase < mNumPhases; phase++) {
      notifyLoop(std::make_shared<LayerPublishMessage>(phase, mSimTime));
   }

   // If this is not a restart from checkpoint, write out the initial state.
   if (!mCheckpointReadFlag) {
      notifyLoop(std::make_shared<ConnectionOutputMessage>(mSimTime, mDeltaTime));
      for (int phase = 0; phase < mNumPhases; phase++) {
         notifyLoop(std::make_shared<LayerOutputStateMessage>(phase, mSimTime));
      }
   }
int HyPerCol::run(double stopTime, double dt) {
   mStopTime = stopTime;
   // ...
   getOutputStream().flush();

   bool dryRunFlag = mPVInitObj->getBooleanArgument("DryRun");
   // ...
   Clock runClock;
   runClock.start_clock();
   // ...
   advanceTimeLoop(runClock, 10 /*runClockStartingStep*/);

   notifyLoop(std::make_shared<CleanupMessage>());
   // ...
   InfoLog().printf("[%d]: HyPerCol: done...\n", mCommunicator->globalCommRank());
   // ...
   mCheckpointer->finalCheckpoint(mSimTime);
   // ...
   runClock.stop_clock();
   if (getCommunicator()->globalCommRank() == 0) {
      runClock.print_elapsed(getOutputStream());
   }
   // ...
}
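
// A minimal driver for reference: PetaVision models typically hand argc/argv to
// buildandrun() (declared in columns/buildandrun.hpp), which builds the PV_Init and
// HyPerCol from the params file and calls run(). Sketch under that assumption:
//
//   #include <columns/buildandrun.hpp>
//   #include <cstdlib>
//
//   int main(int argc, char *argv[]) {
//      int status = buildandrun(argc, argv); // e.g. ./model -p model.params -t 4
//      return status == PV_SUCCESS ? EXIT_SUCCESS : EXIT_FAILURE;
//   }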
int HyPerCol::setNumThreads(bool printMessagesFlag) {
   bool printMsgs0   = printMessagesFlag && globalRank() == 0;
   int thread_status = PV_SUCCESS;
   int num_threads   = 0;
   // numThreadsArg (the optional -t argument) and max_threads are obtained in elided lines.
#ifdef PV_USE_OPENMP_THREADS
   // ...
   int comm_size = mCommunicator->globalCommSize();
   if (printMsgs0) {
      InfoLog().printf(
            "Maximum number of OpenMP threads%s is %d\n"
            "Number of MPI processes is %d.\n",
            comm_size == 1 ? "" : " (over all processes)",
            max_threads,
            comm_size);
   }
   if (numThreadsArg.mUseDefault) {
      // Default: divide the available threads evenly among the MPI processes.
      num_threads = max_threads / comm_size;
      if (num_threads == 0) {
         // ...
         WarnLog().printf(
               "Warning: more MPI processes than available threads. "
               "Processors may be oversubscribed.\n");
      }
   }
   else {
      num_threads = numThreadsArg.mValue;
   }
   if (num_threads > 0) {
      if (printMsgs0) {
         InfoLog().printf("Number of threads used is %d\n", num_threads);
      }
   }
   else if (num_threads == 0) {
      thread_status = PV_FAILURE;
      if (printMsgs0) {
         ErrorLog().printf(
               "%s: number of threads must be positive (was set to zero)\n",
               /* ... */);
      }
   }
   else {
      assert(num_threads < 0);
      thread_status = PV_FAILURE;
      if (printMsgs0) {
         ErrorLog().printf(
               "%s was compiled with PV_USE_OPENMP_THREADS; "
               "therefore the \"-t\" argument is "
               /* ... */);
      }
   }
#else // PV_USE_OPENMP_THREADS
   if (numThreadsArg.mUseDefault) {
      num_threads = 1;
      if (printMsgs0) {
         InfoLog().printf("Number of threads used is 1 (compiled without OpenMP).\n");
      }
   }
   else {
      num_threads = numThreadsArg.mValue;
      if (num_threads < 0) {
         // ...
      }
      if (num_threads != 1) {
         thread_status = PV_FAILURE;
      }
   }
   if (thread_status != PV_SUCCESS) {
      ErrorLog().printf(
            "%s error: PetaVision must be compiled with "
            "OpenMP to run with threads.\n",
            /* ... */);
   }
#endif // PV_USE_OPENMP_THREADS
   mNumThreads = num_threads;
   return thread_status;
}
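
// Example of the defaulting rule above: with 16 available OpenMP threads and 4 MPI
// processes, omitting -t gives num_threads = 16 / 4 = 4 per process. "-t 0" sets
// thread_status to PV_FAILURE, and without OpenMP any value other than 1 fails.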
int HyPerCol::processParams(char const *path) {
   if (!mParamsProcessedFlag) {
      auto const &objectMap = mObjectHierarchy.getObjectMap();
      notifyLoop(std::make_shared<CommunicateInitInfoMessage>(objectMap));
   }
   // ...
   // Print a warning for any parameter in the params file that was never read.
   parameters()->warnUnread();

   if (path != nullptr && path[0] != '\0') {
      outputParams(path);
   }
   else {
      if (globalRank() == 0) {
         WarnLog().printf(
               "HyPerCol \"%s\": path for printing parameters file was "
               /* ... */);
      }
   }
   mParamsProcessedFlag = true;
   // ...
}
void HyPerCol::advanceTimeLoop(Clock &runClock, int const runClockStartingStep) {
   // ...
   // The half-timestep margin guards against accumulated floating-point error in mSimTime:
   // comparing against mStopTime exactly could run one extra or one fewer step.
   while (mSimTime < mStopTime - mDeltaTime / 2.0) {
      mCheckpointer->checkpointWrite(mSimTime);
      advanceTime(mSimTime);
      // ... (the step counter is advanced in elided lines)
      if (step == runClockStartingStep) {
         runClock.start_clock(); // restart the clock, presumably to exclude startup transients
      }
      // ...
   }
}
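
// Example of the half-step margin: with mStopTime = 10.0 and mDeltaTime = 1.0, rounding
// could leave mSimTime at 9.9999999 instead of 10.0 after ten steps; comparing against
// 10.0 would then run an eleventh step, while comparing against 10.0 - 0.5 = 9.5 stops
// after exactly ten.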
int HyPerCol::advanceTime(double sim_time) {
   // Print a progress report at most once per mProgressInterval.
   if (mSimTime >= mNextProgressTime) {
      mNextProgressTime += mProgressInterval;
      if (mCommunicator->globalCommRank() == 0) {
         std::ostream &progressStream = mWriteProgressToErr ? getErrorStream() : getOutputStream();
         time_t current_time;
         time(&current_time);
         progressStream << "   time==" << sim_time << "  "
                        << ctime(&current_time); // ctime's output includes a newline
         progressStream.flush();
      }
   }
   // ...
   mSimTime = sim_time + mDeltaTime;

   notifyLoop(std::make_shared<AdaptTimestepMessage>());
   // ...
   int status = PV_SUCCESS;
   // ...
   for (int phase = 0; phase < mNumPhases; phase++) {
      notifyLoop(std::make_shared<LayerClearProgressFlagsMessage>());

      // The flags are shared among the messages of a phase; nonblockingLayerUpdate polls
      // them until every layer in the phase has updated.
      bool someLayerIsPending = false;
      bool someLayerHasActed  = false;
#ifdef PV_USE_CUDA
      // GPU path: receive and update are issued separately for GPU and CPU layers.
      auto recvMessage = std::make_shared<LayerRecvSynapticInputMessage>(
            /* ... */
            mPhaseRecvTimers.at(phase),
            /* ... */);
      auto updateMessage = std::make_shared<LayerUpdateStateMessage>(
            /* ... */);
      nonblockingLayerUpdate(recvMessage, updateMessage);

      recvMessage = std::make_shared<LayerRecvSynapticInputMessage>(
            /* ... */
            mPhaseRecvTimers.at(phase),
            /* ... */);
      updateMessage = std::make_shared<LayerUpdateStateMessage>(
            /* ... */);
      nonblockingLayerUpdate(recvMessage, updateMessage);

      if (getDevice() != nullptr) {
         getDevice()->syncDevice();
      }

      nonblockingLayerUpdate(
            std::make_shared<LayerUpdateStateMessage>(
                  /* ... */
                  &someLayerHasActed));

      if (getDevice() != nullptr) {
         getDevice()->syncDevice();
         notifyLoop(std::make_shared<LayerCopyFromGpuMessage>(phase, mPhaseRecvTimers.at(phase)));
      }

      nonblockingLayerUpdate(
            std::make_shared<LayerUpdateStateMessage>(
                  /* ... */
                  &someLayerHasActed));
#else // CPU-only path
      auto recvMessage = std::make_shared<LayerRecvSynapticInputMessage>(
            /* ... */
            mPhaseRecvTimers.at(phase),
            /* ... */);
      auto updateMessage = std::make_shared<LayerUpdateStateMessage>(
            phase, mSimTime, mDeltaTime, &someLayerIsPending, &someLayerHasActed);
      nonblockingLayerUpdate(recvMessage, updateMessage);
#endif // PV_USE_CUDA
      // ...
      notifyLoop(std::make_shared<LayerAdvanceDataStoreMessage>(phase));
      // ...
      notifyLoop(std::make_shared<LayerPublishMessage>(phase, mSimTime));
      // ...
      notifyLoop(std::make_shared<LayerOutputStateMessage>(phase, mSimTime));
      if (mErrorOnNotANumber) {
         notifyLoop(std::make_shared<LayerCheckNotANumberMessage>(phase));
      }
   }
   // ...
   notifyLoop(std::make_shared<ConnectionUpdateMessage>(mSimTime, mDeltaTime));
   notifyLoop(std::make_shared<ConnectionNormalizeMessage>());
   notifyLoop(std::make_shared<ConnectionFinalizeUpdateMessage>(mSimTime, mDeltaTime));
   notifyLoop(std::make_shared<ConnectionOutputMessage>(mSimTime, mDeltaTime));
   // ...
   notifyLoop(std::make_shared<ColProbeOutputStateMessage>(mSimTime, mDeltaTime));
   // ...
   return status;
}
void HyPerCol::nonblockingLayerUpdate(
      std::shared_ptr<LayerUpdateStateMessage const> updateMessage) {
   // ...
   *(updateMessage->mSomeLayerIsPending) = true;
   *(updateMessage->mSomeLayerHasActed)  = false;

   long int idleCounter = 0;
   while (*(updateMessage->mSomeLayerIsPending)) {
      *(updateMessage->mSomeLayerIsPending) = false;
      *(updateMessage->mSomeLayerHasActed)  = false;
      notifyLoop(updateMessage);
      // ...
      if (!*(updateMessage->mSomeLayerHasActed)) {
         idleCounter++;
      }
   }

   if (idleCounter > 1L) {
      InfoLog() << "t = " << mSimTime << ", phase " << updateMessage->mPhase
#ifdef PV_USE_CUDA
                << ", recvGpu" << updateMessage->mRecvOnGpuFlag << ", updateGpu"
                << updateMessage->mUpdateOnGpuFlag
#endif // PV_USE_CUDA
                << ", idle count " << idleCounter << "\n";
   }
}
void HyPerCol::nonblockingLayerUpdate(
      std::shared_ptr<LayerRecvSynapticInputMessage const> recvMessage,
      std::shared_ptr<LayerUpdateStateMessage const> updateMessage) {
   // Both messages must report through the same pending/acted flags.
   pvAssert(recvMessage->mSomeLayerIsPending == updateMessage->mSomeLayerIsPending);
   pvAssert(recvMessage->mSomeLayerHasActed == updateMessage->mSomeLayerHasActed);

   *(updateMessage->mSomeLayerIsPending) = true;
   *(updateMessage->mSomeLayerHasActed)  = false;

   long int idleCounter = 0;
   while (*(recvMessage->mSomeLayerIsPending)) {
      *(updateMessage->mSomeLayerIsPending) = false;
      *(updateMessage->mSomeLayerHasActed)  = false;
      notifyLoop(recvMessage);
      notifyLoop(updateMessage);
      // ...
      if (!*(updateMessage->mSomeLayerHasActed)) {
         idleCounter++;
      }
   }

   if (idleCounter > 1L) {
      InfoLog() << "t = " << mSimTime << ", phase " << updateMessage->mPhase
#ifdef PV_USE_CUDA
                << ", recvGpu" << updateMessage->mRecvOnGpuFlag << ", updateGpu"
                << updateMessage->mUpdateOnGpuFlag
#endif // PV_USE_CUDA
                << ", idle count " << idleCounter << "\n";
   }
}
Response::Status HyPerCol::respond(std::shared_ptr<BaseMessage const> message) {
   if (auto castMessage = std::dynamic_pointer_cast<PrepareCheckpointWriteMessage const>(message)) {
      return respondPrepareCheckpointWrite(castMessage);
   }
   // ...
   return Response::SUCCESS;
}
Response::Status HyPerCol::respondPrepareCheckpointWrite(
      std::shared_ptr<PrepareCheckpointWriteMessage const> message) {
   std::string path(message->mDirectory);
   path.append("/").append("pv.params");
   outputParams(path.c_str());
   return Response::SUCCESS;
}
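
// respond() dispatches on the message's dynamic type: std::dynamic_pointer_cast yields an
// empty shared_ptr when the type does not match, so each branch handles one message type.
// A standalone sketch of the pattern (message types here are illustrative):
//
//   #include <iostream>
//   #include <memory>
//   #include <string>
//
//   struct BaseMsg { virtual ~BaseMsg() = default; };
//   struct SaveMsg : BaseMsg { std::string directory; };
//   struct QuitMsg : BaseMsg {};
//
//   void respond(std::shared_ptr<BaseMsg const> message) {
//      if (auto saveMsg = std::dynamic_pointer_cast<SaveMsg const>(message)) {
//         std::cout << "saving to " << saveMsg->directory << "\n";
//      }
//      else if (std::dynamic_pointer_cast<QuitMsg const>(message)) {
//         std::cout << "quitting\n";
//      }
//   }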
void HyPerCol::outputParams(char const *path) {
   assert(path != nullptr && path[0] != '\0');
   int rank = mCheckpointer->getMPIBlock()->getRank();
   assert(mPrintParamsStream == nullptr);

   // Make sure the directory that will hold the params file exists.
   // dirname() may modify its argument, so work on a copy of path.
   char *tmp = strdup(path);
   if (tmp == nullptr) {
      Fatal().printf("HyPerCol::outputParams unable to allocate memory: %s\n", strerror(errno));
   }
   char *containingdir = dirname(tmp);
   ensureDirExists(mCheckpointer->getMPIBlock(), containingdir);
   free(tmp);

   if (rank == 0) {
      mPrintParamsStream = new FileStream(path, std::ios_base::out, getVerifyWrites());
      // The .lua sidecar file is written alongside the .params file.
      std::string luaPath(path);
      luaPath.append(".lua");
      mLuaPrintParamsStream =
            new FileStream(luaPath.c_str(), std::ios_base::out, getVerifyWrites());
      parameters()->setPrintParamsStream(mPrintParamsStream);
      parameters()->setPrintLuaStream(mLuaPrintParamsStream);

      // Write the head comments to both files.
      outputParamsHeadComments(mPrintParamsStream, "//");
      outputParamsHeadComments(mLuaPrintParamsStream, "--");

      // Lua preamble: load the PVModule wrapper and open the parameter table.
      mLuaPrintParamsStream->printf(
            "package.path = package.path .. \";\" .. \"" PV_DIR "/../parameterWrapper/?.lua\"\n");
      mLuaPrintParamsStream->printf("local pv = require \"PVModule\"\n\n");
      mLuaPrintParamsStream->printf(
            "NULL = function() end; -- to allow string parameters to be set to NULL\n\n");
      mLuaPrintParamsStream->printf("-- Base table variable to store\n");
      mLuaPrintParamsStream->printf("local pvParameters = {\n");
   }

   // Write the HyPerCol group, then each object's group.
   ioParams(PARAMS_IO_WRITE);
   // ...
   notifyLoop(std::make_shared<LayerWriteParamsMessage>());
   notifyLoop(std::make_shared<ConnectionWriteParamsMessage>());
   notifyLoop(std::make_shared<ColProbeWriteParamsMessage>());
   notifyLoop(std::make_shared<LayerProbeWriteParamsMessage>());
   notifyLoop(std::make_shared<ConnectionProbeWriteParamsMessage>());

   if (rank == 0) {
      // Lua postamble: close the table and emit the conversion boilerplate.
      mLuaPrintParamsStream->printf("} --End of pvParameters\n");
      mLuaPrintParamsStream->printf(
            "\n-- Print out PetaVision approved parameter file to the console\n");
      mLuaPrintParamsStream->printf("paramsFileString = pv.createParamsFileString(pvParameters)\n");
      mLuaPrintParamsStream->printf("io.write(paramsFileString)\n");
   }

   if (mPrintParamsStream) {
      delete mPrintParamsStream;
      mPrintParamsStream = nullptr;
      parameters()->setPrintParamsStream(mPrintParamsStream);
   }
   if (mLuaPrintParamsStream) {
      delete mLuaPrintParamsStream;
      mLuaPrintParamsStream = nullptr;
      parameters()->setPrintLuaStream(mLuaPrintParamsStream);
   }
}
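
// For reference, the .lua sidecar written above has this overall shape (illustrative;
// <PV_DIR> stands for the compiled-in PV_DIR value):
//
//   package.path = package.path .. ";" .. "<PV_DIR>/../parameterWrapper/?.lua"
//   local pv = require "PVModule"
//
//   NULL = function() end; -- to allow string parameters to be set to NULL
//
//   -- Base table variable to store
//   local pvParameters = {
//      column = {
//         groupType = "HyPerCol";
//         -- ...
//      };
//   } --End of pvParameters
//
//   -- Print out PetaVision approved parameter file to the console
//   paramsFileString = pv.createParamsFileString(pvParameters)
//   io.write(paramsFileString)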
void HyPerCol::outputParamsHeadComments(FileStream *fileStream, char const *commentToken) {
   time_t t = time(nullptr);
   fileStream->printf("%s PetaVision, " PV_GIT_REVISION "\n", commentToken);
   fileStream->printf("%s Run time %s", commentToken, ctime(&t)); // ctime output ends with a newline
#ifdef PV_USE_MPI
   MPIBlock const *mpiBlock = mCheckpointer->getMPIBlock();
   fileStream->printf(
         "%s Compiled with Open MPI %d.%d.%d (MPI Standard %d.%d).\n",
         commentToken,
         OMPI_MAJOR_VERSION,
         OMPI_MINOR_VERSION,
         OMPI_RELEASE_VERSION,
         MPI_VERSION,
         MPI_SUBVERSION);
   fileStream->printf(
         "%s MPI configuration has %d rows, %d columns, and batch dimension %d.\n",
         /* ... */);
   // ...
   fileStream->printf(
         "%s CheckpointCells have %d rows, %d columns, and batch dimension %d.\n",
         /* ... */);
#else // PV_USE_MPI
   fileStream->printf("%s Compiled without MPI.\n", commentToken);
#endif // PV_USE_MPI
#ifdef PV_USE_CUDA
   int const cudaMajor  = CUDA_VERSION / 1000;
   int const cudaMinor  = (CUDA_VERSION % 1000) / 10;
   int const cudnnMajor = CUDNN_MAJOR;
   int const cudnnMinor = CUDNN_MINOR;
   int const cudnnPatch = CUDNN_PATCHLEVEL;
   fileStream->printf(
         "%s Compiled with CUDA version %d.%d; cuDNN version %d.%d.%d\n",
         commentToken,
         cudaMajor,
         cudaMinor,
         cudnnMajor,
         cudnnMinor,
         cudnnPatch);
#else // PV_USE_CUDA
   fileStream->printf("%s Compiled without CUDA.\n", commentToken);
#endif // PV_USE_CUDA
#ifdef PV_USE_OPENMP_THREADS
   std::string openmpVersion;
   switch (_OPENMP) {
      case 201511: openmpVersion = "4.5"; break;
      case 201307: openmpVersion = "4.0"; break;
      case 201107: openmpVersion = "3.1"; break;
      case 200805: openmpVersion = "3.0"; break;
      default: openmpVersion = "is unrecognized"; break;
   }
   fileStream->printf(
         "%s Compiled with OpenMP parallel code, API version %s (%06d) ",
         commentToken,
         openmpVersion.c_str(),
         _OPENMP);
   if (mNumThreads > 0) {
      fileStream->printf("and run using %d threads.\n", mNumThreads);
   }
   else if (mNumThreads == 0) {
      fileStream->printf("but number of threads was set to zero (error).\n");
   }
   else {
      fileStream->printf("but the -t option was not specified.\n");
   }
#else // PV_USE_OPENMP_THREADS
   fileStream->printf("%s Compiled without OpenMP parallel code ", commentToken);
   if (mNumThreads == 1) {
      fileStream->printf(".\n");
   }
   else if (mNumThreads == 0) {
      fileStream->printf("but number of threads was set to zero (error).\n");
   }
   else {
      fileStream->printf(
            "but number of threads specified was %d instead of 1 (error).\n", mNumThreads);
   }
#endif // PV_USE_OPENMP_THREADS
   if (mCheckpointReadFlag) {
      fileStream->printf(
            "%s Started from checkpoint \"%s\"\n",
            commentToken,
            mCheckpointer->getCheckpointReadDirectory().c_str());
   }
}
int HyPerCol::getAutoGPUDevice() {
   int returnGpuIdx = -1;
#ifdef PV_USE_CUDA
   int mpiRank = mCommunicator->globalCommRank();
   int numMpi  = mCommunicator->globalCommSize();
   char hostNameStr[PV_PATH_MAX];
   gethostname(hostNameStr, PV_PATH_MAX);
   size_t hostNameLen = strlen(hostNameStr) + 1; // +1 for the null terminator

   // Rank 0 gathers every rank's host name and GPU count, assigns devices round-robin
   // per host, and sends each rank its assignment.
   if (mpiRank == 0) {
      char rankToHost[numMpi][PV_PATH_MAX];
      int rankToMaxGpu[numMpi];
      int rankToGpu[numMpi];

      for (int rank = 0; rank < numMpi; rank++) {
         if (rank == 0) {
            strcpy(rankToHost[rank], hostNameStr);
            rankToMaxGpu[rank] = PVCuda::CudaDevice::getNumDevices();
         }
         else {
            MPI_Recv(
                  rankToHost[rank],
                  PV_PATH_MAX,
                  MPI_CHAR,
                  rank,
                  0,
                  mCommunicator->globalCommunicator(),
                  MPI_STATUS_IGNORE);
            MPI_Recv(
                  &(rankToMaxGpu[rank]),
                  1,
                  MPI_INT,
                  rank,
                  0,
                  mCommunicator->globalCommunicator(),
                  MPI_STATUS_IGNORE);
         }
      }

      // Map each host name to the list of ranks running on it.
      std::map<std::string, std::vector<int>> hostMap;
      for (int rank = 0; rank < numMpi; rank++) {
         hostMap[std::string(rankToHost[rank])].push_back(rank);
      }

      for (auto &host : hostMap) {
         std::vector<int> rankVec = host.second;
         int numRanksPerHost      = rankVec.size();
         assert(numRanksPerHost > 0);
         int maxGpus = rankToMaxGpu[rankVec[0]];
         // Warn if the number of ranks on a host does not match its number of GPUs.
         if (numRanksPerHost != maxGpus) {
            WarnLog(assignGpuWarning);
            assignGpuWarning.printf(
                  "HyPerCol::getAutoGPUDevice: Host \"%s\" (rank[s] ", host.first.c_str());
            for (int v_i = 0; v_i < numRanksPerHost; v_i++) {
               if (v_i != numRanksPerHost - 1) {
                  assignGpuWarning.printf("%d, ", rankVec[v_i]);
               }
               else {
                  assignGpuWarning.printf("%d", rankVec[v_i]);
               }
            }
            assignGpuWarning.printf(
                  ") is being %s, with %d mpi processes mapped to %d total GPU[s]\n",
                  numRanksPerHost < maxGpus ? "underloaded" : "overloaded",
                  numRanksPerHost,
                  maxGpus);
         }
         // Assign GPUs round-robin within the host.
         for (int v_i = 0; v_i < numRanksPerHost; v_i++) {
            rankToGpu[rankVec[v_i]] = v_i % maxGpus;
         }
      }

      for (int rank = 0; rank < numMpi; rank++) {
         InfoLog() << "Rank " << rank << " on host \"" << rankToHost[rank] << "\" ("
                   << rankToMaxGpu[rank] << " GPU[s]) using GPU index "
                   << rankToGpu[rank] << "\n";
         if (rank == 0) {
            returnGpuIdx = rankToGpu[rank];
         }
         else {
            MPI_Send(&(rankToGpu[rank]), 1, MPI_INT, rank, 0, mCommunicator->globalCommunicator());
         }
      }
   }
   else {
      // Nonroot ranks report their host name and GPU count, then wait for the assignment.
      MPI_Send(hostNameStr, hostNameLen, MPI_CHAR, 0, 0, mCommunicator->globalCommunicator());
      int maxGpu = PVCuda::CudaDevice::getNumDevices();
      MPI_Send(&maxGpu, 1, MPI_INT, 0, 0, mCommunicator->globalCommunicator());
      MPI_Recv(
            &returnGpuIdx,
            1,
            MPI_INT,
            0,
            0,
            mCommunicator->globalCommunicator(),
            MPI_STATUS_IGNORE);
   }
   assert(returnGpuIdx >= 0 && returnGpuIdx < PVCuda::CudaDevice::getNumDevices());
#endif // PV_USE_CUDA
   return returnGpuIdx;
}
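
// Example of the round-robin assignment above: a host running 4 MPI ranks with 2 GPUs
// gets rankToGpu = {0, 1, 0, 1} (v_i % maxGpus), and the mismatch triggers the
// "overloaded" warning; 1 rank on a 2-GPU host would be reported as "underloaded".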
#ifdef PV_USE_CUDA
void HyPerCol::initializeCUDA(std::string const &in_device) {
   // Only initialize CUDA if some object in the hierarchy actually uses the GPU.
   bool needGPU = false;
   auto &objectMap = mObjectHierarchy.getObjectMap();
   for (auto &obj : objectMap) {
      // ...
   }
   // ...
   int numMpi = mCommunicator->globalCommSize();
   int device;

   // If the GPUDevices argument is empty, auto-assign a device to each MPI process.
   if (in_device.empty()) {
      if (getCommunicator()->globalCommRank() == 0) {
         InfoLog() << "Auto-assigning GPUs\n";
      }
      device = getAutoGPUDevice();
   }
   else {
      // Parse the comma-separated list of device indices.
      std::vector<int> deviceVec;
      std::stringstream ss(in_device);
      std::string stoken;
      while (std::getline(ss, stoken, ',')) {
         // Check that the token contains only digits.
         for (auto &ch : stoken) {
            FatalIf(
                  !isdigit(ch),
                  "Device specification error: %s contains "
                  "unrecognized characters. Must be "
                  "comma separated integers greater or equal to 0 "
                  "with no other characters "
                  "allowed (including spaces).\n",
                  in_device.c_str());
         }
         deviceVec.push_back(atoi(stoken.c_str()));
      }
      // One device applies to every process; otherwise there must be at least one
      // entry per MPI process.
      if (deviceVec.size() == 1) {
         device = deviceVec[0];
      }
      else if (deviceVec.size() >= numMpi) {
         device = deviceVec[mCommunicator->globalCommRank()];
      }
      else {
         Fatal().printf(
               "Device specification error: Number of devices "
               "specified (%zu) must be either 1 or "
               ">= than number of mpi processes (%d).\n",
               deviceVec.size(),
               numMpi);
      }
   }
   InfoLog() << "Global MPI Process " << mCommunicator->globalCommRank() << " using device "
             << device << "\n";
   // Construct the CudaDevice objects one rank at a time.
   int globalSize = mCommunicator->globalCommSize();
   for (int r = 0; r < globalSize; r++) {
      if (r == globalRank()) {
         mCudaDevice = new PVCuda::CudaDevice(device);
      }
      MPI_Barrier(mCommunicator->globalCommunicator());
   }
   // ...
   if (globalRank() == 0) {
      mCudaDevice->query_device_info();
   }
   // ...
   notifyLoop(std::make_shared<SetCudaDeviceMessage>(mCudaDevice));
}
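
// Examples of the GPUDevices argument parsed above: "0" assigns device 0 to every MPI
// process; "0,1,0,1" assigns one entry per rank (valid for up to 4 processes); "0, 1"
// is fatal because the space fails the digits-only check.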
int HyPerCol::finalizeCUDA() {
   // ...
}
#endif // PV_USE_CUDA

void HyPerCol::addObject(BaseObject *obj) {
   bool succeeded = mObjectHierarchy.addObject(obj->getName(), obj);
   FatalIf(!succeeded, "Adding %s failed.\n", getDescription_c());
}
Observer *HyPerCol::getObjectFromName(std::string const &objectName) const {
   auto &objectMap = mObjectHierarchy.getObjectMap();
   auto search = objectMap.find(objectName);
   return search == objectMap.end() ? nullptr : search->second;
}
Observer *HyPerCol::getNextObject(Observer const *currentObject) const {
   if (mObjectHierarchy.getObjectVector().empty()) {
      if (currentObject != nullptr) {
         throw std::domain_error("HyPerCol::getNextObject called with empty hierarchy");
      }
      else {
         return nullptr;
      }
   }
   else {
      auto objectVector = mObjectHierarchy.getObjectVector();
      if (currentObject == nullptr) {
         return objectVector[0];
      }
      else {
         for (auto iterator = objectVector.begin(); iterator != objectVector.end(); iterator++) {
            Observer *object = *iterator;
            if (object == currentObject) {
               iterator++; // advance to the object after the current one
               return iterator == objectVector.end() ? nullptr : *iterator;
            }
         }
         throw std::domain_error("HyPerCol::getNextObject argument not in hierarchy");
      }
   }
}
unsigned int HyPerCol::seedRandomFromWallClock() {
   unsigned long t = 0UL;
   int const rootproc = 0;
   if (mCommunicator->globalCommRank() == rootproc) {
      t = time((time_t *)nullptr);
   }
   // Broadcast the root process's wall-clock seed so every rank uses the same seed.
   // (Note: t is declared unsigned long but is broadcast as MPI_UNSIGNED.)
   MPI_Bcast(&t, 1, MPI_UNSIGNED, rootproc, mCommunicator->globalCommunicator());
   return t;
}