PetaVision  Alpha
Checkpointer.hpp
1 /*
2  * Checkpointer.hpp
3  *
4  * Created on Sep 28, 2016
5  * Author: Pete Schultz
6  */
7 
8 #ifndef CHECKPOINTER_HPP_
9 #define CHECKPOINTER_HPP_
10 
11 #include "checkpointing/CheckpointEntry.hpp"
12 #include "checkpointing/CheckpointEntryData.hpp"
13 #include "io/PVParams.hpp"
14 // #include "io/io.hpp"
15 #include "observerpattern/Subject.hpp"
16 // #include "structures/MPIBlock.hpp"
17 #include "utils/Timer.hpp"
18 #include <ctime>
19 // #include <map>
20 // #include <memory>
21 // #include <string>
22 
23 namespace PV {
24 
25 class Checkpointer : public Subject {
26  private:
38  virtual void ioParam_verifyWrites(enum ParamsIOFlag ioFlag, PVParams *params);
39 
44  virtual void ioParam_outputPath(enum ParamsIOFlag ioFlag, PVParams *params);
45 
49  void ioParam_checkpointWrite(enum ParamsIOFlag ioFlag, PVParams *params);
50 
56  void ioParam_checkpointWriteDir(enum ParamsIOFlag ioFlag, PVParams *params);
57 
67  void ioParam_checkpointWriteTriggerMode(enum ParamsIOFlag ioFlag, PVParams *params);
68 
73  void ioParam_checkpointWriteStepInterval(enum ParamsIOFlag ioFlag, PVParams *params);
74 
79  void ioParam_checkpointWriteTimeInterval(enum ParamsIOFlag ioFlag, PVParams *params);
80 
87  void ioParam_checkpointWriteClockInterval(enum ParamsIOFlag ioFlag, PVParams *params);
88 
93  void ioParam_checkpointWriteClockUnit(enum ParamsIOFlag ioFlag, PVParams *params);
94 
107  void ioParam_checkpointIndexWidth(enum ParamsIOFlag ioFlag, PVParams *params);
108 
113  void ioParam_suppressNonplasticCheckpoints(enum ParamsIOFlag ioFlag, PVParams *params);
114 
119  void ioParam_deleteOlderCheckpoints(enum ParamsIOFlag ioFlag, PVParams *params);
120 
126  void ioParam_numCheckpointsKept(enum ParamsIOFlag ioFlag, PVParams *params);
127 
133  void ioParam_initializeFromCheckpointDir(enum ParamsIOFlag ioFlag, PVParams *params);
134 
141  void ioParam_lastCheckpointDir(enum ParamsIOFlag ioFlag, PVParams *params);
144  enum CheckpointWriteTriggerMode { NONE, STEP, SIMTIME, WALLCLOCK };
145  enum WallClockUnit { SECOND, MINUTE, HOUR, DAY };
146 
147  public:
148  struct TimeInfo {
149  double mSimTime = 0.0;
150  long int mCurrentCheckpointStep = 0L;
151  };
152  Checkpointer(
153  std::string const &name,
154  MPIBlock const *globalMPIBlock,
155  Arguments const *arguments);
156  ~Checkpointer();
157 
164  std::string makeOutputPathFilename(std::string const &path);
165 
166  void ioParams(enum ParamsIOFlag ioFlag, PVParams *params);
167  void provideFinalStep(long int finalStep);
168 
169  template <typename T>
170  bool registerCheckpointData(
171  std::string const &objName,
172  std::string const &dataName,
173  T *dataPointer,
174  size_t numValues,
175  bool broadcast,
176  bool constantEntireRun);
177 
178  bool registerCheckpointEntry(
179  std::shared_ptr<CheckpointEntry> checkpointEntry,
180  bool constantEntireRun);
181 
182  void registerTimer(Timer const *timer);
183  virtual void addObserver(Observer *observer) override;
184 
185  void readNamedCheckpointEntry(
186  std::string const &objName,
187  std::string const &dataName,
188  bool constantEntireRun);
189  void
190  readNamedCheckpointEntry(std::string const &checkpointEntryName, bool constantEntireRun = false);
191  void readStateFromCheckpoint();
192  void checkpointRead(double *simTimePointer, long int *currentStepPointer);
193  void checkpointWrite(double simTime);
194  void finalCheckpoint(double simTime);
195  void writeTimers(PrintStream &stream) const;
196 
197  MPIBlock const *getMPIBlock() { return mMPIBlock; }
198  bool doesVerifyWrites() { return mVerifyWrites; }
199  std::string const &getOutputPath() { return mOutputPath; }
200  bool getCheckpointWriteFlag() const { return mCheckpointWriteFlag; }
201  char const *getCheckpointWriteDir() const { return mCheckpointWriteDir; }
202  enum CheckpointWriteTriggerMode getCheckpointWriteTriggerMode() const {
203  return mCheckpointWriteTriggerMode;
204  }
205  long int getCheckpointWriteStepInterval() const { return mCheckpointWriteStepInterval; }
206  double getCheckpointWriteSimtimeInterval() const { return mCheckpointWriteSimtimeInterval; }
207  bool getSuppressNonplasticCheckpoints() const { return mSuppressNonplasticCheckpoints; }
208  std::string const &getCheckpointReadDirectory() const { return mCheckpointReadDirectory; }
209  char const *getLastCheckpointDir() const { return mLastCheckpointDir; }
210  char const *getInitializeFromCheckpointDir() const { return mInitializeFromCheckpointDir; }
211  std::string const &getBlockDirectoryName() const { return mBlockDirectoryName; }
212 
213  private:
214  void initMPIBlock(MPIBlock const *globalMPIBlock, Arguments const *arguments);
215  void initBlockDirectoryName();
216  void ioParamsFillGroup(enum ParamsIOFlag ioFlag, PVParams *params);
217 
228  void findWarmStartDirectory();
229  std::string makeCheckpointDirectoryFromCurrentStep();
230 
235  bool receivedSignal();
236 
242  bool scheduledCheckpoint();
243 
249  bool scheduledStep();
250 
256  bool scheduledSimTime();
257 
264  bool scheduledWallclock();
265 
272  void checkpointWriteSignal();
273 
281  void checkpointNow();
282 
288  void checkpointToDirectory(std::string const &checkpointDirectory);
289 
294  void rotateOldCheckpoints(std::string const &newCheckpointDirectory);
295  void writeTimers(std::string const &directory);
296  std::string generateBlockPath(std::string const &baseDirectory);
297  void verifyDirectory(char const *directory, std::string const &description);
298 
299  private:
300  std::string mName;
301  MPIBlock *mMPIBlock = nullptr;
302  std::string mBlockDirectoryName;
303  std::vector<std::shared_ptr<CheckpointEntry>> mCheckpointRegistry; // Needs to be a vector so
304  // that each MPI process
305  // iterates over the entries
306  // in the same order.
307  ObserverTable mObserverTable;
308  TimeInfo mTimeInfo;
309  std::shared_ptr<CheckpointEntryData<TimeInfo>> mTimeInfoCheckpointEntry = nullptr;
310  bool mWarmStart = false;
311  bool mVerifyWrites = true;
312  std::string mOutputPath = "";
313  bool mCheckpointWriteFlag = false;
314  char *mCheckpointWriteDir = nullptr;
315  char *mCheckpointWriteTriggerModeString = nullptr;
316  enum CheckpointWriteTriggerMode mCheckpointWriteTriggerMode = NONE;
317  long int mCheckpointWriteStepInterval = 1L;
318  double mCheckpointWriteSimtimeInterval = 1.0;
319  std::time_t mCheckpointWriteWallclockInterval = 1L;
320  char *mCheckpointWriteWallclockUnit = nullptr;
321  std::time_t mCheckpointWriteWallclockIntervalSeconds = 1L;
322  int mCheckpointIndexWidth = -1;
323  bool mSuppressNonplasticCheckpoints = false;
324  bool mDeleteOlderCheckpoints = false;
325  int mNumCheckpointsKept = 2;
326  char *mLastCheckpointDir = nullptr;
327  char *mInitializeFromCheckpointDir = nullptr;
328  std::string mCheckpointReadDirectory;
329  long int mNextCheckpointStep = 0L; // kept only for consistency with HyPerCol
330  double mNextCheckpointSimtime = 0.0;
331  std::time_t mLastCheckpointWallclock = (std::time_t)0;
332  std::time_t mNextCheckpointWallclock = (std::time_t)0;
333  int mWidthOfFinalStepNumber = 0;
334  int mOldCheckpointDirectoriesIndex =
335  0; // A pointer to the oldest checkpoint in the mOldCheckpointDirectories vector.
336  std::vector<std::string> mOldCheckpointDirectories; // A ring buffer of existing checkpoints,
337  // used if mDeleteOlderCheckpoints is true.
338  std::vector<Timer const *> mTimers;
339  Timer *mCheckpointTimer = nullptr;
340 
341  static std::string const mDefaultOutputPath;
342 };
343 
344 } // namespace PV
345 
346 #include "Checkpointer.tpp"
347 
348 #endif // CHECKPOINTER_HPP_
void ioParam_suppressNonplasticCheckpoints(enum ParamsIOFlag ioFlag, PVParams *params)
void checkpointWriteSignal()
void ioParam_checkpointWriteClockUnit(enum ParamsIOFlag ioFlag, PVParams *params)
checkpointWriteClockUnit: If checkpointWrite on clock, specifies the units used in checkpointWrite...
void rotateOldCheckpoints(std::string const &newCheckpointDirectory)
virtual void addObserver(Observer *observer) override
void ioParam_deleteOlderCheckpoints(enum ParamsIOFlag ioFlag, PVParams *params)
deleteOlderCheckpoints: If checkpointWrite, specifies if the run should delete older checkpoints when...
void ioParam_checkpointWriteTriggerMode(enum ParamsIOFlag ioFlag, PVParams *params)
mCheckpointWriteTriggerMode: If checkpointWrite is set, specifies the method to checkpoint.
virtual void ioParam_verifyWrites(enum ParamsIOFlag ioFlag, PVParams *params)
verifyWrites: If true, calls to FileStream::write are checked by opening the file in read mode and re...
void ioParam_initializeFromCheckpointDir(enum ParamsIOFlag ioFlag, PVParams *params)
initializeFromCheckpointDir: Sets directory used by Checkpointer::initializeFromCheckpoint(). Layers and connections use this directory if they set their initializeFromCheckpointFlag parameter.
bool scheduledCheckpoint()
virtual void ioParam_outputPath(enum ParamsIOFlag ioFlag, PVParams *params)
mOutputPath: Specifies the absolute or relative output path of the run
void ioParam_checkpointWriteDir(enum ParamsIOFlag ioFlag, PVParams *params)
checkpointWriteDir: If checkpointWrite is set, specifies the output checkpoint directory.
void ioParam_checkpointWriteStepInterval(enum ParamsIOFlag ioFlag, PVParams *params)
checkpointWriteStepInterval: If checkpointWrite on step, specifies the number of steps between checkp...
void checkpointToDirectory(std::string const &checkpointDirectory)
void ioParam_lastCheckpointDir(enum ParamsIOFlag ioFlag, PVParams *params)
lastCheckpointDir: If checkpointWrite is not set, this required parameter specifies the directory to ...
void ioParam_checkpointIndexWidth(enum ParamsIOFlag ioFlag, PVParams *params)
If checkpointWrite is true, checkpointIndexWidth specifies the minimum width for the step number appe...
void ioParam_checkpointWrite(enum ParamsIOFlag ioFlag, PVParams *params)
checkpointWrite: Flag to determine if the run writes checkpoints.
void ioParam_numCheckpointsKept(enum ParamsIOFlag ioFlag, PVParams *params)
mNumCheckpointsKept: If mDeleteOlderCheckpoints is set, keep this many checkpoints before deleting th...
void ioParam_checkpointWriteTimeInterval(enum ParamsIOFlag ioFlag, PVParams *params)
checkpointWriteTimeInterval: If checkpointWrite on time, specifies the amount of simulation time betwe...
void extractCheckpointReadDirectory()
void ioParam_checkpointWriteClockInterval(enum ParamsIOFlag ioFlag, PVParams *params)
checkpointWriteClockInterval: If checkpointWrite on clock, specifies the amount of clock time between ...
std::string makeOutputPathFilename(std::string const &path)