PetaVision Alpha
PostsynapticPerspectiveStochasticDelivery.cpp
1 /*
2  * PostsynapticPerspectiveStochasticDelivery.cpp
3  *
4  * Created on: Aug 24, 2017
5  * Author: Pete Schultz
6  */
7 
8 #include "PostsynapticPerspectiveStochasticDelivery.hpp"
9 #include "columns/HyPerCol.hpp"
10 
11 namespace PV {
12 
// Constructs a named delivery object belonging to the given HyPerCol and
// immediately runs the initialize() chain (two-stage construction).
PostsynapticPerspectiveStochasticDelivery::PostsynapticPerspectiveStochasticDelivery(
      char const *name,
      HyPerCol *hc) {
   initialize(name, hc);
}
18 
// Default constructor; presumably for derived classes that call initialize()
// themselves — TODO confirm against subclass usage.
PostsynapticPerspectiveStochasticDelivery::PostsynapticPerspectiveStochasticDelivery() {}
20 
PostsynapticPerspectiveStochasticDelivery::~PostsynapticPerspectiveStochasticDelivery() {
   // mRandState is allocated in allocateDataStructures(). delete on a null
   // pointer is a no-op, assuming mRandState is null-initialized in the
   // header — TODO confirm the member's in-class initializer.
   delete mRandState;
}
24 
25 int PostsynapticPerspectiveStochasticDelivery::initialize(char const *name, HyPerCol *hc) {
26  return BaseObject::initialize(name, hc);
27 }
28 
// Records this class's type-name string in mObjectType.
void PostsynapticPerspectiveStochasticDelivery::setObjectType() {
   mObjectType = "PostsynapticPerspectiveStochasticDelivery";
}
32 
34  int status = HyPerDelivery::ioParamsFillGroup(ioFlag);
35  return status;
36 }
37 
39  mReceiveGpu = false; // If it's true, we should be using a different class.
40 }
41 
42 Response::Status PostsynapticPerspectiveStochasticDelivery::communicateInitInfo(
43  std::shared_ptr<CommunicateInitInfoMessage const> message) {
44  auto status = HyPerDelivery::communicateInitInfo(message);
45  if (!Response::completed(status)) {
46  return status;
47  }
48  // HyPerDelivery::communicateInitInfo() postpones until mWeightsPair communicates.
49  pvAssert(mWeightsPair and mWeightsPair->getInitInfoCommunicatedFlag());
50  if (!mWeightsPair->getInitInfoCommunicatedFlag()) {
51  return Response::POSTPONE;
52  }
53  mWeightsPair->needPost();
54  return Response::SUCCESS;
55 }
56 
57 Response::Status PostsynapticPerspectiveStochasticDelivery::allocateDataStructures() {
58  auto status = HyPerDelivery::allocateDataStructures();
59  if (!Response::completed(status)) {
60  return status;
61  }
62  mRandState = new Random(mPostLayer->getLayerLoc(), false /*restricted, not extended*/);
63  return Response::SUCCESS;
64 }
65 
   // Check if we need to update based on connection's channel
   if (getChannelCode() == CHANNEL_NOUPDATE) {
      return;
   }
   float *postChannel = mPostLayer->getChannel(getChannelCode());
   pvAssert(postChannel);

   int numAxonalArbors = mArborList->getNumAxonalArbors();
   for (int arbor = 0; arbor < numAxonalArbors; arbor++) {
      int delay = mArborList->getDelay(arbor);
      // Presynaptic activity delayed by this arbor's delay.
      PVLayerCube activityCube = mPreLayer->getPublisher()->createCube(delay);

      // Get number of neurons restricted target
      const int numPostRestricted = mPostLayer->getNumNeurons();

      const PVLayerLoc *sourceLoc = mPreLayer->getLayerLoc();
      const PVLayerLoc *targetLoc = mPostLayer->getLayerLoc();

      const int sourceNx = sourceLoc->nx;
      const int sourceNy = sourceLoc->ny;
      const int sourceNf = sourceLoc->nf;
      const int targetNx = targetLoc->nx;
      const int targetNy = targetLoc->ny;
      const int targetNf = targetLoc->nf;
      const int nbatch = targetLoc->nbatch;

      const PVHalo *sourceHalo = &sourceLoc->halo;
      const PVHalo *targetHalo = &targetLoc->halo;

      // get source layer's extended y stride
      int sy = (sourceNx + sourceHalo->lt + sourceHalo->rt) * sourceNf;

      // The start of the gsyn buffer
      // NOTE(review): same value as postChannel above; one of the two
      // getChannel() lookups could be removed.
      float *gSynPatchHead = mPostLayer->getChannel(getChannelCode());

      // Get source layer's patch y stride
      Weights *postWeights = mWeightsPair->getPostWeights();
      int syp = postWeights->getPatchStrideY();
      int yPatchSize = postWeights->getPatchSizeY();
      int numPerStride = postWeights->getPatchSizeX() * postWeights->getPatchSizeF();
      // Stride used to interleave features across OpenMP threads (>= 1).
      int neuronIndexStride = targetNf < 4 ? 1 : targetNf / 4;

      for (int b = 0; b < nbatch; b++) {
         int sourceNxExt = sourceNx + sourceHalo->rt + sourceHalo->lt;
         int sourceNyExt = sourceNy + sourceHalo->dn + sourceHalo->up;
         int sourceNumExtended = sourceNxExt * sourceNyExt * sourceNf;

         float *activityBatch = activityCube.data + b * sourceNumExtended;
         float *gSynPatchHeadBatch = gSynPatchHead + b * numPostRestricted;

         // Iterate over each line in the y axis, the goal is to keep weights in the cache
         for (int ky = 0; ky < yPatchSize; ky++) {
// Threading over feature was the important change that improved cache performance by
// 5-10x. dynamic scheduling also gave another performance increase over static.
// NOTE(review): the comment above mentions dynamic scheduling but the pragma
// below uses schedule(static) — confirm which is intended.
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
            for (int feature = 0; feature < neuronIndexStride; feature++) {
               for (int idx = feature; idx < numPostRestricted; idx += neuronIndexStride) {
                  float *gSyn = gSynPatchHeadBatch + idx;
                  // Per-neuron RNG: results do not depend on thread count.
                  taus_uint4 *rng = mRandState->getRNG(idx);

                  int idxExtended = kIndexExtended(
                        idx,
                        targetNx,
                        targetNy,
                        targetNf,
                        targetHalo->lt,
                        targetHalo->rt,
                        targetHalo->dn,
                        targetHalo->up);
                  int startSourceExt = postWeights->getGeometry()->getUnshrunkenStart(idxExtended);
                  float *a = activityBatch + startSourceExt + ky * sy;

                  // NOTE(review): kTargetExt repeats the identical
                  // kIndexExtended() call as idxExtended above; the duplicate
                  // computation could be removed.
                  int kTargetExt = kIndexExtended(
                        idx,
                        targetNx,
                        targetNy,
                        targetNf,
                        targetHalo->lt,
                        targetHalo->rt,
                        targetHalo->dn,
                        targetHalo->up);
                  float *weightBuf = postWeights->getDataFromPatchIndex(arbor, kTargetExt);
                  float *weightValues = weightBuf + ky * syp;

                  // Stochastic delivery: each weight contributes iff a uniform
                  // draw falls below a[k] * mDeltaTimeFactor.
                  float dv = 0.0f;
                  for (int k = 0; k < numPerStride; ++k) {
                     *rng = cl_random_get(*rng);
                     double p = (double)rng->s0 / cl_random_max(); // 0.0 < p < 1.0
                     dv += (p < (double)(a[k] * mDeltaTimeFactor)) * weightValues[k];
                  }
                  *gSyn += dv;
               }
            }
         }
      }
   }
#ifdef PV_USE_CUDA
   // CPU updated GSyn, now need to update GSyn on GPU
   mPostLayer->setUpdatedDeviceGSynFlag(true);
#endif // PV_USE_CUDA
}
170 
// Accumulates into recvBuffer the stochastic response each postsynaptic neuron
// would receive if every presynaptic neuron had unit activity. The loop
// structure mirrors deliver(), with the activity factor a[k] replaced by 1.
void PostsynapticPerspectiveStochasticDelivery::deliverUnitInput(float *recvBuffer) {
   // Get number of neurons restricted target
   const int numPostRestricted = mPostLayer->getNumNeurons();

   const PVLayerLoc *targetLoc = mPostLayer->getLayerLoc();

   const int targetNx = targetLoc->nx;
   const int targetNy = targetLoc->ny;
   const int targetNf = targetLoc->nf;
   const int nbatch = targetLoc->nbatch;

   const PVHalo *targetHalo = &targetLoc->halo;

   // Get source layer's patch y stride
   Weights *postWeights = mWeightsPair->getPostWeights();
   int syp = postWeights->getPatchStrideY();
   int yPatchSize = postWeights->getPatchSizeY();
   int numPerStride = postWeights->getPatchSizeX() * postWeights->getPatchSizeF();
   // Stride used to interleave features across OpenMP threads (>= 1).
   int neuronIndexStride = targetNf < 4 ? 1 : targetNf / 4;

   int numAxonalArbors = mArborList->getNumAxonalArbors();
   for (int arbor = 0; arbor < numAxonalArbors; arbor++) {
      for (int b = 0; b < nbatch; b++) {
         float *recvBatch = recvBuffer + b * numPostRestricted;

         // Iterate over each line in the y axis, the goal is to keep weights in the cache
         for (int ky = 0; ky < yPatchSize; ky++) {
// Threading over feature was the important change that improved cache performance by
// 5-10x. dynamic scheduling also gave another performance increase over static.
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
            for (int feature = 0; feature < neuronIndexStride; feature++) {
               for (int idx = feature; idx < numPostRestricted; idx += neuronIndexStride) {
                  float *recvLocation = recvBatch + idx;
                  // Per-neuron RNG: results do not depend on thread count.
                  taus_uint4 *rng = mRandState->getRNG(idx);

                  int kTargetExt = kIndexExtended(
                        idx,
                        targetNx,
                        targetNy,
                        targetNf,
                        targetHalo->lt,
                        targetHalo->rt,
                        targetHalo->dn,
                        targetHalo->up);
                  float *weightBuf = postWeights->getDataFromPatchIndex(arbor, kTargetExt);
                  float *weightValues = weightBuf + ky * syp;

                  // Unit-activity stochastic draw: weight contributes iff the
                  // uniform draw falls below mDeltaTimeFactor.
                  float dv = 0.0f;
                  for (int k = 0; k < numPerStride; ++k) {
                     *rng = cl_random_get(*rng);
                     double p = (double)rng->s0 / cl_random_max(); // 0.0 < p < 1.0
                     dv += (p < (double)mDeltaTimeFactor) * weightValues[k];
                  }
                  // NOTE(review): deliver() adds dv without an extra scaling,
                  // while here mDeltaTimeFactor appears both in the spike
                  // probability above and as a multiplier below — confirm the
                  // double application is intended.
                  *recvLocation += mDeltaTimeFactor * dv;
               }
            }
         }
      }
   }
}
233 
234 } // end namespace PV
int getPatchSizeX() const
Definition: Weights.hpp:219
PVLayerCube createCube(int delay=0)
Definition: Publisher.cpp:60
virtual int ioParamsFillGroup(enum ParamsIOFlag ioFlag) override
static bool completed(Status &a)
Definition: Response.hpp:49
int getPatchSizeY() const
Definition: Weights.hpp:222
std::shared_ptr< PatchGeometry > getGeometry() const
Definition: Weights.hpp:148
int getPatchStrideY() const
Definition: Weights.hpp:248
int getNumAxonalArbors() const
Definition: ArborList.hpp:52
virtual void ioParam_receiveGpu(enum ParamsIOFlag ioFlag) override
receiveGpu: PostsynapticPerspectiveStochasticDelivery always sets receiveGpu to false. The receiveGpu=true case is handled by the PostsynapticPerspectiveGPUDelivery class.
float * getDataFromPatchIndex(int arbor, int patchIndex)
Definition: Weights.cpp:205
virtual int ioParamsFillGroup(enum ParamsIOFlag ioFlag) override
int getPatchSizeF() const
Definition: Weights.hpp:225
bool getInitInfoCommunicatedFlag() const
Definition: BaseObject.hpp:95