PostsynapticPerspectiveConvolveDelivery.cpp
/*
 * PostsynapticPerspectiveConvolveDelivery.cpp
 *
 *  Created on: Aug 24, 2017
 *      Author: Pete Schultz
 */

#include "PostsynapticPerspectiveConvolveDelivery.hpp"
#include "columns/HyPerCol.hpp"

namespace PV {

PostsynapticPerspectiveConvolveDelivery::PostsynapticPerspectiveConvolveDelivery(
      char const *name,
      HyPerCol *hc) {
   initialize(name, hc);
}

PostsynapticPerspectiveConvolveDelivery::PostsynapticPerspectiveConvolveDelivery() {}

PostsynapticPerspectiveConvolveDelivery::~PostsynapticPerspectiveConvolveDelivery() {}

int PostsynapticPerspectiveConvolveDelivery::initialize(char const *name, HyPerCol *hc) {
   return BaseObject::initialize(name, hc);
}

void PostsynapticPerspectiveConvolveDelivery::setObjectType() {
   mObjectType = "PostsynapticPerspectiveConvolveDelivery";
}

int PostsynapticPerspectiveConvolveDelivery::ioParamsFillGroup(enum ParamsIOFlag ioFlag) {
   int status = HyPerDelivery::ioParamsFillGroup(ioFlag);
   return status;
}

void PostsynapticPerspectiveConvolveDelivery::ioParam_receiveGpu(enum ParamsIOFlag ioFlag) {
   // Always false; the receiveGpu=true case is handled by the
   // PostsynapticPerspectiveGPUDelivery class instead.
   mReceiveGpu = false;
}

Response::Status PostsynapticPerspectiveConvolveDelivery::communicateInitInfo(
      std::shared_ptr<CommunicateInitInfoMessage const> message) {
   auto status = HyPerDelivery::communicateInitInfo(message);
   if (!Response::completed(status)) {
      return status;
   }
   // HyPerDelivery::communicateInitInfo() postpones until mWeightsPair communicates.
   pvAssert(mWeightsPair and mWeightsPair->getInitInfoCommunicatedFlag());
   if (!mWeightsPair->getInitInfoCommunicatedFlag()) {
      return Response::POSTPONE;
   }
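   // Request the post-perspective (transposed) arrangement of the weights;
   // deliver() and deliverUnitInput() read them through getPostWeights().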
   mWeightsPair->needPost();
   return Response::SUCCESS;
}

Response::Status PostsynapticPerspectiveConvolveDelivery::allocateDataStructures() {
   auto status = HyPerDelivery::allocateDataStructures();
   return status;
}

void PostsynapticPerspectiveConvolveDelivery::deliver() {
   // Check if we need to update, based on the connection's channel
   if (getChannelCode() == CHANNEL_NOUPDATE) {
      return;
   }
   float *postChannel = mPostLayer->getChannel(getChannelCode());
   pvAssert(postChannel);

   int numAxonalArbors = mArborList->getNumAxonalArbors();
   for (int arbor = 0; arbor < numAxonalArbors; arbor++) {
      int delay = mArborList->getDelay(arbor);
      PVLayerCube activityCube = mPreLayer->getPublisher()->createCube(delay);

      // Get the number of neurons in the restricted target layer
      const int numPostRestricted = mPostLayer->getNumNeurons();

      const PVLayerLoc *sourceLoc = mPreLayer->getLayerLoc();
      const PVLayerLoc *targetLoc = mPostLayer->getLayerLoc();

      const int sourceNx = sourceLoc->nx;
      const int sourceNy = sourceLoc->ny;
      const int sourceNf = sourceLoc->nf;
      const int targetNx = targetLoc->nx;
      const int targetNy = targetLoc->ny;
      const int targetNf = targetLoc->nf;
      const int nbatch = targetLoc->nbatch;

      const PVHalo *sourceHalo = &sourceLoc->halo;
      const PVHalo *targetHalo = &targetLoc->halo;

      // Get the source layer's extended y stride
      int sy = (sourceNx + sourceHalo->lt + sourceHalo->rt) * sourceNf;

      // The start of the GSyn buffer
      float *gSynPatchHead = mPostLayer->getChannel(getChannelCode());

      // Get the post-perspective weights and their patch strides and sizes
      Weights *postWeights = mWeightsPair->getPostWeights();
      int syp = postWeights->getPatchStrideY();
      int yPatchSize = postWeights->getPatchSizeY();
      int numPerStride = postWeights->getPatchSizeX() * postWeights->getPatchSizeF();
      int neuronIndexStride = targetNf < 4 ? 1 : targetNf / 4;
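      // The loops below partition the post-synaptic neurons into neuronIndexStride
      // interleaved subsets (idx = feature, feature + neuronIndexStride, ...), and
      // each OpenMP thread takes whole subsets, so threads write to distinct gSyn
      // locations. For example, targetNf = 16 gives neuronIndexStride = 4, and the
      // subset for feature = 1 is idx = 1, 5, 9, ...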

      for (int b = 0; b < nbatch; b++) {
         int sourceNxExt = sourceNx + sourceHalo->rt + sourceHalo->lt;
         int sourceNyExt = sourceNy + sourceHalo->dn + sourceHalo->up;
         int sourceNumExtended = sourceNxExt * sourceNyExt * sourceNf;

         float *activityBatch = activityCube.data + b * sourceNumExtended;
         float *gSynPatchHeadBatch = gSynPatchHead + b * numPostRestricted;

         // Iterate over each line in the y axis; the goal is to keep the weights in cache
         for (int ky = 0; ky < yPatchSize; ky++) {
// Threading over feature was the important change that improved cache performance by
// 5-10x. Dynamic scheduling also gave another performance increase over static.
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
            for (int feature = 0; feature < neuronIndexStride; feature++) {
               for (int idx = feature; idx < numPostRestricted; idx += neuronIndexStride) {
                  float *gSyn = gSynPatchHeadBatch + idx;

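                  // Convert the restricted index idx to the corresponding index in
                  // the extended (halo-padded) frame; this extended index selects
                  // both the weight patch and, via getUnshrunkenStart(), the patch's
                  // anchor in the pre-synaptic activity buffer.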
                  int kTargetExt = kIndexExtended(
                        idx,
                        targetNx,
                        targetNy,
                        targetNf,
                        targetHalo->lt,
                        targetHalo->rt,
                        targetHalo->dn,
                        targetHalo->up);
                  int startSourceExt = postWeights->getGeometry()->getUnshrunkenStart(kTargetExt);
                  float *a = activityBatch + startSourceExt + ky * sy;

                  float *weightBuf = postWeights->getDataFromPatchIndex(arbor, kTargetExt);
                  float *weightValues = weightBuf + ky * syp;

                  float dv = 0.0f;
                  for (int k = 0; k < numPerStride; ++k) {
                     dv += a[k] * weightValues[k];
                  }
                  *gSyn += mDeltaTimeFactor * dv;
               }
            }
         }
      }
   }
#ifdef PV_USE_CUDA
   // The CPU updated GSyn, so GSyn now needs to be updated on the GPU as well
   mPostLayer->setUpdatedDeviceGSynFlag(true);
#endif // PV_USE_CUDA
}
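
// In effect, deliver() computes, for each post-synaptic neuron j on the selected
// channel, GSyn[j] += mDeltaTimeFactor * sum_i W[j][i] * a[i], where i runs over
// j's post-perspective patch of pre-synaptic activity, one y-line at a time.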

void PostsynapticPerspectiveConvolveDelivery::deliverUnitInput(float *recvBuffer) {
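   // Accumulate the response to a uniform input of ones: the activity factor
   // drops out, so each post-synaptic neuron receives mDeltaTimeFactor times
   // the sum of the weights in its patch.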
   // Get the number of neurons in the restricted target layer
   const int numPostRestricted = mPostLayer->getNumNeurons();

   const PVLayerLoc *targetLoc = mPostLayer->getLayerLoc();

   const int targetNx = targetLoc->nx;
   const int targetNy = targetLoc->ny;
   const int targetNf = targetLoc->nf;
   const int nbatch = targetLoc->nbatch;

   const PVHalo *targetHalo = &targetLoc->halo;

   // Get the post-perspective weights and their patch strides and sizes
   Weights *postWeights = mWeightsPair->getPostWeights();
   int syp = postWeights->getPatchStrideY();
   int yPatchSize = postWeights->getPatchSizeY();
   int numPerStride = postWeights->getPatchSizeX() * postWeights->getPatchSizeF();
   int neuronIndexStride = targetNf < 4 ? 1 : targetNf / 4;
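   // Post-synaptic neurons are split into the same interleaved feature subsets
   // as in deliver() above.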

   int numAxonalArbors = mArborList->getNumAxonalArbors();
   for (int arbor = 0; arbor < numAxonalArbors; arbor++) {
      for (int b = 0; b < nbatch; b++) {
         float *recvBatch = recvBuffer + b * numPostRestricted;

         // Iterate over each line in the y axis; the goal is to keep the weights in cache
         for (int ky = 0; ky < yPatchSize; ky++) {
// Threading over feature was the important change that improved cache performance by
// 5-10x. Dynamic scheduling also gave another performance increase over static.
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
            for (int feature = 0; feature < neuronIndexStride; feature++) {
               for (int idx = feature; idx < numPostRestricted; idx += neuronIndexStride) {
                  float *recvLocation = recvBatch + idx;

                  int kTargetExt = kIndexExtended(
                        idx,
                        targetNx,
                        targetNy,
                        targetNf,
                        targetHalo->lt,
                        targetHalo->rt,
                        targetHalo->dn,
                        targetHalo->up);
                  float *weightBuf = postWeights->getDataFromPatchIndex(arbor, kTargetExt);
                  float *weightValues = weightBuf + ky * syp;

                  float dv = 0.0f;
                  for (int k = 0; k < numPerStride; ++k) {
                     dv += weightValues[k];
                  }
                  *recvLocation += mDeltaTimeFactor * dv;
               }
            }
         }
      }
   }
}

} // end namespace PV
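
/*
 * For reference, a hypothetical params-file fragment that would exercise this
 * class. This is a sketch, not taken from this file: the connection and layer
 * names are made up, and the exact parameter set that selects this delivery
 * class is an assumption based on PetaVision's HyPerConn conventions.
 *
 *   HyPerConn "InputToOutput" = {
 *      preLayerName                  = "Input";
 *      postLayerName                 = "Output";
 *      channelCode                   = 0;       // deliver onto the excitatory channel
 *      receiveGpu                    = false;   // CPU path; true would select the GPU class
 *      updateGSynFromPostPerspective = true;    // post-synaptic perspective
 *      pvpatchAccumulateType         = "convolve";
 *   };
 */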