/*
 * updateStateFunctions.h
 *
 * Static inline methods to be called by layers' updateState methods
 *
 * Created on: Mar 7, 2012
 * Author: pschultz
 */
#ifndef UPDATESTATEFUNCTIONS_H_
#define UPDATESTATEFUNCTIONS_H_

#ifndef PV_USE_CUDA
#include "../include/pv_types.h"
#include "../utils/conversions.h"
#endif // PV_USE_CUDA

#include "../include/pv_common.h"
#include <float.h>

#ifndef PV_USE_CUDA
#define KERNEL inline
#define MEM_GLOBAL
#define MEM_CONST
#define MEM_LOCAL
#else
#define KERNEL __device__
#define MEM_GLOBAL
#define MEM_CONST
#define MEM_LOCAL
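// Under CUDA, getIndex() computes the global thread index of a 1-D kernel
// launch; each device thread handles the single (batch, neuron) element that
// the CPU build processes in one iteration of its for-loop.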
#define getIndex() ((blockIdx.x * blockDim.x) + threadIdx.x)
#include "../cudakernels/conversions.hcu"
#define CHANNEL_EXC 0
#define CHANNEL_INH 1
#define CHANNEL_INHB 2
#define CHANNEL_GAP 3
#endif

// Prototypes
KERNEL
int applyGSyn_HyPerLayer1Channel(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead);
KERNEL
int applyGSyn_HyPerLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead);
KERNEL
int applyGSyn_LabelErrorLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up,
      int isBinary);

KERNEL
int updateV_ANNLayer_vertices(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      int num_channels,
      MEM_GLOBAL float *GSynHead,
      MEM_GLOBAL float *activity,
      int numVertices,
      float *verticesV,
      float *verticesA,
      float *slopes,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up);

#ifdef OBSOLETE // Marked obsolete Jun 30, 2017. Use updateV_ANNLayer_vertices
// updateV_PtwiseLinearTransferLayer was deprecated Jun 28, 2016, along with the
// PtwiseLinearTransferLayer class.
// Use ANNLayer with verticesA/verticesV/slopeNegInf/slopePosInf, and updateV_ANNLayer_vertices
// instead.
KERNEL
int updateV_PtwiseLinearTransferLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      int num_channels,
      MEM_GLOBAL float *GSynHead,
      MEM_GLOBAL float *activity,
      int numVertices,
      float *verticesV,
      float *verticesA,
      float *slopes,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up) {
   return updateV_ANNLayer_vertices(
         nbatch,
         numNeurons,
         V,
         num_channels,
         GSynHead,
         activity,
         numVertices,
         verticesV,
         verticesA,
         slopes,
         nx,
         ny,
         nf,
         lt,
         rt,
         dn,
         up);
}
#endif // OBSOLETE // Marked obsolete Jun 30, 2017. Use updateV_ANNLayer_vertices

KERNEL
int updateV_ANNLayer_threshminmax(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      int num_channels,
      MEM_GLOBAL float *GSynHead,
      MEM_GLOBAL float *activity,
      float VThresh,
      float AMin,
      float AMax,
      float AShift,
      float VWidth,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up);
KERNEL
int updateV_ANNErrorLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead,
      MEM_GLOBAL float *activity,
      int numVertices,
      float *verticesV,
      float *verticesA,
      float *slopes,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up,
      float errScale);
KERNEL
int applyGSyn_HyPerLCALayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead,
      MEM_GLOBAL float *activity,
      double *dtAdapt,
      float tau,
      float selfInteract,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up);
KERNEL
int applyGSyn_HyPerLCALayer2(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead,
      MEM_GLOBAL float *activity,
      double *dtAdapt,
      float tau,
      float selfInteract,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up);
KERNEL
int applyGSyn_ISTALayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead,
      MEM_GLOBAL float *activity,
      double *dtAdapt,
      float tau,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up,
      float VThresh);
KERNEL
int applyGSyn_ISTALayer2(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead,
      MEM_GLOBAL float *activity,
      double *dtAdapt,
      float tau,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up,
      float VThresh);
KERNEL
int applyGSyn_ANNWhitenedLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead);

KERNEL
int updateV_HyPerLCALayer(
      int nbatch,
      int numNeurons,
      int numChannels,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead,
      MEM_GLOBAL float *activity,
      int numVertices,
      float *verticesV,
      float *verticesA,
      float *slopes,
      double *dtAdapt,
      float tau,
      float selfInteract,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up);

KERNEL
int updateV_MomentumLCALayer(
      int nbatch,
      int numNeurons,
      int numChannels,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead,
      MEM_GLOBAL float *activity,
      MEM_GLOBAL float *prevDrive,
      int numVertices,
      float *verticesV,
      float *verticesA,
      float *slopes,
      double *dtAdapt,
      float tau,
      float LCAMomentumRate,
      float selfInteract,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up);

KERNEL
int updateV_ISTALayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead,
      MEM_GLOBAL float *activity,
      float VThresh,
      MEM_GLOBAL double *dtAdapt,
      float tau,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up,
      int numChannels);
KERNEL
int updateV_ANNDivInh(int nbatch, int numNeurons, MEM_GLOBAL float *V, MEM_GLOBAL float *GSynHead);
KERNEL
int updateV_ANNSquaredLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead);
KERNEL
int updateV_PoolingANNLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead,
      float biasa,
      float biasb);
KERNEL
int updateV_PtwiseProductLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead);
KERNEL
int updateV_PtwiseQuotientLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead);
KERNEL
int updateV_SigmoidLayer();
KERNEL
int applyVMax_ANNLayer_threshminmax(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      float AMax,
      MEM_GLOBAL float *activity,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up);
KERNEL
int applyVThresh_ANNLayer_threshminmax(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      float VThresh,
      float AMin,
      float AShift,
      float VWidth,
      MEM_GLOBAL float *activity,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up);
KERNEL
int applyVThresh_ANNErrorLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      float AMin,
      float VThresh,
      float AShift,
      MEM_GLOBAL float *activity,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up);
KERNEL
int squareV_ANNSquaredLayer(int nbatch, int numNeurons, MEM_GLOBAL float *V);
KERNEL
int updateSparsityTermDeriv_LogLatWTAGenLayer(
      int nbatch,
      int numNeurons,
      int num_features,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *sparsitytermderivative);
KERNEL
float lateralCompetitionPenalty(MEM_GLOBAL float *V, int num_features);

KERNEL
int setActivity_HyPerLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *A,
      MEM_GLOBAL float *V,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up);

KERNEL
int setActivity_PtwiseLinearTransferLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *A,
      MEM_GLOBAL float *V,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up,
      int numVertices,
      float *verticesV,
      float *verticesA,
      float *slopes);

KERNEL
int setActivity_AccumulateLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *A,
      MEM_GLOBAL float *V,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up);
KERNEL
int setActivity_IncrementLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *A,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *Vprev,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up);
KERNEL
int setActivity_GapLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *A,
      MEM_GLOBAL float *V,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up,
      int orig_lt,
      int orig_rt,
      int orig_dn,
      int orig_up,
      MEM_GLOBAL float *checkActive,
      float ampSpikelet);

KERNEL
int resetGSynBuffers_HyPerLayer(
      int nbatch,
      int numNeurons,
      int num_channels,
      MEM_GLOBAL float *GSynHead);
KERNEL
int resetGSynBuffers_PoolingIndexLayer(
      int nbatch,
      int numNeurons,
      int num_channels,
      MEM_GLOBAL float *GSynHead);
KERNEL
int resetGSynBuffers_SigmoidLayer();

// Definitions
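
// Every definition below follows the same dual-compile pattern: built without
// PV_USE_CUDA, the function loops over all nbatch * numNeurons elements on the
// host (with OpenMP when PV_USE_OPENMP_THREADS is defined); built with
// PV_USE_CUDA, the loop header disappears and each device thread processes the
// single element selected by getIndex().
//
// Illustrative host-side call (a sketch only; buffer pointers and sizes come
// from the caller's layer, and the variable names here are hypothetical):
//
//   int status = updateV_ANNLayer_vertices(
//         loc->nbatch, numNeurons, V, numChannels, GSynHead, activity,
//         numVertices, verticesV, verticesA, slopes,
//         loc->nx, loc->ny, loc->nf,
//         loc->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up);
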
KERNEL
int applyGSyn_HyPerLayer1Channel(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead) {
   int kbatch;
   // GSyn is indexed from slowest- to fastest-varying as [channel, batch, ny, nx, nf]
   MEM_GLOBAL float *GSynExc = &GSynHead[CHANNEL_EXC * nbatch * numNeurons];
#ifndef PV_USE_CUDA
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
   for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
#else
   kbatch = getIndex();
#endif // PV_USE_CUDA
   {
      int b = kbatch / numNeurons;
      int k = kbatch % numNeurons;
      MEM_GLOBAL float *VBatch = V + b * numNeurons;
      MEM_GLOBAL float *GSynExcBatch = GSynExc + b * numNeurons;
      VBatch[k] = GSynExcBatch[k];
   }
   return PV_SUCCESS;
}

KERNEL
int applyGSyn_HyPerLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead) {
   int kbatch;
   MEM_GLOBAL float *GSynExc = &GSynHead[CHANNEL_EXC * nbatch * numNeurons];
   MEM_GLOBAL float *GSynInh = &GSynHead[CHANNEL_INH * nbatch * numNeurons];
#ifndef PV_USE_CUDA
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
   for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
#else
   kbatch = getIndex();
#endif // PV_USE_CUDA
   {
      int b = kbatch / numNeurons;
      int k = kbatch % numNeurons;
      MEM_GLOBAL float *VBatch = V + b * numNeurons;
      MEM_GLOBAL float *GSynExcBatch = GSynExc + b * numNeurons;
      MEM_GLOBAL float *GSynInhBatch = GSynInh + b * numNeurons;

      VBatch[k] = GSynExcBatch[k] - GSynInhBatch[k];
   }
   return PV_SUCCESS;
}

KERNEL
int applyGSyn_LabelErrorLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up,
      int isBinary) {
   int kbatch;
   MEM_GLOBAL float *GSynExc = &GSynHead[CHANNEL_EXC * nbatch * numNeurons];
   MEM_GLOBAL float *GSynInh = &GSynHead[CHANNEL_INH * nbatch * numNeurons];

   if (isBinary > 0) {
#ifndef PV_USE_CUDA
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
      for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
#else
      kbatch = getIndex();
#endif // PV_USE_CUDA
      {
         int b = kbatch / numNeurons;
         int k = kbatch % numNeurons;
         MEM_GLOBAL float *VBatch = V + b * numNeurons;
         MEM_GLOBAL float *GSynExcBatch = GSynExc + b * numNeurons;
         MEM_GLOBAL float *GSynInhBatch = GSynInh + b * numNeurons;
         VBatch[k] = GSynExcBatch[k] - GSynInhBatch[k];
         if (GSynExcBatch[k] > 0.0f) { // target label is positive
            VBatch[k] = VBatch[k] > 0.0f ? VBatch[k] : 0.0f;
         }
         else { // target label is negative
            VBatch[k] = VBatch[k] < 0.0f ? VBatch[k] : 0.0f;
         }
      }
   }
   else {
#ifndef PV_USE_CUDA
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
      for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
#else
      kbatch = getIndex();
#endif // PV_USE_CUDA
      {
         int b = kbatch / numNeurons;
         int k = kbatch % numNeurons;
         MEM_GLOBAL float *VBatch = V + b * numNeurons;
         MEM_GLOBAL float *GSynExcBatch = GSynExc + b * numNeurons;
         MEM_GLOBAL float *GSynInhBatch = GSynInh + b * numNeurons;

         float ratio = 1.0f;
         // Need to find maximum value of target label
         // If first feature, find ratio between target and guess feature val
         int iF = featureIndex(k, nx + lt + rt, ny + dn + up, nf);
         if (iF == 0) {
            float maxTargetVal = GSynExcBatch[k];
            int maxIdx = k;
            // Find max value in feature space
            for (int iif = 1; iif < nf; iif++) {
               if (GSynExcBatch[k + iif] > maxTargetVal) {
                  maxTargetVal = GSynExcBatch[k + iif];
                  maxIdx = k + iif;
               }
            }
            // Find ratio
            // if target label is positive and guess is over target
            if (maxTargetVal > 0 && GSynInhBatch[maxIdx] > maxTargetVal) {
               ratio = maxTargetVal / GSynInhBatch[maxIdx];
            }
            else {
               ratio = 1.0f;
            }
         }
         // Calculate V value based on target and rescaled guess
         VBatch[k] = GSynExcBatch[k] - (GSynInhBatch[k] * ratio);
         // If target label is negative, and guess is lower than target label, err = 0
         if (GSynExcBatch[k] < 0.0f) {
            VBatch[k] = VBatch[k] < 0.0f ? VBatch[k] : 0.0f;
         }
      }
   }
   return PV_SUCCESS;
}

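// applyGSyn_HyPerLCALayer integrates the LCA membrane equation with an
// exponential Euler step:
//   V <- exp(-dt/tau) * V + (1 - exp(-dt/tau)) * (GSynError + selfInteract * A),
// where dt is the per-batch (possibly adaptive) timestep dtAdapt[b].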
KERNEL
int applyGSyn_HyPerLCALayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead,
      MEM_GLOBAL float *activity,
      double *dtAdapt,
      float tau,
      float selfInteract,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up) {
   int kbatch;
   MEM_GLOBAL float *GSynError = &GSynHead[0 * nbatch * numNeurons]; // weighted input
#ifndef PV_USE_CUDA
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
   for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
#else
   kbatch = getIndex();
#endif // PV_USE_CUDA
   {
      int b = kbatch / numNeurons;
      int k = kbatch % numNeurons;
      float exp_tau = (float)exp(-dtAdapt[b] / (double)tau);
      MEM_GLOBAL float *VBatch = V + b * numNeurons;
      MEM_GLOBAL float *GSynErrorBatch = GSynError + b * numNeurons;
      // Activity extended
      MEM_GLOBAL float *activityBatch = activity + b * (nx + rt + lt) * (ny + up + dn) * nf;
      int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);
      VBatch[k] = exp_tau * VBatch[k]
                  + (1.0f - exp_tau) * (GSynErrorBatch[k] + selfInteract * activityBatch[kex]);
   }
   return PV_SUCCESS;
}

KERNEL
int applyGSyn_HyPerLCALayer2(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead,
      MEM_GLOBAL float *activity,
      double *dtAdapt,
      float tau,
      float selfInteract,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up) {
   int kbatch;
   MEM_GLOBAL float *GSynError = &GSynHead[0 * nbatch * numNeurons]; // weighted input
   MEM_GLOBAL float *GSynError2 = &GSynHead[1 * nbatch * numNeurons]; // weighted input

#ifndef PV_USE_CUDA
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
   for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
#else
   kbatch = getIndex();
#endif // PV_USE_CUDA
   {
      int b = kbatch / numNeurons;
      int k = kbatch % numNeurons;

      float exp_tau = (float)exp(-dtAdapt[b] / (double)tau);
      MEM_GLOBAL float *VBatch = V + b * numNeurons;
      MEM_GLOBAL float *GSynErrorBatch = GSynError + b * numNeurons;
      MEM_GLOBAL float *GSynError2Batch = GSynError2 + b * numNeurons;
      // Activity extended
      MEM_GLOBAL float *activityBatch = activity + b * (nx + rt + lt) * (ny + up + dn) * nf;

      int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);
      VBatch[k] = exp_tau * VBatch[k]
                  + (1.0f - exp_tau)
                          * (GSynErrorBatch[k] - GSynError2Batch[k]
                             + selfInteract * activityBatch[kex]);
   }
   return PV_SUCCESS;
}

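// Momentum variant of the LCA update: the previous timestep's drive, scaled by
// LCAMomentumRate, is added on top of the current drive, and prevDrive is then
// overwritten with the current drive for use at the next timestep.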
KERNEL
int applyGSyn_MomentumLCALayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead,
      MEM_GLOBAL float *activity,
      MEM_GLOBAL float *prevDrive,
      double *dtAdapt,
      float tau,
      float LCAMomentumRate,
      float selfInteract,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up) {
   int kbatch;
   MEM_GLOBAL float *GSynError = &GSynHead[0 * nbatch * numNeurons]; // weighted input
#ifndef PV_USE_CUDA
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
   for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
#else
   kbatch = getIndex();
#endif // PV_USE_CUDA
   {
      int b = kbatch / numNeurons;
      int k = kbatch % numNeurons;
      float exp_tau = expf((float)-dtAdapt[b] / tau);
      MEM_GLOBAL float *VBatch = V + b * numNeurons;
      MEM_GLOBAL float *GSynErrorBatch = GSynError + b * numNeurons;
      MEM_GLOBAL float *prevDriveBatch = prevDrive + b * numNeurons;
      // Activity extended
      MEM_GLOBAL float *activityBatch = activity + b * (nx + rt + lt) * (ny + up + dn) * nf;
      int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);
      // Calculate current drive
      float currentDrive =
            (1.0f - exp_tau) * (GSynErrorBatch[k] + selfInteract * activityBatch[kex]);
      // Accumulate into VBatch with decay and momentum
      VBatch[k] = exp_tau * VBatch[k] + currentDrive + LCAMomentumRate * prevDriveBatch[k];
      // Update momentum buffer
      prevDriveBatch[k] = currentDrive;
   }
   return PV_SUCCESS;
}

KERNEL
int applyGSyn_MomentumLCALayer2(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead,
      MEM_GLOBAL float *activity,
      MEM_GLOBAL float *prevDrive,
      double *dtAdapt,
      float tau,
      float LCAMomentumRate,
      float selfInteract,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up) {
   int kbatch;
   MEM_GLOBAL float *GSynError = &GSynHead[0 * nbatch * numNeurons]; // weighted input
   MEM_GLOBAL float *GSynError2 = &GSynHead[1 * nbatch * numNeurons]; // weighted input

#ifndef PV_USE_CUDA
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
   for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
#else
   kbatch = getIndex();
#endif // PV_USE_CUDA
   {
      int b = kbatch / numNeurons;
      int k = kbatch % numNeurons;

      float exp_tau = expf((float)-dtAdapt[b] / tau);
      MEM_GLOBAL float *VBatch = V + b * numNeurons;
      MEM_GLOBAL float *GSynErrorBatch = GSynError + b * numNeurons;
      MEM_GLOBAL float *GSynError2Batch = GSynError2 + b * numNeurons;
      MEM_GLOBAL float *prevDriveBatch = prevDrive + b * numNeurons;
      // Activity extended
      MEM_GLOBAL float *activityBatch = activity + b * (nx + rt + lt) * (ny + up + dn) * nf;

      int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);

      float currentDrive =
            (1.0f - exp_tau)
            * ((GSynErrorBatch[k] - GSynError2Batch[k]) + selfInteract * activityBatch[kex]);
      VBatch[k] = exp_tau * VBatch[k] + currentDrive + LCAMomentumRate * prevDriveBatch[k];
      prevDriveBatch[k] = currentDrive;
   }
   return PV_SUCCESS;
}

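// ISTA update: a forward Euler gradient step,
//   V <- V + (dt/tau) * (GSynError - VThresh * sign(A)),
// where VThresh * sign(A) is the subgradient of the L1 sparsity penalty.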
KERNEL
int applyGSyn_ISTALayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead,
      MEM_GLOBAL float *activity,
      double *dtAdapt,
      float tau,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up,
      float VThresh) {
   int kbatch;
   MEM_GLOBAL float *GSynError = &GSynHead[CHANNEL_EXC * nbatch * numNeurons]; // weighted input
#ifndef PV_USE_CUDA
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
   for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
#else
   kbatch = getIndex();
#endif // PV_USE_CUDA
   {
      int b = kbatch / numNeurons;
      int k = kbatch % numNeurons;
      MEM_GLOBAL float *VBatch = V + b * numNeurons;
      MEM_GLOBAL float *GSynErrorBatch = GSynError + b * numNeurons;
      // Activity extended
      MEM_GLOBAL float *activityBatch = activity + b * (nx + rt + lt) * (ny + up + dn) * nf;
      int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);
      float sign = 0.0f;
      if (activityBatch[kex] != 0.0f) {
         sign = activityBatch[kex] / fabsf(activityBatch[kex]);
      }
      VBatch[k] += ((float)dtAdapt[b] / tau) * (GSynErrorBatch[k] - (VThresh * sign));
   }
   return PV_SUCCESS;
}

KERNEL
int applyGSyn_ISTALayer2(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead,
      MEM_GLOBAL float *activity,
      double *dtAdapt,
      float tau,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up,
      float VThresh) {
   int kbatch;
   MEM_GLOBAL float *GSynError = &GSynHead[0 * nbatch * numNeurons]; // weighted input
   MEM_GLOBAL float *GSynError2 = &GSynHead[1 * nbatch * numNeurons]; // weighted input

#ifndef PV_USE_CUDA
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
   for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
#else
   kbatch = getIndex();
#endif // PV_USE_CUDA
   {
      int b = kbatch / numNeurons;
      int k = kbatch % numNeurons;

      MEM_GLOBAL float *VBatch = V + b * numNeurons;
      MEM_GLOBAL float *GSynErrorBatch = GSynError + b * numNeurons;
      MEM_GLOBAL float *GSynError2Batch = GSynError2 + b * numNeurons;
      // Activity extended
      MEM_GLOBAL float *activityBatch = activity + b * (nx + rt + lt) * (ny + up + dn) * nf;

      int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);
      float sign = 0.0f;
      if (activityBatch[kex] != 0.0f) {
         sign = activityBatch[kex] / fabsf(activityBatch[kex]);
      }
      VBatch[k] += ((float)dtAdapt[b] / tau)
                   * ((GSynErrorBatch[k] - GSynError2Batch[k]) - (VThresh * sign));
   }
   return PV_SUCCESS;
}

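// Whitening: V is the input normalized to zero mean and unit standard
// deviation over the patch window,
//   V = (x - E[x]) / (sqrt(E[x^2] - E[x]^2) + FLT_MIN),
// with FLT_MIN guarding against division by zero.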
KERNEL
int applyGSyn_ANNWhitenedLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead) {
   int k;
   MEM_GLOBAL float *GSynInput = &GSynHead[0 * nbatch * numNeurons]; // un-whitened input
   MEM_GLOBAL float *GSynAveInput = &GSynHead[1 * nbatch * numNeurons]; // input averaged over patch window
   MEM_GLOBAL float *GSynAveSquaredInput = &GSynHead[2 * nbatch * numNeurons]; // squared input averaged over patch window
#ifndef PV_USE_CUDA
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
   for (k = 0; k < numNeurons * nbatch; k++)
#else
   k = getIndex();
#endif // PV_USE_CUDA
   {
      // set mean to zero and standard deviation to one over patch window
      V[k] = (GSynInput[k] - GSynAveInput[k])
             / (sqrtf(GSynAveSquaredInput[k] - GSynAveInput[k] * GSynAveInput[k]) + FLT_MIN);
   }
   return PV_SUCCESS;
}

KERNEL
int updateV_ANNLayer_vertices(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      int num_channels,
      MEM_GLOBAL float *GSynHead,
      MEM_GLOBAL float *activity,
      int numVertices,
      float *verticesV,
      float *verticesA,
      float *slopes,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up) {
   int status = PV_SUCCESS;
   if (num_channels == 1) {
      status = applyGSyn_HyPerLayer1Channel(nbatch, numNeurons, V, GSynHead);
   }
   else {
      status = applyGSyn_HyPerLayer(nbatch, numNeurons, V, GSynHead);
   }
   if (status == PV_SUCCESS) {
      status = setActivity_PtwiseLinearTransferLayer(
            nbatch,
            numNeurons,
            activity,
            V,
            nx,
            ny,
            nf,
            lt,
            rt,
            dn,
            up,
            numVertices,
            verticesV,
            verticesA,
            slopes);
   }
   return status;
}

KERNEL
int updateV_ANNLayer_threshminmax(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      int num_channels,
      MEM_GLOBAL float *GSynHead,
      MEM_GLOBAL float *activity,
      float VThresh,
      float AMin,
      float AMax,
      float AShift,
      float VWidth,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up) {
   if (num_channels == 1) {
      applyGSyn_HyPerLayer1Channel(nbatch, numNeurons, V, GSynHead);
   }
   else {
      applyGSyn_HyPerLayer(nbatch, numNeurons, V, GSynHead);
   }
   setActivity_HyPerLayer(nbatch, numNeurons, activity, V, nx, ny, nf, lt, rt, dn, up);
   applyVThresh_ANNLayer_threshminmax(
         nbatch,
         numNeurons,
         V,
         VThresh,
         AMin,
         AShift,
         VWidth,
         activity,
         nx,
         ny,
         nf,
         lt,
         rt,
         dn,
         up);
   applyVMax_ANNLayer_threshminmax(
         nbatch, numNeurons, V, AMax, activity, nx, ny, nf, lt, rt, dn, up);
   return PV_SUCCESS;
}

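// The updateV_* drivers below dispatch on the number of input channels: with
// two channels the drive is GSynError - GSynError2 (excitatory minus
// inhibitory error); with one channel only the excitatory error is used.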
KERNEL
int updateV_HyPerLCALayer(
      int nbatch,
      int numNeurons,
      int numChannels,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead,
      MEM_GLOBAL float *activity,
      int numVertices,
      float *verticesV,
      float *verticesA,
      float *slopes,
      double *dtAdapt,
      float tau,
      float selfInteract,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up) {
   int status = PV_SUCCESS;
   if (numChannels == 2) {
      if (status == PV_SUCCESS)
         status = applyGSyn_HyPerLCALayer2(
               nbatch,
               numNeurons,
               V,
               GSynHead,
               activity,
               dtAdapt,
               tau,
               selfInteract,
               nx,
               ny,
               nf,
               lt,
               rt,
               dn,
               up);
   }
   else if (numChannels == 1) {
      if (status == PV_SUCCESS)
         status = applyGSyn_HyPerLCALayer(
               nbatch,
               numNeurons,
               V,
               GSynHead,
               activity,
               dtAdapt,
               tau,
               selfInteract,
               nx,
               ny,
               nf,
               lt,
               rt,
               dn,
               up);
   }

   if (status == PV_SUCCESS) {
      status = setActivity_PtwiseLinearTransferLayer(
            nbatch,
            numNeurons,
            activity,
            V,
            nx,
            ny,
            nf,
            lt,
            rt,
            dn,
            up,
            numVertices,
            verticesV,
            verticesA,
            slopes);
   }
   return status;
}

KERNEL
int updateV_MomentumLCALayer(
      int nbatch,
      int numNeurons,
      int numChannels,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead,
      MEM_GLOBAL float *activity,
      MEM_GLOBAL float *prevDrive,
      int numVertices,
      float *verticesV,
      float *verticesA,
      float *slopes,
      double *dtAdapt,
      float tau,
      float LCAMomentumRate,
      float selfInteract,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up) {
   int status = PV_SUCCESS;
   if (numChannels == 2) {
      if (status == PV_SUCCESS)
         status = applyGSyn_MomentumLCALayer2(
               nbatch,
               numNeurons,
               V,
               GSynHead,
               activity,
               prevDrive,
               dtAdapt,
               tau,
               LCAMomentumRate,
               selfInteract,
               nx,
               ny,
               nf,
               lt,
               rt,
               dn,
               up);
   }
   else if (numChannels == 1) {
      if (status == PV_SUCCESS)
         status = applyGSyn_MomentumLCALayer(
               nbatch,
               numNeurons,
               V,
               GSynHead,
               activity,
               prevDrive,
               dtAdapt,
               tau,
               LCAMomentumRate,
               selfInteract,
               nx,
               ny,
               nf,
               lt,
               rt,
               dn,
               up);
   }

   if (status == PV_SUCCESS) {
      status = setActivity_PtwiseLinearTransferLayer(
            nbatch,
            numNeurons,
            activity,
            V,
            nx,
            ny,
            nf,
            lt,
            rt,
            dn,
            up,
            numVertices,
            verticesV,
            verticesA,
            slopes);
   }
   return status;
}

KERNEL
int updateV_ISTALayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead,
      MEM_GLOBAL float *activity,
      float VThresh,
      MEM_GLOBAL double *dtAdapt,
      float tau,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up,
      int numChannels) {
   int status = PV_SUCCESS;
   if (numChannels == 2) {
      if (status == PV_SUCCESS)
         status = applyGSyn_ISTALayer2(
               nbatch,
               numNeurons,
               V,
               GSynHead,
               activity,
               dtAdapt,
               tau,
               nx,
               ny,
               nf,
               lt,
               rt,
               dn,
               up,
               VThresh);
   }
   else if (numChannels == 1) {
      if (status == PV_SUCCESS)
         status = applyGSyn_ISTALayer(
               nbatch,
               numNeurons,
               V,
               GSynHead,
               activity,
               dtAdapt,
               tau,
               nx,
               ny,
               nf,
               lt,
               rt,
               dn,
               up,
               VThresh);
   }
   if (status == PV_SUCCESS)
      status = setActivity_HyPerLayer(nbatch, numNeurons, activity, V, nx, ny, nf, lt, rt, dn, up);
   return status;
}

KERNEL
int updateV_ANNErrorLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead,
      MEM_GLOBAL float *activity,
      int numVertices,
      float *verticesV,
      float *verticesA,
      float *slopes,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up,
      float errScale) {
   int status;
   status = applyGSyn_HyPerLayer(nbatch, numNeurons, V, GSynHead);
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
   for (int i = 0; i < numNeurons * nbatch; i++) {
      V[i] *= errScale;
   }
   if (status == PV_SUCCESS) {
      status = setActivity_PtwiseLinearTransferLayer(
            nbatch,
            numNeurons,
            activity,
            V,
            nx,
            ny,
            nf,
            lt,
            rt,
            dn,
            up,
            numVertices,
            verticesV,
            verticesA,
            slopes);
   }
   return status;
}

KERNEL
int updateV_ANNDivInh(int nbatch, int numNeurons, MEM_GLOBAL float *V, MEM_GLOBAL float *GSynHead) {
   int k;
   MEM_GLOBAL float *GSynExc = &GSynHead[CHANNEL_EXC * nbatch * numNeurons];
   MEM_GLOBAL float *GSynInh = &GSynHead[CHANNEL_INH * nbatch * numNeurons];
   MEM_GLOBAL float *GSynDivInh = &GSynHead[CHANNEL_INHB * nbatch * numNeurons];

#ifndef PV_USE_CUDA
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
   for (k = 0; k < numNeurons * nbatch; k++)
#else
   k = getIndex();
#endif // PV_USE_CUDA
   {
      V[k] = (GSynExc[k] - GSynInh[k]) / (GSynDivInh[k] + 0.04f);
   }
   return PV_SUCCESS;
}

KERNEL
int updateV_ANNSquaredLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead) {
   int status;
   status = applyGSyn_HyPerLayer1Channel(nbatch, numNeurons, V, GSynHead);
   if (status == PV_SUCCESS)
      status = squareV_ANNSquaredLayer(nbatch, numNeurons, V);
   return status;
}

KERNEL
int updateV_PoolingANNLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead,
      float biasa,
      float biasb) {
   int k;
   MEM_GLOBAL float *GSynExc = &GSynHead[CHANNEL_EXC * nbatch * numNeurons];
   MEM_GLOBAL float *GSynInh = &GSynHead[CHANNEL_INH * nbatch * numNeurons];
#ifndef PV_USE_CUDA
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
   for (k = 0; k < numNeurons * nbatch; k++)
#else
   k = getIndex();
#endif // PV_USE_CUDA
   {
      V[k] = GSynExc[k] * GSynInh[k] * (biasa * GSynExc[k] + biasb * GSynInh[k]);
   }
   return PV_SUCCESS;
}

KERNEL
int updateV_PtwiseProductLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead) {
   int k;
   MEM_GLOBAL float *GSynExc = &GSynHead[CHANNEL_EXC * nbatch * numNeurons];
   MEM_GLOBAL float *GSynInh = &GSynHead[CHANNEL_INH * nbatch * numNeurons];
#ifndef PV_USE_CUDA
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
   for (k = 0; k < numNeurons * nbatch; k++)
#else
   k = getIndex();
#endif // PV_USE_CUDA
   {
      V[k] = GSynExc[k] * GSynInh[k];
   }
   return PV_SUCCESS;
}

KERNEL
int updateV_PtwiseQuotientLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *GSynHead) {
   int k;
   MEM_GLOBAL float *GSynExc = &GSynHead[CHANNEL_EXC * nbatch * numNeurons];
   MEM_GLOBAL float *GSynInh = &GSynHead[CHANNEL_INH * nbatch * numNeurons];
#ifndef PV_USE_CUDA
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
   for (k = 0; k < numNeurons * nbatch; k++)
#else
   k = getIndex();
#endif // PV_USE_CUDA
   {
      V[k] = GSynExc[k] / GSynInh[k];
   }
   return PV_SUCCESS;
}

KERNEL
int updateV_SigmoidLayer() {
   return PV_SUCCESS; // the source layer is responsible for updating V.
}

KERNEL
int applyVMax_ANNLayer_threshminmax(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      float AMax,
      MEM_GLOBAL float *activity,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up) {
   if (AMax < FLT_MAX) {
      int kbatch = 0;
#ifndef PV_USE_CUDA
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
      for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
#else
      kbatch = getIndex();
#endif // PV_USE_CUDA
      {
         int b = kbatch / numNeurons;
         int k = kbatch % numNeurons;
         MEM_GLOBAL float *activityBatch = activity + b * (nx + lt + rt) * (ny + up + dn) * nf;
         int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);
         if (activityBatch[kex] > AMax) {
            activityBatch[kex] = AMax;
         }
      }
   }
   return PV_SUCCESS;
}

KERNEL
int applyVThresh_ANNLayer_threshminmax(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      float VThresh,
      float AMin,
      float AShift,
      float VWidth,
      MEM_GLOBAL float *activity,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up) {
   if (VThresh > -FLT_MAX) {
      int kbatch = 0;
#ifndef PV_USE_CUDA
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
      for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
#else
      kbatch = getIndex();
#endif // PV_USE_CUDA
      {
         int b = kbatch / numNeurons;
         int k = kbatch % numNeurons;
         MEM_GLOBAL float *VBatch = V + b * numNeurons;
         MEM_GLOBAL float *activityBatch = activity + b * (nx + lt + rt) * (ny + up + dn) * nf;
         int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);
         if (VBatch[k] < VThresh) {
            activityBatch[kex] = AMin;
         }
         else if (VBatch[k] < VThresh + VWidth) {
            activityBatch[kex] =
                  AMin + (VThresh + VWidth - AShift - AMin) * (VBatch[k] - VThresh) / VWidth;
         }
         else {
            activityBatch[kex] -= AShift;
         }
      }
   }
   return PV_SUCCESS;
}

KERNEL
int applyVThresh_ANNErrorLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *V,
      float AMin,
      float VThresh,
      float AShift,
      MEM_GLOBAL float *activity,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up) {
   if (VThresh > -FLT_MAX) {
      int kbatch = 0;
#ifndef PV_USE_CUDA
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
      for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
#else
      kbatch = getIndex();
#endif // PV_USE_CUDA
      {
         int b = kbatch / numNeurons;
         int k = kbatch % numNeurons;
         MEM_GLOBAL float *VBatch = V + b * numNeurons;
         MEM_GLOBAL float *activityBatch = activity + b * (nx + lt + rt) * (ny + up + dn) * nf;
         int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);
         if (fabsf(VBatch[k]) < VThresh)
            activityBatch[kex] = AMin;
         else
            activityBatch[kex] -= AShift;
      }
   }
   return PV_SUCCESS;
}

KERNEL
int squareV_ANNSquaredLayer(int nbatch, int numNeurons, MEM_GLOBAL float *V) {
   int k;
#ifndef PV_USE_CUDA
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
   for (k = 0; k < numNeurons * nbatch; k++)
#else
   k = getIndex();
#endif // PV_USE_CUDA
   {
      V[k] *= V[k];
   }
   return PV_SUCCESS;
}

KERNEL
int updateSparsityTermDeriv_LogLatWTAGenLayer(
      int nbatch,
      int numNeurons,
      int num_features,
      MEM_GLOBAL float *V,
      MEM_GLOBAL float *sparsitytermderivative) {
#ifndef PV_USE_CUDA
   int k;
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for
#endif
   for (k = 0; k < numNeurons / num_features; k++) {
      int feature_start = k * num_features;
      float sum_across_features = 0.0f;
      int f;
      for (f = 0; f < num_features; f++) {
         sum_across_features += V[feature_start + f];
      }
      float lat_wta_expr = lateralCompetitionPenalty(&V[feature_start], num_features);
      for (f = 0; f < num_features; f++) {
         sparsitytermderivative[k * num_features + f] =
               2.0f * (sum_across_features - V[k * num_features + f]) / (1.0f + lat_wta_expr);
      }
   }
#else // PV_USE_CUDA
   int k = getIndex();
   {
      int feature_start = k - (k % num_features);
      float sum_across_features = 0.0f;
      for (int f = 0; f < num_features; f++) {
         sum_across_features += V[feature_start + f];
      }
      float lat_wta_expr = lateralCompetitionPenalty(&V[feature_start], num_features);
      // Each block of num_features neurons will have the same sum_across_features and latWTAexpr.
      // Can we eliminate redundant calculations?
      sparsitytermderivative[k] = 2.0f * (sum_across_features - V[k]) / (1.0f + lat_wta_expr);
   }
#endif // PV_USE_CUDA

   return PV_SUCCESS;
}

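// Returns the sum over p != q of V[p] * V[q]; note that each unordered pair is
// counted twice. Used by updateSparsityTermDeriv_LogLatWTAGenLayer above.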
KERNEL
float lateralCompetitionPenalty(MEM_GLOBAL float *V, int num_features) {
   float z = 0.0f;
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for reduction(+ : z)
#endif
   for (int p = 0; p < num_features; p++) {
      for (int q = 0; q < num_features; q++) {
         if (p != q) {
            z += V[p] * V[q];
         }
      }
   }
   return z;
}

KERNEL
int setActivity_HyPerLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *A,
      MEM_GLOBAL float *V,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up) {
   int kbatch;
#ifndef PV_USE_CUDA
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
   for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
#else
   kbatch = getIndex();
#endif // PV_USE_CUDA
   {
      int b = kbatch / numNeurons;
      int k = kbatch % numNeurons;
      MEM_GLOBAL float *ABatch = A + b * ((nx + lt + rt) * (ny + up + dn) * nf);
      MEM_GLOBAL float *VBatch = V + b * numNeurons;
      int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);
      ABatch[kex] = VBatch[k];
   }
   return PV_SUCCESS;
}

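// Piecewise-linear transfer function defined by the (verticesV[i], verticesA[i])
// pairs: slopes[0] extrapolates below the first vertex, slopes[numVertices]
// extrapolates above the last, and slopes[v + 1] applies on the segment that
// starts at vertex v, so the slopes array has numVertices + 1 entries.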
KERNEL
int setActivity_PtwiseLinearTransferLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *A,
      MEM_GLOBAL float *V,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up,
      int numVertices,
      float *verticesV,
      float *verticesA,
      float *slopes) {
   int kbatch;
   int last = numVertices - 1;
#ifndef PV_USE_CUDA
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for
#endif
   for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
#else
   kbatch = getIndex();
#endif // PV_USE_CUDA
   {
      int b = kbatch / numNeurons;
      int k = kbatch % numNeurons;
      MEM_GLOBAL float *VBatch = V + b * numNeurons;
      MEM_GLOBAL float *ABatch = A + b * (nx + lt + rt) * (ny + up + dn) * nf;
      int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);
      int v;
      float potential = VBatch[k];
      float activity = 0.0f;

      if (potential < verticesV[0]) {
         activity = verticesA[0] + slopes[0] * (potential - verticesV[0]);
      }
      else if (potential >= verticesV[last]) {
         activity = verticesA[last] + slopes[numVertices] * (potential - verticesV[last]);
      }
      else {
         for (v = 0; v < last; v++) {
            if (potential < verticesV[v]) {
               break; // makes the jumps continuous from the right. TODO: allow user control over
               // value at jump
            }
            if (potential == verticesV[v]) {
               activity = verticesA[v];
            }
            else if (potential > verticesV[v] && potential < verticesV[v + 1]) {
               activity = verticesA[v] + slopes[v + 1] * (potential - verticesV[v]);
            }
         }
      }
      ABatch[kex] = activity;
   }
   return PV_SUCCESS;
}

KERNEL
int setActivity_AccumulateLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *A,
      MEM_GLOBAL float *V,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up) {
   // static inline int setActivity_HyPerLayer(int numNeurons, float * A, float * V, int nx,
   // int ny, int nf, int lt, int rt, int dn, int up) {
   int kbatch;
#ifndef PV_USE_CUDA
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
   for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
#else
   kbatch = getIndex();
#endif // PV_USE_CUDA
   {
      int b = kbatch / numNeurons;
      int k = kbatch % numNeurons;
      MEM_GLOBAL float *ABatch = A + b * ((nx + lt + rt) * (ny + up + dn) * nf);
      MEM_GLOBAL float *VBatch = V + b * numNeurons;
      int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);
      ABatch[kex] += VBatch[k];
   }
   return PV_SUCCESS;
}

KERNEL
int setActivity_GapLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *A,
      MEM_GLOBAL float *V,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up,
      int orig_lt,
      int orig_rt,
      int orig_dn,
      int orig_up,
      MEM_GLOBAL float *checkActive,
      float ampSpikelet) {
   int kbatch;
#ifndef PV_USE_CUDA
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
   for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
#else
   kbatch = getIndex();
#endif // PV_USE_CUDA
   {
      int b = kbatch / numNeurons;
      int k = kbatch % numNeurons;
      MEM_GLOBAL float *ABatch = A + b * ((nx + lt + rt) * (ny + up + dn) * nf);
      MEM_GLOBAL float *VBatch = V + b * numNeurons;
      MEM_GLOBAL float *checkActiveBatch = checkActive + b * numNeurons;
      int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);
      int kexorig = kIndexExtended(k, nx, ny, nf, orig_lt, orig_rt, orig_dn, orig_up);
      ABatch[kex] = VBatch[k];
      if (checkActiveBatch[kexorig] > 0.0f) {
         ABatch[kex] += ampSpikelet;
      }
   }
   return PV_SUCCESS;
}

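// Maps V through a decreasing sigmoid, or a truncated linear approximation of
// it when sigmoid_flag is false:
//   A = 1 / (1 + exp(2 * (V - Vth) * sig_scale)), with Vth = (VthRest + Vrest) / 2;
// inverse_flag replaces A with 1 - A.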
KERNEL
int setActivity_SigmoidLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *A,
      MEM_GLOBAL float *V,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up,
      float VthRest,
      float Vrest,
      float sigmoid_alpha,
      bool sigmoid_flag,
      bool inverse_flag,
      float dt) {
   float Vth = (VthRest + Vrest) / 2.0f;
   float sig_scale = -logf(1.0f / sigmoid_alpha - 1.0f) / (Vth - Vrest);
   if (!sigmoid_flag) {
      sig_scale = sig_scale / logf(3.0f);
      // If sigmoid_flag is off, A is a piecewise linear function of V, with slope of sig_scale/2 at
      // V=Vth, truncated to have minimum value 0 and maximum value 1.
      // The log(3) term makes alpha=1/4 have the slope such that V reaches 0 at Vrest, and V
      // reaches 1 at VthRest. Without it, that alpha happens at 0.26894...
   }

   int kbatch;
#ifndef PV_USE_CUDA
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
   for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
#else
   kbatch = getIndex();
#endif // PV_USE_CUDA
   {
      int b = kbatch / numNeurons;
      int k = kbatch % numNeurons;
      MEM_GLOBAL float *ABatch = A + b * ((nx + lt + rt) * (ny + up + dn) * nf);
      MEM_GLOBAL float *VBatch = V + b * numNeurons;
      int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);
      float activity = 0.0f;
      if (!sigmoid_flag) {
         activity = 0.5f - (VBatch[k] - Vth) * sig_scale / 2.0f;
         activity = activity < 0.0f ? 0.0f : activity;
         activity = activity > 1.0f ? 1.0f : activity;
      }
      else {
         activity = 1.0f / (1.0f + expf(2.0f * (VBatch[k] - Vth) * sig_scale));
      }
      ABatch[kex] = activity;
      if (inverse_flag) {
         ABatch[kex] = 1.0f - ABatch[kex];
      }
   }
   return PV_SUCCESS;
}

KERNEL
int resetGSynBuffers_HyPerLayer(
      int nbatch,
      int numNeurons,
      int num_channels,
      MEM_GLOBAL float *GSynHead) {
   for (int ch = 0; ch < num_channels; ch++) {
      MEM_GLOBAL float *channelStart = &GSynHead[ch * nbatch * numNeurons];
      int k;
#ifndef PV_USE_CUDA
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
      for (k = 0; k < numNeurons * nbatch; k++)
#else
      k = getIndex();
#endif // PV_USE_CUDA
      {
         channelStart[k] = 0.0f;
      }
   }
   return PV_SUCCESS;
}

// TODO merge this with resetGSynBuffers_HyPerLayer with a template
KERNEL
int resetGSynBuffers_PoolingIndexLayer(
      int nbatch,
      int numNeurons,
      int num_channels,
      MEM_GLOBAL float *GSynHead) {
   for (int ch = 0; ch < num_channels; ch++) {
      MEM_GLOBAL float *channelStart = &GSynHead[ch * nbatch * numNeurons];
      int k;
#ifndef PV_USE_CUDA
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
      for (k = 0; k < numNeurons * nbatch; k++)
#else
      k = getIndex();
#endif // PV_USE_CUDA
      {
         channelStart[k] = -1.0f;
      }
   }
   return PV_SUCCESS;
}

KERNEL
int resetGSynBuffers_SigmoidLayer() {
   return PV_SUCCESS; // V is cloned from the source layer, so SigmoidLayer doesn't use the GSyn buffers
}

KERNEL
int setActivity_MLPSigmoidLayer(
      int nbatch,
      int numNeurons,
      MEM_GLOBAL float *A,
      MEM_GLOBAL float *V,
      float linear_alpha,
      bool *dropout_buf,
      int nx,
      int ny,
      int nf,
      int lt,
      int rt,
      int dn,
      int up,
      float dt) {
   int kbatch;

#ifndef PV_USE_CUDA
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for
#endif
   for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
#else
   kbatch = getIndex();
#endif // !PV_USE_CUDA
   {
      int b = kbatch / numNeurons;
      int k = kbatch % numNeurons;
      MEM_GLOBAL float *VBatch = V + b * nx * ny * nf;
      MEM_GLOBAL float *ABatch = A + b * (nx + lt + rt) * (ny + up + dn) * nf;
      bool *dropoutBatch = dropout_buf + b * nx * ny * nf;

      int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);
      float activity = 1.7159f * tanhf(2.0f / 3.0f * VBatch[k]) + linear_alpha * VBatch[k];
      // Set explicitly to 0 if that neuron was dropped out
      ABatch[kex] = dropoutBatch[k] ? 0.0f : activity;
   }
   return PV_SUCCESS;
}
#endif /* UPDATESTATEFUNCTIONS_H_ */