9 #ifndef UPDATESTATEFUNCTIONS_H_ 10 #define UPDATESTATEFUNCTIONS_H_ 13 #include "../include/pv_types.h" 14 #include "../utils/conversions.h" 17 #include "../include/pv_common.h" 26 #define KERNEL __device__ 30 #define getIndex() (blockIdx.x * blockDim.x) + threadIdx.x 31 #include "../cudakernels/conversions.hcu" 34 #define CHANNEL_INHB 2 40 int applyGSyn_HyPerLayer1Channel(
44 MEM_GLOBAL
float *GSynHead);
46 int applyGSyn_HyPerLayer(
50 MEM_GLOBAL
float *GSynHead);
52 int applyGSyn_LabelErrorLayer(
56 MEM_GLOBAL
float *GSynHead,
67 int updateV_ANNLayer_vertices(
72 MEM_GLOBAL
float *GSynHead,
73 MEM_GLOBAL
float *activity,
86 #ifdef OBSOLETE // Marked obsolete Jun 30, 2017. Use updateV_ANNLayer_vertices 92 int updateV_PtwiseLinearTransferLayer(
97 MEM_GLOBAL
float *GSynHead,
98 MEM_GLOBAL
float *activity,
110 return updateV_ANNLayer_vertices(
129 #endif // OBSOLETE // Marked obsolete Jun 30, 2017. Use updateV_ANNLayer_vertices 132 int updateV_ANNLayer_threshminmax(
137 MEM_GLOBAL
float *GSynHead,
138 MEM_GLOBAL
float *activity,
152 int updateV_ANNErrorLayer(
156 MEM_GLOBAL
float *GSynHead,
157 MEM_GLOBAL
float *activity,
171 int applyGSyn_HyPerLCALayer(
175 MEM_GLOBAL
float *GSynHead,
176 MEM_GLOBAL
float *activity,
187 int applyGSyn_HyPerLCALayer2(
191 MEM_GLOBAL
float *GSynHead,
192 MEM_GLOBAL
float *activity,
204 int applyGSyn_ISTALayer(
208 MEM_GLOBAL
float *GSynHead,
209 MEM_GLOBAL
float *activity,
221 int applyGSyn_ISTALayer2(
225 MEM_GLOBAL
float *GSynHead,
226 MEM_GLOBAL
float *activity,
238 int applyGSyn_ANNWhitenedLayer(
242 MEM_GLOBAL
float *GSynHead);
245 int updateV_HyPerLCALayer(
250 MEM_GLOBAL
float *GSynHead,
251 MEM_GLOBAL
float *activity,
268 int updateV_MomentumLCALayer(
273 MEM_GLOBAL
float *GSynHead,
274 MEM_GLOBAL
float *activity,
275 MEM_GLOBAL
float *prevDrive,
282 float LCAMomentumRate,
293 int updateV_ISTALayer(
297 MEM_GLOBAL
float *GSynHead,
298 MEM_GLOBAL
float *activity,
300 MEM_GLOBAL
double *dtAdapt,
311 int updateV_ANNDivInh(
int nbatch,
int numNeurons, MEM_GLOBAL
float *V, MEM_GLOBAL
float *GSynHead);
313 int updateV_ANNSquaredLayer(
317 MEM_GLOBAL
float *GSynHead);
319 int updateV_PoolingANNLayer(
323 MEM_GLOBAL
float *GSynHead,
327 int updateV_PtwiseProductLayer(
331 MEM_GLOBAL
float *GSynHead);
333 int updateV_PtwiseQuotientLayer(
337 MEM_GLOBAL
float *GSynHead);
339 int updateV_SigmoidLayer();
341 int applyVMax_ANNLayer_threshminmax(
346 MEM_GLOBAL
float *activity,
355 int applyVThresh_ANNLayer_threshminmax(
363 MEM_GLOBAL
float *activity,
372 int applyVThresh_ANNErrorLayer(
379 MEM_GLOBAL
float *activity,
388 int squareV_ANNSquaredLayer(
int nbatch,
int numNeurons, MEM_GLOBAL
float *V);
390 int updateSparsityTermDeriv_LogLatWTAGenLayer(
395 MEM_GLOBAL
float *sparsitytermderivative);
397 float lateralCompetitionPenalty(MEM_GLOBAL
float *V,
int num_features);
400 int setActivity_HyPerLayer(
414 int setActivity_PtwiseLinearTransferLayer(
432 int setActivity_AccumulateLayer(
445 int setActivity_IncrementLayer(
450 MEM_GLOBAL
float *Vprev,
459 int setActivity_GapLayer(
475 MEM_GLOBAL
float *active,
479 int resetGSynBuffers_HyPerLayer(
483 MEM_GLOBAL
float *GSynHead);
485 int resetGSynBuffers_PoolingIndexLayer(
489 MEM_GLOBAL
float *GSynHead);
491 int resetGSynBuffers_SigmoidLayer();
// applyGSyn_HyPerLayer1Channel: for layers with a single input channel,
// copy the excitatory GSyn channel directly into the membrane potential V,
// per batch element (V[k] = GSynExc[k]).
// NOTE(review): the extraction that produced this file dropped interior
// lines (e.g. the nbatch/numNeurons/V parameters and loop braces); the
// comments describe only the code that is visible here.
495 int applyGSyn_HyPerLayer1Channel(
499 MEM_GLOBAL
float *GSynHead) {
// GSynExc points at the excitatory channel's slab inside GSynHead
// (channels are laid out contiguously, nbatch*numNeurons floats each).
502 MEM_GLOBAL
float *GSynExc = &GSynHead[CHANNEL_EXC * nbatch * numNeurons];
// CPU path: OpenMP loop over all (batch, neuron) pairs; CUDA path uses
// getIndex() instead (the #ifdef/#endif pair was merged by the extraction).
504 #ifdef PV_USE_OPENMP_THREADS 505 #pragma omp parallel for schedule(static) 507 for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
510 #endif // PV_USE_CUDA 512 int b = kbatch / numNeurons;
// b = batch index, k = neuron index within that batch element.
513 int k = kbatch % numNeurons;
514 MEM_GLOBAL
float *VBatch = V + b * numNeurons;
515 MEM_GLOBAL
float *GSynExcBatch = GSynExc + b * numNeurons;
// Single channel: the potential is just the excitatory input.
516 VBatch[k] = GSynExcBatch[k];
// applyGSyn_HyPerLayer: standard two-channel update — membrane potential is
// excitatory minus inhibitory input, V[k] = GSynExc[k] - GSynInh[k], applied
// independently for every neuron in every batch element.
// NOTE(review): extraction dropped interior lines (parameter list, braces);
// comments cover only the visible code.
522 int applyGSyn_HyPerLayer(
526 MEM_GLOBAL
float *GSynHead) {
// Channel slabs inside GSynHead: each channel occupies nbatch*numNeurons floats.
528 MEM_GLOBAL
float *GSynExc = &GSynHead[CHANNEL_EXC * nbatch * numNeurons];
529 MEM_GLOBAL
float *GSynInh = &GSynHead[CHANNEL_INH * nbatch * numNeurons];
// OpenMP loop on CPU; CUDA thread index on device (directives merged by extraction).
531 #ifdef PV_USE_OPENMP_THREADS 532 #pragma omp parallel for schedule(static) 534 for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
537 #endif // PV_USE_CUDA 539 int b = kbatch / numNeurons;
// Decompose flat index into (batch, neuron).
540 int k = kbatch % numNeurons;
541 MEM_GLOBAL
float *VBatch = V + b * numNeurons;
542 MEM_GLOBAL
float *GSynExcBatch = GSynExc + b * numNeurons;
543 MEM_GLOBAL
float *GSynInhBatch = GSynInh + b * numNeurons;
// Net input drives the potential directly (no time constant here).
545 VBatch[k] = GSynExcBatch[k] - GSynInhBatch[k];
551 int applyGSyn_LabelErrorLayer(
555 MEM_GLOBAL
float *GSynHead,
565 MEM_GLOBAL
float *GSynExc = &GSynHead[CHANNEL_EXC * nbatch * numNeurons];
566 MEM_GLOBAL
float *GSynInh = &GSynHead[CHANNEL_INH * nbatch * numNeurons];
570 #ifdef PV_USE_OPENMP_THREADS 571 #pragma omp parallel for schedule(static) 573 for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
576 #endif // PV_USE_CUDA 578 int b = kbatch / numNeurons;
579 int k = kbatch % numNeurons;
580 MEM_GLOBAL
float *VBatch = V + b * numNeurons;
581 MEM_GLOBAL
float *GSynExcBatch = GSynExc + b * numNeurons;
582 MEM_GLOBAL
float *GSynInhBatch = GSynInh + b * numNeurons;
583 VBatch[k] = GSynExcBatch[k] - GSynInhBatch[k];
584 if (GSynExcBatch[k] > 0.0f) {
585 VBatch[k] = VBatch[k] > 0.0f ? VBatch[k] : 0.0f;
588 VBatch[k] = VBatch[k] < 0.0f ? VBatch[k] : 0.0f;
594 #ifdef PV_USE_OPENMP_THREADS 595 #pragma omp parallel for schedule(static) 597 for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
600 #endif // PV_USE_CUDA 602 int b = kbatch / numNeurons;
603 int k = kbatch % numNeurons;
604 float *VBatch = V + b * numNeurons;
605 float *GSynExcBatch = GSynExc + b * numNeurons;
606 float *GSynInhBatch = GSynInh + b * numNeurons;
611 int iF = featureIndex(k, nx + lt + rt, ny + dn + up, nf);
613 float maxTargetVal = GSynExcBatch[k];
616 for (
int iif = 1; iif < nf; iif++) {
617 if (GSynExcBatch[k + iif] > maxTargetVal) {
618 maxTargetVal = GSynExcBatch[k + iif];
624 if (maxTargetVal > 0 && GSynInhBatch[maxIdx] > maxTargetVal) {
625 ratio = maxTargetVal / GSynInhBatch[maxIdx];
632 VBatch[k] = GSynExcBatch[k] - (GSynInhBatch[k] * ratio);
634 if (GSynExcBatch[k] < 0.0f) {
635 VBatch[k] = VBatch[k] < 0.0f ? VBatch[k] : 0.0f;
// applyGSyn_HyPerLCALayer: one LCA (Locally Competitive Algorithm) Euler
// step with an exponential integrator. Per neuron:
//   V <- exp(-dt/tau)*V + (1 - exp(-dt/tau)) * (GSynError + selfInteract*a)
// where `a` is this neuron's own activity read at the extended index and
// dt comes from the per-batch adaptive timestep dtAdapt[b].
// NOTE(review): extraction dropped interior lines (full parameter list,
// loop braces); comments cover only the visible code.
643 int applyGSyn_HyPerLCALayer(
647 MEM_GLOBAL
float *GSynHead,
648 MEM_GLOBAL
float *activity,
// Channel 0 carries the residual/error signal driving the LCA dynamics.
660 MEM_GLOBAL
float *GSynError = &GSynHead[0 * nbatch * numNeurons];
662 #ifdef PV_USE_OPENMP_THREADS 663 #pragma omp parallel for schedule(static) 665 for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
668 #endif // PV_USE_CUDA 670 int b = kbatch / numNeurons;
671 int k = kbatch % numNeurons;
// Decay factor for this batch element's adaptive timestep; computed in
// double then narrowed to float.
672 float exp_tau = (float)exp(-dtAdapt[b] / (
double)tau);
673 MEM_GLOBAL
float *VBatch = V + b * numNeurons;
674 MEM_GLOBAL
float *GSynErrorBatch = GSynError + b * numNeurons;
// Activity buffer includes the halo, hence the (nx+rt+lt)*(ny+up+dn)*nf stride.
676 MEM_GLOBAL
float *activityBatch = activity + b * (nx + rt + lt) * (ny + up + dn) * nf;
677 int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);
// Exponential-Euler update; selfInteract weights the neuron's own activity
// back into its drive.
678 VBatch[k] = exp_tau * VBatch[k]
679 + (1.0f - exp_tau) * (GSynErrorBatch[k] + selfInteract * activityBatch[kex]);
685 int applyGSyn_HyPerLCALayer2(
689 MEM_GLOBAL
float *GSynHead,
690 MEM_GLOBAL
float *activity,
702 MEM_GLOBAL
float *GSynError = &GSynHead[0 * nbatch * numNeurons];
703 MEM_GLOBAL
float *GSynError2 = &GSynHead[1 * nbatch * numNeurons];
706 #ifdef PV_USE_OPENMP_THREADS 707 #pragma omp parallel for schedule(static) 709 for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
712 #endif // PV_USE_CUDA 714 int b = kbatch / numNeurons;
715 int k = kbatch % numNeurons;
717 float exp_tau = (float)exp(-dtAdapt[b] / (
double)tau);
718 MEM_GLOBAL
float *VBatch = V + b * numNeurons;
719 MEM_GLOBAL
float *GSynErrorBatch = GSynError + b * numNeurons;
720 MEM_GLOBAL
float *GSynError2Batch = GSynError2 + b * numNeurons;
722 MEM_GLOBAL
float *activityBatch = activity + b * (nx + rt + lt) * (ny + up + dn) * nf;
724 int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);
725 VBatch[k] = exp_tau * VBatch[k]
726 + (1.0f - exp_tau) * (GSynErrorBatch[k] - GSynError2Batch[k]
727 + selfInteract * activityBatch[kex]);
733 int applyGSyn_MomentumLCALayer(
737 MEM_GLOBAL
float *GSynHead,
738 MEM_GLOBAL
float *activity,
739 MEM_GLOBAL
float *prevDrive,
742 float LCAMomentumRate,
752 MEM_GLOBAL
float *GSynError = &GSynHead[0 * nbatch * numNeurons];
754 #ifdef PV_USE_OPENMP_THREADS 755 #pragma omp parallel for schedule(static) 757 for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
760 #endif // PV_USE_CUDA 762 int b = kbatch / numNeurons;
763 int k = kbatch % numNeurons;
764 float exp_tau = expf((
float)-dtAdapt[b] / tau);
765 MEM_GLOBAL
float *VBatch = V + b * numNeurons;
766 MEM_GLOBAL
float *GSynErrorBatch = GSynError + b * numNeurons;
767 MEM_GLOBAL
float *prevDriveBatch = prevDrive + b * numNeurons;
769 MEM_GLOBAL
float *activityBatch = activity + b * (nx + rt + lt) * (ny + up + dn) * nf;
770 int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);
773 (1.0f - exp_tau) * (GSynErrorBatch[k] + selfInteract * activityBatch[kex]);
775 VBatch[k] = exp_tau * VBatch[k] + currentDrive + LCAMomentumRate * prevDriveBatch[k];
777 prevDriveBatch[k] = currentDrive;
783 int applyGSyn_MomentumLCALayer2(
787 MEM_GLOBAL
float *GSynHead,
788 MEM_GLOBAL
float *activity,
789 MEM_GLOBAL
float *prevDrive,
792 float LCAMomentumRate,
802 MEM_GLOBAL
float *GSynError = &GSynHead[0 * nbatch * numNeurons];
803 MEM_GLOBAL
float *GSynError2 = &GSynHead[1 * nbatch * numNeurons];
806 #ifdef PV_USE_OPENMP_THREADS 807 #pragma omp parallel for schedule(static) 809 for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
812 #endif // PV_USE_CUDA 814 int b = kbatch / numNeurons;
815 int k = kbatch % numNeurons;
817 float exp_tau = expf((
float)-dtAdapt[b] / tau);
818 MEM_GLOBAL
float *VBatch = V + b * numNeurons;
819 MEM_GLOBAL
float *GSynErrorBatch = GSynError + b * numNeurons;
820 MEM_GLOBAL
float *GSynError2Batch = GSynError2 + b * numNeurons;
821 MEM_GLOBAL
float *prevDriveBatch = prevDrive + b * numNeurons;
823 MEM_GLOBAL
float *activityBatch = activity + b * (nx + rt + lt) * (ny + up + dn) * nf;
825 int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);
827 float currentDrive = (1.0f - exp_tau) * ((GSynErrorBatch[k] - GSynError2Batch[k])
828 + selfInteract * activityBatch[kex]);
829 VBatch[k] = exp_tau * VBatch[k] + currentDrive + LCAMomentumRate * prevDriveBatch[k];
830 prevDriveBatch[k] = currentDrive;
// applyGSyn_ISTALayer: one ISTA (Iterative Shrinkage-Thresholding) step.
// Per neuron: V += (dt/tau) * (GSynError - VThresh*sign(a)), where sign(a)
// is the sign of the neuron's own activity (0 contribution when a == 0,
// since `sign` is only assigned inside the nonzero branch).
// NOTE(review): extraction dropped interior lines (parameter list, the
// declaration/initialization of `sign`, braces); comments cover only the
// visible code.
836 int applyGSyn_ISTALayer(
840 MEM_GLOBAL
float *GSynHead,
841 MEM_GLOBAL
float *activity,
// Error signal lives on the excitatory channel.
853 MEM_GLOBAL
float *GSynError = &GSynHead[CHANNEL_EXC * nbatch * numNeurons];
855 #ifdef PV_USE_OPENMP_THREADS 856 #pragma omp parallel for schedule(static) 858 for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
861 #endif // PV_USE_CUDA 863 int b = kbatch / numNeurons;
864 int k = kbatch % numNeurons;
865 MEM_GLOBAL
float *VBatch = V + b * numNeurons;
866 MEM_GLOBAL
float *GSynErrorBatch = GSynError + b * numNeurons;
// Activity is read from the extended (halo-included) buffer.
868 MEM_GLOBAL
float *activityBatch = activity + b * (nx + rt + lt) * (ny + up + dn) * nf;
869 int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);
// sign(a) = a/|a| for nonzero activity; the subgradient of the L1 penalty.
871 if (activityBatch[kex] != 0.0f) {
872 sign = activityBatch[kex] / fabsf(activityBatch[kex]);
// Forward-Euler step with per-batch adaptive dt.
874 VBatch[k] += ((float)dtAdapt[b] / tau) * (GSynErrorBatch[k] - (VThresh * sign));
880 int applyGSyn_ISTALayer2(
884 MEM_GLOBAL
float *GSynHead,
885 MEM_GLOBAL
float *activity,
897 MEM_GLOBAL
float *GSynError = &GSynHead[0 * nbatch * numNeurons];
898 MEM_GLOBAL
float *GSynError2 = &GSynHead[1 * nbatch * numNeurons];
901 #ifdef PV_USE_OPENMP_THREADS 902 #pragma omp parallel for schedule(static) 904 for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
907 #endif // PV_USE_CUDA 909 int b = kbatch / numNeurons;
910 int k = kbatch % numNeurons;
912 MEM_GLOBAL
float *VBatch = V + b * numNeurons;
913 MEM_GLOBAL
float *GSynErrorBatch = GSynError + b * numNeurons;
914 MEM_GLOBAL
float *GSynError2Batch = GSynError2 + b * numNeurons;
916 MEM_GLOBAL
float *activityBatch = activity + b * (nx + rt + lt) * (ny + up + dn) * nf;
918 int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);
920 if (activityBatch[kex] != 0.0f) {
921 sign = activityBatch[kex] / fabsf(activityBatch[kex]);
923 VBatch[k] += ((float)dtAdapt[b] / tau)
924 * ((GSynErrorBatch[k] - GSynError2Batch[k]) - (VThresh * sign));
// applyGSyn_ANNWhitenedLayer: per-neuron whitening of the input. Using three
// channels — raw input, running mean of the input, and running mean of the
// squared input — it computes a z-score:
//   V = (input - mean) / (sqrt(E[x^2] - mean^2) + FLT_MIN)
// FLT_MIN in the denominator prevents division by zero when variance is 0.
// NOTE(review): extraction dropped interior lines (parameter list, braces);
// comments cover only the visible code.
930 int applyGSyn_ANNWhitenedLayer(
934 MEM_GLOBAL
float *GSynHead) {
// Channel 0: raw input; channel 1: mean input; channel 2: mean of squares.
936 MEM_GLOBAL
float *GSynInput = &GSynHead[0 * nbatch * numNeurons];
937 MEM_GLOBAL
float *GSynAveInput = &GSynHead[1 * nbatch * numNeurons];
938 MEM_GLOBAL
float *GSynAveSquaredInput = &GSynHead[2 * nbatch * numNeurons];
940 #ifdef PV_USE_OPENMP_THREADS 941 #pragma omp parallel for schedule(static) 943 for (k = 0; k < numNeurons * nbatch; k++)
// Variance = E[x^2] - (E[x])^2; the sqrt of that is the standard deviation.
946 #endif // PV_USE_CUDA 949 V[k] = (GSynInput[k] - GSynAveInput[k])
950 / (sqrtf(GSynAveSquaredInput[k] - GSynAveInput[k] * GSynAveInput[k]) + FLT_MIN);
956 int updateV_ANNLayer_vertices(
961 MEM_GLOBAL
float *GSynHead,
962 MEM_GLOBAL
float *activity,
974 int status = PV_SUCCESS;
975 if (num_channels == 1) {
976 status = applyGSyn_HyPerLayer1Channel(nbatch, numNeurons, V, GSynHead);
979 status = applyGSyn_HyPerLayer(nbatch, numNeurons, V, GSynHead);
981 if (status == PV_SUCCESS) {
982 status = setActivity_PtwiseLinearTransferLayer(
1003 int updateV_ANNLayer_threshminmax(
1006 MEM_GLOBAL
float *V,
1008 MEM_GLOBAL
float *GSynHead,
1009 MEM_GLOBAL
float *activity,
1022 if (num_channels == 1) {
1023 applyGSyn_HyPerLayer1Channel(nbatch, numNeurons, V, GSynHead);
1026 applyGSyn_HyPerLayer(nbatch, numNeurons, V, GSynHead);
1028 setActivity_HyPerLayer(nbatch, numNeurons, activity, V, nx, ny, nf, lt, rt, dn, up);
1029 applyVThresh_ANNLayer_threshminmax(
1045 applyVMax_ANNLayer_threshminmax(
1046 nbatch, numNeurons, V, AMax, activity, nx, ny, nf, lt, rt, dn, up);
1051 int updateV_HyPerLCALayer(
1055 MEM_GLOBAL
float *V,
1056 MEM_GLOBAL
float *GSynHead,
1057 MEM_GLOBAL
float *activity,
1072 int status = PV_SUCCESS;
1073 if (numChannels == 2) {
1074 if (status == PV_SUCCESS)
1075 status = applyGSyn_HyPerLCALayer2(
1092 else if (numChannels == 1) {
1093 if (status == PV_SUCCESS)
1094 status = applyGSyn_HyPerLCALayer(
1112 if (status == PV_SUCCESS) {
1113 status = setActivity_PtwiseLinearTransferLayer(
1134 int updateV_MomentumLCALayer(
1138 MEM_GLOBAL
float *V,
1139 MEM_GLOBAL
float *GSynHead,
1140 MEM_GLOBAL
float *activity,
1141 MEM_GLOBAL
float *prevDrive,
1148 float LCAMomentumRate,
1157 int status = PV_SUCCESS;
1158 if (numChannels == 2) {
1159 if (status == PV_SUCCESS)
1160 status = applyGSyn_MomentumLCALayer2(
1179 else if (numChannels == 1) {
1180 if (status == PV_SUCCESS)
1181 status = applyGSyn_MomentumLCALayer(
1201 if (status == PV_SUCCESS) {
1202 status = setActivity_PtwiseLinearTransferLayer(
1223 int updateV_ISTALayer(
1226 MEM_GLOBAL
float *V,
1227 MEM_GLOBAL
float *GSynHead,
1228 MEM_GLOBAL
float *activity,
1230 MEM_GLOBAL
double *dtAdapt,
1240 int status = PV_SUCCESS;
1241 if (numChannels == 2) {
1242 if (status == PV_SUCCESS)
1243 status = applyGSyn_ISTALayer2(
1260 else if (numChannels == 1) {
1261 if (status == PV_SUCCESS)
1262 status = applyGSyn_ISTALayer(
1279 if (status == PV_SUCCESS)
1280 status = setActivity_HyPerLayer(nbatch, numNeurons, activity, V, nx, ny, nf, lt, rt, dn, up);
1285 int updateV_ANNErrorLayer(
1288 MEM_GLOBAL
float *V,
1289 MEM_GLOBAL
float *GSynHead,
1290 MEM_GLOBAL
float *activity,
1304 status = applyGSyn_HyPerLayer(nbatch, numNeurons, V, GSynHead);
1305 #ifdef PV_USE_OPENMP_THREADS 1306 #pragma omp parallel for schedule(static) 1308 for (
int i = 0; i < numNeurons * nbatch; i++) {
1311 if (status == PV_SUCCESS) {
1312 status = setActivity_HyPerLayer(nbatch, numNeurons, activity, V, nx, ny, nf, lt, rt, dn, up);
1314 if (status == PV_SUCCESS) {
1315 status = setActivity_PtwiseLinearTransferLayer(
// updateV_ANNDivInh: divisive-inhibition update. The net additive input
// (exc - inh) is divided by the divisive-inhibition channel plus a small
// offset: V = (GSynExc - GSynInh) / (GSynDivInh + 0.04).
// NOTE(review): 0.04f is a magic regularization constant with no visible
// definition here — presumably it prevents division by ~0; confirm its
// provenance before changing it. Extraction dropped interior lines
// (loop-variable declaration, braces, return).
1336 int updateV_ANNDivInh(
int nbatch,
int numNeurons, MEM_GLOBAL
float *V, MEM_GLOBAL
float *GSynHead) {
// Three channel slabs: excitatory, (subtractive) inhibitory, divisive inhibitory.
1338 MEM_GLOBAL
float *GSynExc = &GSynHead[CHANNEL_EXC * nbatch * numNeurons];
1339 MEM_GLOBAL
float *GSynInh = &GSynHead[CHANNEL_INH * nbatch * numNeurons];
1340 MEM_GLOBAL
float *GSynDivInh = &GSynHead[CHANNEL_INHB * nbatch * numNeurons];
1343 #ifdef PV_USE_OPENMP_THREADS 1344 #pragma omp parallel for schedule(static) 1346 for (k = 0; k < numNeurons * nbatch; k++)
1349 #endif // PV_USE_CUDA 1351 V[k] = (GSynExc[k] - GSynInh[k]) / (GSynDivInh[k] + 0.04f);
1357 int updateV_ANNSquaredLayer(
1360 MEM_GLOBAL
float *V,
1361 MEM_GLOBAL
float *GSynHead) {
1363 status = applyGSyn_HyPerLayer1Channel(nbatch, numNeurons, V, GSynHead);
1364 if (status == PV_SUCCESS)
1365 status = squareV_ANNSquaredLayer(nbatch, numNeurons, V);
1370 int updateV_PoolingANNLayer(
1373 MEM_GLOBAL
float *V,
1374 MEM_GLOBAL
float *GSynHead,
1378 MEM_GLOBAL
float *GSynExc = &GSynHead[CHANNEL_EXC * nbatch * numNeurons];
1379 MEM_GLOBAL
float *GSynInh = &GSynHead[CHANNEL_INH * nbatch * numNeurons];
1381 #ifdef PV_USE_OPENMP_THREADS 1382 #pragma omp parallel for schedule(static) 1384 for (k = 0; k < numNeurons * nbatch; k++)
1387 #endif // PV_USE_CUDA 1389 V[k] = GSynExc[k] * GSynInh[k] * (biasa * GSynExc[k] + biasb * GSynInh[k]);
// updateV_PtwiseProductLayer: pointwise product of the two input channels,
// V[k] = GSynExc[k] * GSynInh[k], for every neuron in every batch element.
// NOTE(review): extraction dropped interior lines (nbatch/numNeurons
// parameters, loop braces, return); comments cover only the visible code.
1395 int updateV_PtwiseProductLayer(
1398 MEM_GLOBAL
float *V,
1399 MEM_GLOBAL
float *GSynHead) {
// The two factors come from the excitatory and inhibitory channel slabs.
1401 MEM_GLOBAL
float *GSynExc = &GSynHead[CHANNEL_EXC * nbatch * numNeurons];
1402 MEM_GLOBAL
float *GSynInh = &GSynHead[CHANNEL_INH * nbatch * numNeurons];
1404 #ifdef PV_USE_OPENMP_THREADS 1405 #pragma omp parallel for schedule(static) 1407 for (k = 0; k < numNeurons * nbatch; k++)
1410 #endif // PV_USE_CUDA 1412 V[k] = GSynExc[k] * GSynInh[k];
// updateV_PtwiseQuotientLayer: pointwise quotient of the two input channels,
// V[k] = GSynExc[k] / GSynInh[k] (numerator = excitatory channel,
// denominator = inhibitory channel).
// NOTE(review): no guard against GSynInh[k] == 0 — under IEEE-754 this
// yields inf/NaN in V. Confirm callers guarantee a nonzero denominator,
// or consider the +epsilon pattern used by updateV_ANNDivInh.
// Extraction dropped interior lines (parameters, braces, return).
1418 int updateV_PtwiseQuotientLayer(
1421 MEM_GLOBAL
float *V,
1422 MEM_GLOBAL
float *GSynHead) {
1424 MEM_GLOBAL
float *GSynExc = &GSynHead[CHANNEL_EXC * nbatch * numNeurons];
1425 MEM_GLOBAL
float *GSynInh = &GSynHead[CHANNEL_INH * nbatch * numNeurons];
1427 #ifdef PV_USE_OPENMP_THREADS 1428 #pragma omp parallel for schedule(static) 1430 for (k = 0; k < numNeurons * nbatch; k++)
1433 #endif // PV_USE_CUDA 1435 V[k] = GSynExc[k] / GSynInh[k];
1441 int updateV_SigmoidLayer() {
// applyVMax_ANNLayer_threshminmax: clamp activity from above at AMax.
// Skipped entirely when AMax == FLT_MAX (i.e. "no maximum" sentinel).
// Operates on the extended activity buffer via kIndexExtended.
// NOTE(review): extraction dropped interior lines (parameter list, braces);
// comments cover only the visible code.
1446 int applyVMax_ANNLayer_threshminmax(
1449 MEM_GLOBAL
float *V,
1451 MEM_GLOBAL
float *activity,
// Only do work if a finite ceiling was configured.
1459 if (AMax < FLT_MAX) {
1462 #ifdef PV_USE_OPENMP_THREADS 1463 #pragma omp parallel for schedule(static) 1465 for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
// CUDA path: one thread per (batch, neuron) pair.
1467 kbatch = getIndex();
1468 #endif // PV_USE_CUDA 1470 int b = kbatch / numNeurons;
1471 int k = kbatch % numNeurons;
1472 MEM_GLOBAL
float *activityBatch = activity + b * (nx + lt + rt) * (ny + up + dn) * nf;
1473 int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);
// Ceiling clamp.
1474 if (activityBatch[kex] > AMax) {
1475 activityBatch[kex] = AMax;
// applyVThresh_ANNLayer_threshminmax: piecewise soft-threshold transfer on
// activity, driven by the membrane potential V:
//   V <  VThresh          -> activity = AMin
//   V <  VThresh + VWidth -> linear ramp from AMin up across the VWidth band
//   otherwise             -> activity -= AShift
// Skipped entirely when VThresh == -FLT_MAX (i.e. "no threshold" sentinel).
// NOTE(review): extraction dropped interior lines (parameter list, braces);
// comments cover only the visible code.
1483 int applyVThresh_ANNLayer_threshminmax(
1486 MEM_GLOBAL
float *V,
1491 MEM_GLOBAL
float *activity,
// Only do work if a real threshold was configured.
1499 if (VThresh > -FLT_MAX) {
1502 #ifdef PV_USE_OPENMP_THREADS 1503 #pragma omp parallel for schedule(static) 1505 for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
1507 kbatch = getIndex();
1509 #endif // PV_USE_CUDA 1511 int b = kbatch / numNeurons;
1512 int k = kbatch % numNeurons;
1513 MEM_GLOBAL
float *VBatch = V + b * numNeurons;
1514 MEM_GLOBAL
float *activityBatch = activity + b * (nx + lt + rt) * (ny + up + dn) * nf;
1515 int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);
// Below threshold: clamp to the floor value.
1516 if (VBatch[k] < VThresh) {
1517 activityBatch[kex] = AMin;
// In the transition band: interpolate so the ramp meets the shifted branch
// at V == VThresh + VWidth.
1519 else if (VBatch[k] < VThresh + VWidth) {
1520 activityBatch[kex] =
1521 AMin + (VThresh + VWidth - AShift - AMin) * (VBatch[k] - VThresh) / VWidth;
// Above the band: uniform downward shift.
1524 activityBatch[kex] -= AShift;
1532 int applyVThresh_ANNErrorLayer(
1535 MEM_GLOBAL
float *V,
1539 MEM_GLOBAL
float *activity,
1547 if (VThresh > -FLT_MAX) {
1550 #ifdef PV_USE_OPENMP_THREADS 1551 #pragma omp parallel for schedule(static) 1553 for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
1555 kbatch = getIndex();
1556 #endif // PV_USE_CUDA 1558 int b = kbatch / numNeurons;
1559 int k = kbatch % numNeurons;
1560 MEM_GLOBAL
float *VBatch = V + b * numNeurons;
1561 MEM_GLOBAL
float *activityBatch = activity + b * (nx + lt + rt) * (ny + up + dn) * nf;
1562 int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);
1563 if (fabsf(VBatch[k]) < VThresh)
1564 activityBatch[kex] = AMin;
1566 activityBatch[kex] -= AShift;
1573 int squareV_ANNSquaredLayer(
int nbatch,
int numNeurons, MEM_GLOBAL
float *V) {
1576 #ifdef PV_USE_OPENMP_THREADS 1577 #pragma omp parallel for schedule(static) 1579 for (k = 0; k < numNeurons * nbatch; k++)
1582 #endif // PV_USE_CUDA 1590 int updateSparsityTermDeriv_LogLatWTAGenLayer(
1594 MEM_GLOBAL
float *V,
1595 MEM_GLOBAL
float *sparsitytermderivative) {
1598 #ifdef PV_USE_OPENMP_THREADS 1599 #pragma omp parallel for 1601 for (k = 0; k < numNeurons / num_features; k++) {
1602 int feature_start = k * num_features;
1603 float sum_across_features = 0.0f;
1605 for (f = 0; f < num_features; f++) {
1606 sum_across_features += V[feature_start + f];
1608 float lat_wta_expr = lateralCompetitionPenalty(&V[feature_start], num_features);
1609 for (f = 0; f < num_features; f++) {
1610 sparsitytermderivative[k * num_features + f] =
1611 2.0f * (sum_across_features - V[k * num_features + f]) / (1.0f + lat_wta_expr);
1614 #else // PV_USE_CUDA 1617 int feature_start = k - (k % num_features);
1618 float sum_across_features = 0.0f;
1619 for (
int f = 0; f < num_features; f++) {
1620 sum_across_features += V[feature_start + f];
1622 float lat_wta_expr = lateralCompetitionPenalty(&V[feature_start], num_features);
1625 sparsitytermderivative[k] = 2.0f * (sum_across_features - V[k]) / (1.0f + lat_wta_expr);
1627 #endif // PV_USE_CUDA 1633 float lateralCompetitionPenalty(MEM_GLOBAL
float *V,
int num_features) {
1635 #ifdef PV_USE_OPENMP_THREADS 1636 #pragma omp parallel for 1638 for (
int p = 0; p < num_features; p++) {
1639 for (
int q = 0; q < num_features; q++) {
// setActivity_HyPerLayer: identity transfer — copy the membrane potential V
// (restricted layout) into the activity buffer A (extended layout with a
// halo of lt/rt/dn/up margin cells), for every neuron in every batch element.
// NOTE(review): extraction dropped interior lines (parameter list, braces);
// comments cover only the visible code.
1649 int setActivity_HyPerLayer(
1652 MEM_GLOBAL
float *A,
1653 MEM_GLOBAL
float *V,
1663 #ifdef PV_USE_OPENMP_THREADS 1664 #pragma omp parallel for schedule(static) 1666 for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
// CUDA path: one thread per (batch, neuron) pair.
1668 kbatch = getIndex();
1669 #endif // PV_USE_CUDA 1671 int b = kbatch / numNeurons;
1672 int k = kbatch % numNeurons;
// A is extended (halo included), so its per-batch stride differs from V's.
1673 MEM_GLOBAL
float *ABatch = A + b * ((nx + lt + rt) * (ny + up + dn) * nf);
1674 MEM_GLOBAL
float *VBatch = V + b * numNeurons;
// Map the restricted index k to its extended-buffer position.
1675 int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);
1676 ABatch[kex] = VBatch[k];
1682 int setActivity_PtwiseLinearTransferLayer(
1685 MEM_GLOBAL
float *A,
1686 MEM_GLOBAL
float *V,
1699 int last = numVertices - 1;
1701 #ifdef PV_USE_OPENMP_THREADS 1702 #pragma omp parallel for 1704 for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
1706 kbatch = getIndex();
1707 #endif // PV_USE_CUDA 1709 int b = kbatch / numNeurons;
1710 int k = kbatch % numNeurons;
1711 float *VBatch = V + b * numNeurons;
1712 float *ABatch = A + b * (nx + lt + rt) * (ny + up + dn) * nf;
1713 int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);
1715 float potential = VBatch[k];
1716 float activity = 0.0f;
1718 if (potential < verticesV[0]) {
1719 activity = verticesA[0] + slopes[0] * (potential - verticesV[0]);
1721 else if (potential >= verticesV[last]) {
1722 activity = verticesA[last] + slopes[numVertices] * (potential - verticesV[last]);
1725 for (v = 0; v < last; v++) {
1726 if (potential < verticesV[v]) {
1730 if (potential == verticesV[v]) {
1731 activity = verticesA[v];
1733 else if (potential > verticesV[v] && potential < verticesV[v + 1]) {
1734 activity = verticesA[v] + slopes[v + 1] * (potential - verticesV[v]);
1738 ABatch[kex] = activity;
1744 int setActivity_AccumulateLayer(
1747 MEM_GLOBAL
float *A,
1748 MEM_GLOBAL
float *V,
1760 #ifdef PV_USE_OPENMP_THREADS 1761 #pragma omp parallel for schedule(static) 1763 for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
1765 kbatch = getIndex();
1766 #endif // PV_USE_CUDA 1768 int b = kbatch / numNeurons;
1769 int k = kbatch % numNeurons;
1770 MEM_GLOBAL
float *ABatch = A + b * ((nx + lt + rt) * (ny + up + dn) * nf);
1771 MEM_GLOBAL
float *VBatch = V + b * numNeurons;
1772 int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);
1773 ABatch[kex] += VBatch[k];
1779 int setActivity_GapLayer(
1782 MEM_GLOBAL
float *A,
1783 MEM_GLOBAL
float *V,
1795 MEM_GLOBAL
float *checkActive,
1796 float ampSpikelet) {
1799 #ifdef PV_USE_OPENMP_THREADS 1800 #pragma omp parallel for schedule(static) 1802 for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
1804 kbatch = getIndex();
1805 #endif // PV_USE_CUDA 1807 int b = kbatch / numNeurons;
1808 int k = kbatch % numNeurons;
1809 MEM_GLOBAL
float *ABatch = A + b * ((nx + lt + rt) * (ny + up + dn) * nf);
1810 MEM_GLOBAL
float *VBatch = V + b * numNeurons;
1811 MEM_GLOBAL
float *checkActiveBatch = checkActive + b * numNeurons;
1812 int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);
1813 int kexorig = kIndexExtended(k, nx, ny, nf, orig_lt, orig_rt, orig_dn, orig_up);
1814 ABatch[kex] = VBatch[k];
1815 if (checkActiveBatch[kexorig] > 0.0f) {
1816 ABatch[kex] += ampSpikelet;
1823 int setActivity_SigmoidLayer(
1826 MEM_GLOBAL
float *A,
1827 MEM_GLOBAL
float *V,
1837 float sigmoid_alpha,
1841 float Vth = (VthRest + Vrest) / 2.0f;
1842 float sig_scale = -logf(1.0f / sigmoid_alpha - 1.0f) / (Vth - Vrest);
1843 if (!sigmoid_flag) {
1844 sig_scale = sig_scale / logf(3.0f);
1853 #ifdef PV_USE_OPENMP_THREADS 1854 #pragma omp parallel for schedule(static) 1856 for (kbatch = 0; kbatch < numNeurons; kbatch++)
1858 kbatch = getIndex();
1859 #endif // PV_USE_CUDA 1861 int b = kbatch / numNeurons;
1862 int k = kbatch % numNeurons;
1863 MEM_GLOBAL
float *ABatch = A + b * ((nx + lt + rt) * (ny + up + dn) * nf);
1864 MEM_GLOBAL
float *VBatch = V + b * numNeurons;
1865 int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);
1866 float activity = 0.0f;
1867 if (!sigmoid_flag) {
1868 activity = 0.5f - (VBatch[k] - Vth) * sig_scale / 2.0f;
1869 activity = activity < 0.0f ? 0.0f : activity;
1870 activity = activity > 1.0f ? 1.0f : activity;
1873 activity = 1.0f / (1.0f + expf(2.0f * (VBatch[k] - Vth) * sig_scale));
1875 ABatch[kex] = activity;
1877 ABatch[kex] = 1.0f - ABatch[kex];
// resetGSynBuffers_HyPerLayer: zero every channel of the GSyn buffer so the
// next timestep's synaptic input accumulates from a clean slate. Outer loop
// walks the num_channels channel slabs; inner (parallel) loop zeroes each
// slab's nbatch*numNeurons floats.
// NOTE(review): extraction dropped interior lines (parameter list, braces,
// return); comments cover only the visible code.
1884 int resetGSynBuffers_HyPerLayer(
1888 MEM_GLOBAL
float *GSynHead) {
1889 for (
int ch = 0; ch < num_channels; ch++) {
// Start of this channel's contiguous slab.
1890 MEM_GLOBAL
float *channelStart = &GSynHead[ch * nbatch * numNeurons];
1893 #ifdef PV_USE_OPENMP_THREADS 1894 #pragma omp parallel for schedule(static) 1896 for (k = 0; k < numNeurons * nbatch; k++)
1899 #endif // PV_USE_CUDA 1901 channelStart[k] = 0.0f;
1909 int resetGSynBuffers_PoolingIndexLayer(
1913 MEM_GLOBAL
float *GSynHead) {
1914 for (
int ch = 0; ch < num_channels; ch++) {
1915 MEM_GLOBAL
float *channelStart = &GSynHead[ch * nbatch * numNeurons];
1918 #ifdef PV_USE_OPENMP_THREADS 1919 #pragma omp parallel for schedule(static) 1921 for (k = 0; k < numNeurons * nbatch; k++)
1924 #endif // PV_USE_CUDA 1926 channelStart[k] = -1.0f;
1933 int resetGSynBuffers_SigmoidLayer() {
1938 int setActivity_MLPSigmoidLayer(
1941 MEM_GLOBAL
float *A,
1942 MEM_GLOBAL
float *V,
1956 #ifdef PV_USE_OPENMP_THREADS 1957 #pragma omp parallel for 1959 for (kbatch = 0; kbatch < numNeurons * nbatch; kbatch++)
1961 kbatch = getIndex();
1962 #endif // !PV_USE_CUDA 1964 int b = kbatch / numNeurons;
1965 int k = kbatch % numNeurons;
1966 float *VBatch = V + b * nx * ny * nf;
1967 float *ABatch = A + b * (nx + lt + rt) * (ny + up + dn) * nf;
1968 bool *dropoutBatch = dropout_buf + b * nx * ny * nf;
1970 int kex = kIndexExtended(k, nx, ny, nf, lt, rt, dn, up);
1971 float activity = 1.7159f * tanhf(2.0f / 3.0f * VBatch[k]) + linear_alpha * VBatch[k];
1973 ABatch[kex] = dropoutBatch[k] ? 0.0f : activity;