int LCAConn::update_dW(int axonId) { // compute dW but don't add them to the weights yet. // That takes place in reduceKernels, so that the output is // independent of the number of processors. int nExt = preSynapticLayer()->getNumExtended(); int numKernelIndices = getNumDataPatches(); const pvdata_t * preactbuf = preSynapticLayer()->getLayerData(getDelay(axonId)); const pvdata_t * postactbuf = postSynapticLayer()->getLayerData(getDelay(axonId)); int sya = (post->getLayerLoc()->nf * (post->getLayerLoc()->nx + 2*post->getLayerLoc()->nb)); for(int kExt=0; kExt<nExt;kExt++) { PVPatch * weights = getWeights(kExt,axonId); size_t offset = getAPostOffset(kExt, axonId); pvdata_t preact = preactbuf[kExt]; int ny = weights->ny; int nk = weights->nx * nfp; const pvdata_t * postactRef = &postactbuf[offset]; pvdata_t * dwdata = get_dwData(axonId, kExt); int lineoffsetw = 0; int lineoffseta = 0; for( int y=0; y<ny; y++ ) { for( int k=0; k<nk; k++ ) { dwdata[lineoffsetw + k] += updateRule_dW(preact, postactRef[lineoffseta+k],lineoffseta+k); } lineoffsetw += syp; lineoffseta += sya; } } // Divide by (numNeurons/numKernels) int divisor = pre->getNumNeurons()/numKernelIndices; assert( divisor*numKernelIndices == pre->getNumNeurons() ); for( int kernelindex=0; kernelindex<numKernelIndices; kernelindex++ ) { int numpatchitems = nxp*nyp*nfp; pvdata_t * dwpatchdata = get_dwDataHead(axonId,kernelindex); for( int n=0; n<numpatchitems; n++ ) { dwpatchdata[n] /= divisor; } } lastUpdateTime = parent->simulationTime(); return PV_SUCCESS; }
int MomentumConn::applyMomentum(int arbor_ID){ int nExt = preSynapticLayer()->getNumExtended(); const PVLayerLoc * loc = preSynapticLayer()->getLayerLoc(); if(sharedWeights){ int numKernels = getNumDataPatches(); //Shared weights done in parallel, parallel in numkernels #ifdef PV_USE_OPENMP_THREADS #pragma omp parallel for #endif for(int kernelIdx = 0; kernelIdx < numKernels; kernelIdx++){ pvwdata_t * dwdata_start = get_dwDataHead(arbor_ID, kernelIdx); pvwdata_t * prev_dw_start = get_prev_dwDataHead(arbor_ID, kernelIdx); pvwdata_t * wdata_start = get_wDataHead(arbor_ID, kernelIdx); if(!strcmp(momentumMethod, "simple")){ for(int k = 0; k < nxp*nyp*nfp; k++){ dwdata_start[k] += momentumTau * prev_dw_start[k] - momentumDecay*wdata_start[k]; } } else if(!strcmp(momentumMethod, "viscosity")){ for(int k = 0; k < nxp*nyp*nfp; k++){ //dwdata_start[k] = momentumTau * (prev_dw_start[k] + dwdata_start[k]) * (1 - exp(-1.0/ momentumTau)) - momentumDecay*wdata_start[k]; dwdata_start[k] = (prev_dw_start[k] * exp(-1.0/ momentumTau)) + dwdata_start[k] - momentumDecay*wdata_start[k]; } } else if(!strcmp(momentumMethod, "alex")){ for(int k = 0; k < nxp*nyp*nfp; k++){ //weight_inc[i] := momW * weight_inc[i-1] - wc * epsW * weights[i-1] + epsW * weight_grads[i] // weights[i] := weights[i-1] + weight_inc[i] dwdata_start[k] = momentumTau * prev_dw_start[k] - momentumDecay * getDWMax()* wdata_start[k] + dwdata_start[k]; } } } } else{ std::cout << "Warning: Momentum not implemented for non-shared weights, not implementing momentum\n"; } return PV_SUCCESS; }
int PoolingConn::deliverPostsynapticPerspective(PVLayerCube const * activity, int arborID) { //Check channel number for noupdate if(getChannel() == CHANNEL_NOUPDATE) { return PV_SUCCESS; } assert(post->getChannel(getChannel())); assert(arborID >= 0); //Get number of neurons restricted target const int numPostRestricted = post->getNumNeurons(); float dt_factor = getConvertToRateDeltaTimeFactor(); const PVLayerLoc * sourceLoc = preSynapticLayer()->getLayerLoc(); const PVLayerLoc * targetLoc = post->getLayerLoc(); const int sourceNx = sourceLoc->nx; const int sourceNy = sourceLoc->ny; const int sourceNf = sourceLoc->nf; const int targetNx = targetLoc->nx; const int targetNy = targetLoc->ny; const int targetNf = targetLoc->nf; const PVHalo * sourceHalo = &sourceLoc->halo; const PVHalo * targetHalo = &targetLoc->halo; //get source layer's extended y stride int sy = (sourceNx+sourceHalo->lt+sourceHalo->rt)*sourceNf; //The start of the gsyn buffer pvdata_t * gSynPatchHead = post->getChannel(this->getChannel()); clearGateIdxBuffer(); int* gatePatchHead = NULL; if(needPostIndexLayer) { gatePatchHead = postIndexLayer->getChannel(CHANNEL_EXC); } long * startSourceExtBuf = getPostToPreActivity(); if(!startSourceExtBuf) { std::cout << "HyPerLayer::recvFromPost error getting preToPostActivity from connection. Is shrink_patches on?\n"; exit(EXIT_FAILURE); } float resetVal = 0; if(getPvpatchAccumulateType() == ACCUMULATE_MAXPOOLING) { resetVal = -INFINITY; } for(int b = 0; b < parent->getNBatch(); b++) { #ifdef PV_USE_OPENMP_THREADS #pragma omp parallel for #endif for (int kTargetRes = 0; kTargetRes < numPostRestricted; kTargetRes++) { pvdata_t * activityBatch = activity->data + b * (sourceNx + sourceHalo->rt + sourceHalo->lt) * (sourceNy + sourceHalo->up + sourceHalo->dn) * sourceNf; pvdata_t * gSynPatchHeadBatch = gSynPatchHead + b * targetNx * targetNy * targetNf; //Change restricted to extended post neuron int kTargetExt = kIndexExtended(kTargetRes, targetNx, targetNy, targetNf, targetHalo->lt, targetHalo->rt, targetHalo->dn, targetHalo->up); //Read from buffer long startSourceExt = startSourceExtBuf[kTargetRes]; //Calculate target's start of gsyn pvdata_t * gSynPatchPos = gSynPatchHeadBatch + kTargetRes; //Initialize patch as a huge negative number *gSynPatchPos = resetVal; int* gatePatchPos = NULL; if(needPostIndexLayer) { gatePatchPos = gatePatchHead + b * postIndexLayer->getNumNeurons() + kTargetRes; //Initialize gatePatchPos as a negative number *gatePatchPos = -1; } float* activityStartBuf = &(activityBatch[startSourceExt]); pvwdata_t * weightY = NULL; //No weights in pooling int sf = postConn->fPatchSize(); int yPatchSize = postConn->yPatchSize(); int numPerStride = postConn->xPatchSize() * postConn->fPatchSize(); const PVLayerLoc * postLoc = post->getLayerLoc(); const int kfPost = featureIndex(kTargetExt, postLoc->nx + postLoc->halo.lt + postLoc->halo.rt, postLoc->ny + postLoc->halo.dn + postLoc->halo.up, postLoc->nf); int offset = kfPost; pvwdata_t w = 1.0; if(getPvpatchAccumulateType() == ACCUMULATE_SUMPOOLING) { float relative_XScale = pow(2, (post->getXScale() - pre->getXScale())); float relative_YScale = pow(2, (post->getYScale() - pre->getYScale())); w = 1.0/(nxp*nyp*relative_XScale*relative_YScale); } for (int ky = 0; ky < yPatchSize; ky++) { int kPreExt = startSourceExt + ky*sy+offset; const int kxPreExt = kxPos(kPreExt, sourceLoc->nx + sourceLoc->halo.lt + sourceLoc->halo.rt, sourceLoc->ny + sourceLoc->halo.dn + sourceLoc->halo.up, sourceLoc->nf); const int kyPreExt = kyPos(kPreExt, sourceLoc->nx + sourceLoc->halo.lt + sourceLoc->halo.rt, sourceLoc->ny + sourceLoc->halo.dn + sourceLoc->halo.up, sourceLoc->nf); const int kfPre = featureIndex(kPreExt, sourceLoc->nx + sourceLoc->halo.lt + sourceLoc->halo.rt, sourceLoc->ny + sourceLoc->halo.dn + sourceLoc->halo.up, sourceLoc->nf); const int kxPreGlobalExt = kxPreExt + sourceLoc->kx0; const int kyPreGlobalExt = kyPreExt + sourceLoc->ky0; const int kPreGlobalExt = kIndex(kxPreGlobalExt, kyPreGlobalExt, kfPre, sourceLoc->nxGlobal + sourceLoc->halo.lt + sourceLoc->halo.rt, sourceLoc->nyGlobal + sourceLoc->halo.up + sourceLoc->halo.dn, sourceLoc->nf); float * activityY = &(activityStartBuf[ky*sy+offset]); (accumulateFunctionFromPostPointer)(kPreGlobalExt, numPerStride, gSynPatchPos, activityY, &w, dt_factor, gatePatchPos, sf); } } } return PV_SUCCESS; }
int PoolingConn::deliverPresynapticPerspective(PVLayerCube const * activity, int arborID) { //Check if we need to update based on connection's channel if(getChannel() == CHANNEL_NOUPDATE) { return PV_SUCCESS; } assert(post->getChannel(getChannel())); float dt_factor; if (getPvpatchAccumulateType()==ACCUMULATE_STOCHASTIC) { dt_factor = getParent()->getDeltaTime(); } else { dt_factor = getConvertToRateDeltaTimeFactor(); } const PVLayerLoc * preLoc = preSynapticLayer()->getLayerLoc(); const PVLayerLoc * postLoc = postSynapticLayer()->getLayerLoc(); assert(arborID >= 0); const int numExtended = activity->numItems; float resetVal = 0; if(getPvpatchAccumulateType() == ACCUMULATE_MAXPOOLING) { resetVal = -INFINITY; float* gSyn = post->getChannel(getChannel()); //gSyn is res #ifdef PV_USE_OPENMP_THREADS #pragma omp parallel for #endif for(int i = 0; i < post->getNumNeuronsAllBatches(); i++) { gSyn[i] = resetVal; } } clearGateIdxBuffer(); for(int b = 0; b < parent->getNBatch(); b++) { pvdata_t * activityBatch = activity->data + b * (preLoc->nx + preLoc->halo.rt + preLoc->halo.lt) * (preLoc->ny + preLoc->halo.up + preLoc->halo.dn) * preLoc->nf; pvdata_t * gSynPatchHeadBatch = post->getChannel(getChannel()) + b * postLoc->nx * postLoc->ny * postLoc->nf; int* gatePatchHeadBatch = NULL; if(needPostIndexLayer) { gatePatchHeadBatch = postIndexLayer->getChannel(CHANNEL_EXC) + b * postIndexLayer->getNumNeurons(); } unsigned int * activeIndicesBatch = NULL; if(activity->isSparse) { activeIndicesBatch = activity->activeIndices + b * (preLoc->nx + preLoc->halo.rt + preLoc->halo.lt) * (preLoc->ny + preLoc->halo.up + preLoc->halo.dn) * preLoc->nf; } int numLoop; if(activity->isSparse) { numLoop = activity->numActive[b]; } else { numLoop = numExtended; } if(thread_gateIdxBuffer) { #ifdef PV_USE_OPENMP_THREADS #pragma omp parallel for #endif for(int i = 0; i < parent->getNumThreads() * post->getNumNeurons(); i++) { int ti = i/post->getNumNeurons(); int ni = i % post->getNumNeurons(); thread_gateIdxBuffer[ti][ni] = -1; } } #ifdef PV_USE_OPENMP_THREADS //Clear all gsyn buffers if(thread_gSyn) { int numNeurons = post->getNumNeurons(); #ifdef PV_USE_OPENMP_THREADS #pragma omp parallel for #endif for(int i = 0; i < parent->getNumThreads() * numNeurons; i++) { int ti = i/numNeurons; int ni = i % numNeurons; thread_gSyn[ti][ni] = resetVal; } } #endif // PV_USE_OPENMP_THREADS #ifdef PV_USE_OPENMP_THREADS #pragma omp parallel for schedule(static) #endif for (int loopIndex = 0; loopIndex < numLoop; loopIndex++) { int kPreExt; if(activity->isSparse) { kPreExt = activeIndicesBatch[loopIndex]; } else { kPreExt = loopIndex; } float a = activityBatch[kPreExt] * dt_factor; //if (a == 0.0f) continue; //If we're using thread_gSyn, set this here pvdata_t * gSynPatchHead; //float * gatePatchHead = NULL; int * gatePatchHead = NULL; #ifdef PV_USE_OPENMP_THREADS if(thread_gSyn) { int ti = omp_get_thread_num(); gSynPatchHead = thread_gSyn[ti]; } else { gSynPatchHead = gSynPatchHeadBatch; } if(needPostIndexLayer) { if(thread_gateIdxBuffer) { int ti = omp_get_thread_num(); gatePatchHead = thread_gateIdxBuffer[ti]; } else { gatePatchHead = gatePatchHeadBatch; } } #else // PV_USE_OPENMP_THREADS gSynPatchHead = gSynPatchHeadBatch; if(needPostIndexLayer) { gatePatchHead = gatePatchHeadBatch; } #endif // PV_USE_OPENMP_THREADS //deliverOnePreNeuronActivity(kPreExt, arborID, a, gSynPatchHead, gatePatchHead); PVPatch * weights = getWeights(kPreExt, arborID); const int nk = weights->nx * fPatchSize(); const int ny = weights->ny; const int sy = getPostNonextStrides()->sy; // stride in layer pvwdata_t * weightDataStart = NULL; pvgsyndata_t * postPatchStart = gSynPatchHead + getGSynPatchStart(kPreExt, arborID); int* postGatePatchStart = gatePatchHead + getGSynPatchStart(kPreExt, arborID); //float* postGatePatchStart = gatePatchHead + getGSynPatchStart(kPreExt, arborID); const int kxPreExt = kxPos(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn + preLoc->halo.up, preLoc->nf); const int kyPreExt = kyPos(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn + preLoc->halo.up, preLoc->nf); const int kfPre = featureIndex(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn + preLoc->halo.up, preLoc->nf); const int kxPreGlobalExt = kxPreExt + preLoc->kx0; const int kyPreGlobalExt = kyPreExt + preLoc->ky0; const int kPreGlobalExt = kIndex(kxPreGlobalExt, kyPreGlobalExt, kfPre, preLoc->nxGlobal + preLoc->halo.lt + preLoc->halo.rt, preLoc->nyGlobal + preLoc->halo.up + preLoc->halo.dn, preLoc->nf); int offset = kfPre; int sf = fPatchSize(); pvwdata_t w = 1.0; if(getPvpatchAccumulateType() == ACCUMULATE_SUMPOOLING) { float relative_XScale = pow(2, (post->getXScale() - pre->getXScale())); float relative_YScale = pow(2, (post->getYScale() - pre->getYScale())); w = 1.0/(nxp*nyp*relative_XScale*relative_YScale); } void* auxPtr = NULL; for (int y = 0; y < ny; y++) { if(needPostIndexLayer) { auxPtr = (postGatePatchStart+ y*sy + offset); } (accumulateFunctionPointer)(kPreGlobalExt, nk, postPatchStart + y*sy + offset, a, &w, auxPtr, sf); } } #ifdef PV_USE_OPENMP_THREADS //Accumulate back into gSyn // Should this be done in HyPerLayer where it can be done once, as opposed to once per connection? if(thread_gSyn) { pvdata_t * gSynPatchHead = gSynPatchHeadBatch; //float* gateIdxBuffer = postIndexLayer->getChannel(CHANNEL_EXC); int * gateIdxBuffer = NULL; if(needPostIndexLayer && thread_gateIdxBuffer) { gateIdxBuffer = gatePatchHeadBatch; } int numNeurons = post->getNumNeurons(); //Looping over neurons first to be thread safe #pragma omp parallel for for(int ni = 0; ni < numNeurons; ni++) { //Different for maxpooling if(getPvpatchAccumulateType() == ACCUMULATE_MAXPOOLING) { for(int ti = 0; ti < parent->getNumThreads(); ti++) { if(gSynPatchHead[ni] < thread_gSyn[ti][ni]) { gSynPatchHead[ni] = thread_gSyn[ti][ni]; if(needPostIndexLayer && thread_gateIdxBuffer) { gateIdxBuffer[ni] = thread_gateIdxBuffer[ti][ni]; assert(gateIdxBuffer >= 0); } } } } else { for(int ti = 0; ti < parent->getNumThreads(); ti++) { gSynPatchHead[ni] += thread_gSyn[ti][ni]; } } } } #endif } if(activity->isSparse) { pvdata_t * gSyn = post->getChannel(getChannel()); for (int k=0; k<post->getNumNeuronsAllBatches(); k++) { if (gSyn[k]==-INFINITY) { gSyn[k] = 0.0f; } } } return PV_SUCCESS; }
int LCALIFLateralKernelConn::update_dW(int axonId) { if (parent->simulationTime() < dWUpdateTime) { return PV_SUCCESS; } dWUpdateTime += dWUpdatePeriod; int nExt = preSynapticLayer()->getNumExtended(); int numKernelIndices = getNumDataPatches(); updateIntegratedSpikeCount(); float target_rate_sq = getTargetRateKHz()*getTargetRateKHz(); const pvdata_t * preactbuf = integratedSpikeCount; const pvdata_t * postactbuf = integratedSpikeCount; int sya = (post->getLayerLoc()->nf * (post->getLayerLoc()->nx + post->getLayerLoc()->halo.lt + post->getLayerLoc()->halo.rt)); const PVLayerLoc * preloc = pre->getLayerLoc(); int nxpre = preloc->nx; int nypre = preloc->ny; int nfpre = preloc->nf; int nxglob = preloc->nxGlobal; int nyglob = preloc->nyGlobal; int kx0 = preloc->kx0; int ky0 = preloc->ky0; for(int kExt=0; kExt<nExt;kExt++) { int xglob = kxPos(kExt, nxpre + preloc->halo.lt + preloc->halo.rt, nypre + preloc->halo.dn + preloc->halo.up, nfpre) + kx0 - preloc->halo.lt; int yglob = kyPos(kExt, nxpre + preloc->halo.lt + preloc->halo.rt, nypre + preloc->halo.dn + preloc->halo.up, nfpre) + ky0 - preloc->halo.dn; if (xglob < 0 || xglob >= nxglob || yglob < 0 || yglob >= nyglob) { continue; } PVPatch * weights = getWeights(kExt,axonId); size_t offset = getAPostOffset(kExt, axonId); pvdata_t preactrate = preactbuf[kExt]/integrationTimeConstant; int ny = weights->ny; int nk = weights->nx * nfp; pvwdata_t * dwdata = get_dwData(axonId, kExt); int lineoffsetw = 0; int lineoffseta = 0; for( int y=0; y<ny; y++ ) { for( int k=0; k<nk; k++ ) { int postactindex = offset+lineoffseta+k; if (postactindex != kExt) { // Neurons don't inhibit themselves pvdata_t postactrate = postactbuf[postactindex]/integrationTimeConstant; pvdata_t dw = preactrate*postactrate-target_rate_sq; dwdata[lineoffsetw + k] += dw; } } lineoffsetw += syp; lineoffseta += sya; } } // Divide each dw by the number of correlations that contributed to that dw (divisorptr was summed over all MPI processes in initialization). // Also divide by target_rate_sq to normalize to a dimensionless quantity. // The nonlinear filter and the multiplication by dt/tauINH takes place in updateWeights, because the filter has to be applied after reduceKernels // and the multiplication by dt/tauINH needs to take place after the filter. int patch_size = nxp*nyp*nfp; for( int kernelindex=0; kernelindex<numKernelIndices; kernelindex++ ) { pvwdata_t * dwpatchdata = get_dwDataHead(axonId,kernelindex); float * divisorptr = &interiorCounts[axonId][kernelindex*patch_size]; for( int n=0; n<patch_size; n++ ) { assert(divisorptr[n]>0 || dwpatchdata[n]==0); if (divisorptr[n]>0) dwpatchdata[n] /= target_rate_sq * divisorptr[n]; } } lastUpdateTime = parent->simulationTime(); return PV_SUCCESS; }
int TransposePoolingConn::deliverPresynapticPerspective(PVLayerCube const * activity, int arborID) { //Check if we need to update based on connection's channel if(getChannel() == CHANNEL_NOUPDATE){ return PV_SUCCESS; } assert(post->getChannel(getChannel())); const PVLayerLoc * preLoc = preSynapticLayer()->getLayerLoc(); const PVLayerLoc * postLoc = postSynapticLayer()->getLayerLoc(); assert(arborID >= 0); const int numExtended = activity->numItems; //Grab postIdxLayer's data int* postIdxData = NULL; if(pvpatchAccumulateType == ACCUMULATE_MAXPOOLING){ PoolingIndexLayer* postIndexLayer = originalConn->getPostIndexLayer(); assert(postIndexLayer); //Make sure this layer is an integer layer assert(postIndexLayer->getDataType() == PV_INT); DataStore * store = parent->icCommunicator()->publisherStore(postIndexLayer->getLayerId()); int delay = getDelay(arborID); //TODO this is currently a hack, need to properly implement data types. postIdxData = (int*) store->buffer(LOCAL, delay); } for(int b = 0; b < parent->getNBatch(); b++){ pvdata_t * activityBatch = activity->data + b * (preLoc->nx + preLoc->halo.rt + preLoc->halo.lt) * (preLoc->ny + preLoc->halo.up + preLoc->halo.dn) * preLoc->nf; pvdata_t * gSynPatchHeadBatch = post->getChannel(getChannel()) + b * postLoc->nx * postLoc->ny * postLoc->nf; int * postIdxDataBatch = NULL; if(pvpatchAccumulateType == ACCUMULATE_MAXPOOLING){ postIdxDataBatch = postIdxData + b * originalConn->getPostIndexLayer()->getNumExtended(); } unsigned int * activeIndicesBatch = NULL; if(activity->isSparse){ activeIndicesBatch = activity->activeIndices + b * (preLoc->nx + preLoc->halo.rt + preLoc->halo.lt) * (preLoc->ny + preLoc->halo.up + preLoc->halo.dn) * preLoc->nf; } int numLoop; if(activity->isSparse){ numLoop = activity->numActive[b]; } else{ numLoop = numExtended; } #ifdef PV_USE_OPENMP_THREADS //Clear all thread gsyn buffer if(thread_gSyn){ int numNeurons = post->getNumNeurons(); #ifdef PV_USE_OPENMP_THREADS #pragma omp parallel for #endif for(int i = 0; i < parent->getNumThreads() * numNeurons; i++){ int ti = i/numNeurons; int ni = i % numNeurons; thread_gSyn[ti][ni] = 0; } } #endif // PV_USE_OPENMP_THREADS #ifdef PV_USE_OPENMP_THREADS #pragma omp parallel for schedule(static) #endif for (int loopIndex = 0; loopIndex < numLoop; loopIndex++) { int kPreExt; if(activity->isSparse){ kPreExt = activeIndicesBatch[loopIndex]; } else{ kPreExt = loopIndex; } float a = activityBatch[kPreExt]; if (a == 0.0f) continue; //If we're using thread_gSyn, set this here pvdata_t * gSynPatchHead; #ifdef PV_USE_OPENMP_THREADS if(thread_gSyn){ int ti = omp_get_thread_num(); gSynPatchHead = thread_gSyn[ti]; } else{ gSynPatchHead = gSynPatchHeadBatch; } #else // PV_USE_OPENMP_THREADS gSynPatchHead = gSynPatchHeadBatch; #endif // PV_USE_OPENMP_THREADS const int kxPreExt = kxPos(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn + preLoc->halo.up, preLoc->nf); const int kyPreExt = kyPos(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn + preLoc->halo.up, preLoc->nf); const int kfPre = featureIndex(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn + preLoc->halo.up, preLoc->nf); if(pvpatchAccumulateType == ACCUMULATE_MAXPOOLING){ const int kxPreGlobalExt = kxPreExt + preLoc->kx0; const int kyPreGlobalExt = kyPreExt + preLoc->ky0; if(kxPreGlobalExt < preLoc->halo.lt || kxPreGlobalExt >= preLoc->nxGlobal + preLoc->halo.lt || kyPreGlobalExt < preLoc->halo.up || kyPreGlobalExt >= preLoc->nyGlobal + preLoc->halo.up){ continue; } //Convert stored global extended index into local extended index int postGlobalExtIdx = postIdxDataBatch[kPreExt]; // If all inputs are zero and input layer is sparse, postGlobalExtIdx will still be -1. if(postGlobalExtIdx == -1) { continue; } //Make sure the index is in bounds assert(postGlobalExtIdx >= 0 && postGlobalExtIdx < (postLoc->nxGlobal + postLoc->halo.lt + postLoc->halo.rt) * (postLoc->nyGlobal + postLoc->halo.up + postLoc->halo.dn) * postLoc->nf); const int kxPostGlobalExt = kxPos(postGlobalExtIdx, postLoc->nxGlobal + postLoc->halo.lt + postLoc->halo.rt, postLoc->nyGlobal + postLoc->halo.dn + postLoc->halo.up, postLoc->nf); const int kyPostGlobalExt = kyPos(postGlobalExtIdx, postLoc->nxGlobal + postLoc->halo.lt + postLoc->halo.rt, postLoc->nyGlobal + postLoc->halo.dn + postLoc->halo.up, postLoc->nf); const int kfPost = featureIndex(postGlobalExtIdx, postLoc->nxGlobal + postLoc->halo.lt + postLoc->halo.rt, postLoc->nyGlobal + postLoc->halo.dn + postLoc->halo.up, postLoc->nf); const int kxPostLocalRes = kxPostGlobalExt - postLoc->kx0 - postLoc->halo.lt; const int kyPostLocalRes = kyPostGlobalExt - postLoc->ky0 - postLoc->halo.up; if(kxPostLocalRes < 0 || kxPostLocalRes >= postLoc->nx|| kyPostLocalRes < 0 || kyPostLocalRes >= postLoc->ny){ continue; } const int kPostLocalRes = kIndex(kxPostLocalRes, kyPostLocalRes, kfPost, postLoc->nx, postLoc->ny, postLoc->nf); gSynPatchHeadBatch[kPostLocalRes] = a; } else{ PVPatch * weights = getWeights(kPreExt, arborID); const int nk = weights->nx * fPatchSize(); const int ny = weights->ny; pvgsyndata_t * postPatchStart = gSynPatchHead + getGSynPatchStart(kPreExt, arborID); const int sy = getPostNonextStrides()->sy; // stride in layer int offset = kfPre; int sf = fPatchSize(); pvwdata_t w = 1.0; if(getPvpatchAccumulateType() == ACCUMULATE_SUMPOOLING){ float relative_XScale = pow(2, (post->getXScale() - pre->getXScale())); float relative_YScale = pow(2, (post->getYScale() - pre->getYScale())); w = 1.0/(nxp*nyp*relative_XScale*relative_YScale); } void* auxPtr = NULL; for (int y = 0; y < ny; y++) { (accumulateFunctionPointer)(0, nk, postPatchStart + y*sy + offset, a, &w, auxPtr, sf); } } } #ifdef PV_USE_OPENMP_THREADS //Set back into gSyn if(thread_gSyn){ pvdata_t * gSynPatchHead = gSynPatchHeadBatch; int numNeurons = post->getNumNeurons(); //Looping over neurons first to be thread safe #pragma omp parallel for for(int ni = 0; ni < numNeurons; ni++){ for(int ti = 0; ti < parent->getNumThreads(); ti++){ if(pvpatchAccumulateType == ACCUMULATE_MAXPOOLING){ if(gSynPatchHead[ni] < fabs(thread_gSyn[ti][ni])){ gSynPatchHead[ni] = thread_gSyn[ti][ni]; } } else{ gSynPatchHead[ni] += thread_gSyn[ti][ni]; } } } } #endif } return PV_SUCCESS; }