Example #1
  int LCAConn::update_dW(int axonId)
  { // compute dW but don't add them to the weights yet.
    // That takes place in reduceKernels, so that the output is
    // independent of the number of processors.
    int nExt = preSynapticLayer()->getNumExtended();
    int numKernelIndices = getNumDataPatches();
    const pvdata_t * preactbuf = preSynapticLayer()->getLayerData(getDelay(axonId));
    const pvdata_t * postactbuf = postSynapticLayer()->getLayerData(getDelay(axonId));

    int sya = (post->getLayerLoc()->nf * (post->getLayerLoc()->nx + 2*post->getLayerLoc()->nb));
    
    for(int kExt=0; kExt<nExt;kExt++) {
      PVPatch * weights = getWeights(kExt,axonId);
      size_t offset = getAPostOffset(kExt, axonId);
      pvdata_t preact = preactbuf[kExt];
      int ny = weights->ny;
      int nk = weights->nx * nfp;
      const pvdata_t * postactRef = &postactbuf[offset];
      pvdata_t * dwdata = get_dwData(axonId, kExt);
      int lineoffsetw = 0;
      int lineoffseta = 0;
      for( int y=0; y<ny; y++ ) {
        for( int k=0; k<nk; k++ ) {
          dwdata[lineoffsetw + k] += updateRule_dW(preact, postactRef[lineoffseta+k], lineoffseta+k);
        }
        lineoffsetw += syp;
        lineoffseta += sya;
      }
    }
    
    // Divide by (numNeurons/numKernels)
    int divisor = pre->getNumNeurons()/numKernelIndices;
    assert( divisor*numKernelIndices == pre->getNumNeurons() );
    for( int kernelindex=0; kernelindex<numKernelIndices; kernelindex++ ) {
      int numpatchitems = nxp*nyp*nfp;
      pvdata_t * dwpatchdata = get_dwDataHead(axonId,kernelindex);
      for( int n=0; n<numpatchitems; n++ ) {
        dwpatchdata[n] /= divisor;
      }
    }

    lastUpdateTime = parent->simulationTime();

    return PV_SUCCESS;    
  }
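The per-element increment above comes from updateRule_dW, which Example #1 does not show. A minimal hypothetical stand-in, assuming the plain Hebbian product of pre- and postsynaptic activity (the actual rule in LCAConn may threshold or scale the result):

// Hypothetical stand-in for updateRule_dW (pvdata_t is a float in PetaVision).
// The third argument, the index within the patch, is unused in this sketch.
static inline float updateRule_dW_sketch(float preact, float postact, int /*patchIndex*/)
{
   return preact * postact; // Hebbian product; dwMax scaling happens elsewhere
}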
Example #2
int MomentumConn::applyMomentum(int arbor_ID){
   int nExt = preSynapticLayer()->getNumExtended();
   const PVLayerLoc * loc = preSynapticLayer()->getLayerLoc();
   if(sharedWeights){
      int numKernels = getNumDataPatches();
      // Shared weights are updated in parallel, one kernel data patch per iteration
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for
#endif
      for(int kernelIdx = 0; kernelIdx < numKernels; kernelIdx++){
         pvwdata_t * dwdata_start  = get_dwDataHead(arbor_ID, kernelIdx);
         pvwdata_t * prev_dw_start = get_prev_dwDataHead(arbor_ID, kernelIdx);
         pvwdata_t * wdata_start   = get_wDataHead(arbor_ID, kernelIdx);
         if(!strcmp(momentumMethod, "simple")){
            for(int k = 0; k < nxp*nyp*nfp; k++){
               dwdata_start[k] += momentumTau * prev_dw_start[k] - momentumDecay*wdata_start[k];
            }
         }
         else if(!strcmp(momentumMethod, "viscosity")){
            for(int k = 0; k < nxp*nyp*nfp; k++){
               //dwdata_start[k] = momentumTau * (prev_dw_start[k] + dwdata_start[k]) * (1 - exp(-1.0/ momentumTau)) - momentumDecay*wdata_start[k];
               dwdata_start[k] = (prev_dw_start[k] * exp(-1.0/ momentumTau)) + dwdata_start[k] - momentumDecay*wdata_start[k];
            }
         }
         else if(!strcmp(momentumMethod, "alex")){
            for(int k = 0; k < nxp*nyp*nfp; k++){
               //weight_inc[i] := momW * weight_inc[i-1] - wc * epsW * weights[i-1] + epsW * weight_grads[i]
               //   weights[i] := weights[i-1] + weight_inc[i]
               dwdata_start[k] = momentumTau * prev_dw_start[k] - momentumDecay * getDWMax()* wdata_start[k] + dwdata_start[k];
            }
         }
      }
   }
   else{
      std::cout << "Warning: Momentum not implemented for non-shared weights, not implementing momentum\n";
   }
   return PV_SUCCESS;
}
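Read as scalar update rules, the three branches are: simple, dw += tau*prev_dw - decay*w; viscosity, dw = prev_dw*exp(-1/tau) + dw - decay*w; alex, dw = tau*prev_dw - decay*dwMax*w + dw. A minimal standalone sketch, with illustrative values standing in for momentumTau, momentumDecay, and getDWMax():

#include <cmath>
#include <cstdio>

// Scalar sketch of the three momentumMethod branches above. Parameter names
// mirror momentumTau, momentumDecay, and getDWMax(); the values are
// illustrative, not taken from any params file.
int main()
{
   float w = 0.5f, prev_dw = 0.02f, dw = 0.01f;
   const float tau = 0.9f, decay = 1e-4f, dwMax = 0.1f;

   float simple    = dw + tau * prev_dw - decay * w;
   float viscosity = prev_dw * std::exp(-1.0f / tau) + dw - decay * w;
   float alex      = tau * prev_dw - decay * dwMax * w + dw;

   std::printf("simple=%g viscosity=%g alex=%g\n", simple, viscosity, alex);
   return 0;
}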
Example #3
int PoolingConn::deliverPostsynapticPerspective(PVLayerCube const * activity, int arborID) {
    //Check channel number for noupdate
    if(getChannel() == CHANNEL_NOUPDATE) {
        return PV_SUCCESS;
    }
    assert(post->getChannel(getChannel()));

    assert(arborID >= 0);
    //Get number of neurons restricted target
    const int numPostRestricted = post->getNumNeurons();

    float dt_factor = getConvertToRateDeltaTimeFactor();

    const PVLayerLoc * sourceLoc = preSynapticLayer()->getLayerLoc();
    const PVLayerLoc * targetLoc = post->getLayerLoc();

    const int sourceNx = sourceLoc->nx;
    const int sourceNy = sourceLoc->ny;
    const int sourceNf = sourceLoc->nf;
    const int targetNx = targetLoc->nx;
    const int targetNy = targetLoc->ny;
    const int targetNf = targetLoc->nf;

    const PVHalo * sourceHalo = &sourceLoc->halo;
    const PVHalo * targetHalo = &targetLoc->halo;

    //get source layer's extended y stride
    int sy  = (sourceNx+sourceHalo->lt+sourceHalo->rt)*sourceNf;

    //The start of the gsyn buffer
    pvdata_t * gSynPatchHead = post->getChannel(this->getChannel());

    clearGateIdxBuffer();
    int* gatePatchHead = NULL;
    if(needPostIndexLayer) {
        gatePatchHead = postIndexLayer->getChannel(CHANNEL_EXC);
    }


    long * startSourceExtBuf = getPostToPreActivity();
    if(!startSourceExtBuf) {
        std::cout << "HyPerLayer::recvFromPost error getting preToPostActivity from connection. Is shrink_patches on?\n";
        exit(EXIT_FAILURE);
    }

    float resetVal = 0;
    if(getPvpatchAccumulateType() == ACCUMULATE_MAXPOOLING) {
        resetVal = -INFINITY;
    }


    for(int b = 0; b < parent->getNBatch(); b++) {
#ifdef PV_USE_OPENMP_THREADS
        #pragma omp parallel for
#endif
        for (int kTargetRes = 0; kTargetRes < numPostRestricted; kTargetRes++) {
            pvdata_t * activityBatch = activity->data + b * (sourceNx + sourceHalo->rt + sourceHalo->lt) * (sourceNy + sourceHalo->up + sourceHalo->dn) * sourceNf;
            pvdata_t * gSynPatchHeadBatch = gSynPatchHead + b * targetNx * targetNy * targetNf;

            //Change restricted to extended post neuron
            int kTargetExt = kIndexExtended(kTargetRes, targetNx, targetNy, targetNf, targetHalo->lt, targetHalo->rt, targetHalo->dn, targetHalo->up);

            //Read from buffer
            long startSourceExt = startSourceExtBuf[kTargetRes];

            //Calculate target's start of gsyn
            pvdata_t * gSynPatchPos = gSynPatchHeadBatch + kTargetRes;
            // Initialize to resetVal: -INFINITY for max pooling, 0 otherwise
            *gSynPatchPos = resetVal;

            int* gatePatchPos = NULL;
            if(needPostIndexLayer) {
                gatePatchPos = gatePatchHead + b * postIndexLayer->getNumNeurons() + kTargetRes;
                // -1 marks that no winning input index has been recorded yet
                *gatePatchPos = -1;
            }

            float* activityStartBuf = &(activityBatch[startSourceExt]);

            // No weight buffer is used in pooling; w below is a uniform scale factor
            int sf = postConn->fPatchSize();
            int yPatchSize = postConn->yPatchSize();
            int numPerStride = postConn->xPatchSize() * postConn->fPatchSize();

            const PVLayerLoc * postLoc = post->getLayerLoc();
            const int kfPost = featureIndex(kTargetExt, postLoc->nx + postLoc->halo.lt + postLoc->halo.rt, postLoc->ny + postLoc->halo.dn + postLoc->halo.up, postLoc->nf);
            int offset = kfPost;

            pvwdata_t w = 1.0;
            if(getPvpatchAccumulateType() == ACCUMULATE_SUMPOOLING) {
                float relative_XScale = pow(2, (post->getXScale() - pre->getXScale()));
                float relative_YScale = pow(2, (post->getYScale() - pre->getYScale()));
                w = 1.0/(nxp*nyp*relative_XScale*relative_YScale);
            }

            for (int ky = 0; ky < yPatchSize; ky++) {
                int kPreExt = startSourceExt + ky*sy+offset;
                const int kxPreExt = kxPos(kPreExt, sourceLoc->nx + sourceLoc->halo.lt + sourceLoc->halo.rt, sourceLoc->ny + sourceLoc->halo.dn + sourceLoc->halo.up, sourceLoc->nf);
                const int kyPreExt = kyPos(kPreExt, sourceLoc->nx + sourceLoc->halo.lt + sourceLoc->halo.rt, sourceLoc->ny + sourceLoc->halo.dn + sourceLoc->halo.up, sourceLoc->nf);
                const int kfPre = featureIndex(kPreExt, sourceLoc->nx + sourceLoc->halo.lt + sourceLoc->halo.rt, sourceLoc->ny + sourceLoc->halo.dn + sourceLoc->halo.up, sourceLoc->nf);
                const int kxPreGlobalExt = kxPreExt + sourceLoc->kx0;
                const int kyPreGlobalExt = kyPreExt + sourceLoc->ky0;
                const int kPreGlobalExt = kIndex(kxPreGlobalExt, kyPreGlobalExt, kfPre, sourceLoc->nxGlobal + sourceLoc->halo.lt + sourceLoc->halo.rt, sourceLoc->nyGlobal + sourceLoc->halo.up + sourceLoc->halo.dn, sourceLoc->nf);

                float * activityY = &(activityStartBuf[ky*sy+offset]);

                (accumulateFunctionFromPostPointer)(kPreGlobalExt, numPerStride, gSynPatchPos, activityY, &w, dt_factor, gatePatchPos, sf);
            }
        }
    }
    return PV_SUCCESS;
}
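The actual accumulation is hidden behind accumulateFunctionFromPostPointer. A sketch of what its max-pooling variant might look like, inferred only from the call site above (the real PetaVision accumulator may track the winning index differently):

// Sketch of a max-pooling accumulator matching the call
// (accumulateFunctionFromPostPointer)(kPreGlobalExt, numPerStride, gSynPatchPos,
//                                     activityY, &w, dt_factor, gatePatchPos, sf).
// Pooling is per-feature, so k advances by the feature stride sf; the weight w
// and dt_factor are ignored for max pooling.
static int maxPoolingFromPost_sketch(int kPreGlobalExt, int nk, float * gSyn,
      float * activity, float * /*w*/, float /*dt_factor*/, int * gateIdx, int sf)
{
   for (int k = 0; k < nk; k += sf) {
      float a = activity[k];
      if (a > *gSyn) {
         *gSyn = a;                                     // keep the running maximum
         if (gateIdx) { *gateIdx = kPreGlobalExt + k; } // record the winner (index arithmetic simplified)
      }
   }
   return 0; // PV_SUCCESS in PetaVision
}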
Example #4
int PoolingConn::deliverPresynapticPerspective(PVLayerCube const * activity, int arborID) {

    //Check if we need to update based on connection's channel
    if(getChannel() == CHANNEL_NOUPDATE) {
        return PV_SUCCESS;
    }
    assert(post->getChannel(getChannel()));

    float dt_factor;
    if (getPvpatchAccumulateType()==ACCUMULATE_STOCHASTIC) {
        dt_factor = getParent()->getDeltaTime();
    }
    else {
        dt_factor = getConvertToRateDeltaTimeFactor();
    }

    const PVLayerLoc * preLoc = preSynapticLayer()->getLayerLoc();
    const PVLayerLoc * postLoc = postSynapticLayer()->getLayerLoc();

    assert(arborID >= 0);
    const int numExtended = activity->numItems;

    float resetVal = 0;
    if(getPvpatchAccumulateType() == ACCUMULATE_MAXPOOLING) {
        resetVal = -INFINITY;
        float* gSyn = post->getChannel(getChannel());
        //gSyn is res
#ifdef PV_USE_OPENMP_THREADS
        #pragma omp parallel for
#endif
        for(int i = 0; i < post->getNumNeuronsAllBatches(); i++) {
            gSyn[i] = resetVal;
        }

    }


    clearGateIdxBuffer();

    for(int b = 0; b < parent->getNBatch(); b++) {
        pvdata_t * activityBatch = activity->data + b * (preLoc->nx + preLoc->halo.rt + preLoc->halo.lt) * (preLoc->ny + preLoc->halo.up + preLoc->halo.dn) * preLoc->nf;
        pvdata_t * gSynPatchHeadBatch = post->getChannel(getChannel()) + b * postLoc->nx * postLoc->ny * postLoc->nf;
        int* gatePatchHeadBatch = NULL;
        if(needPostIndexLayer) {
            gatePatchHeadBatch = postIndexLayer->getChannel(CHANNEL_EXC) + b * postIndexLayer->getNumNeurons();
        }

        unsigned int * activeIndicesBatch = NULL;
        if(activity->isSparse) {
            activeIndicesBatch = activity->activeIndices + b * (preLoc->nx + preLoc->halo.rt + preLoc->halo.lt) * (preLoc->ny + preLoc->halo.up + preLoc->halo.dn) * preLoc->nf;
        }
        int numLoop;
        if(activity->isSparse) {
            numLoop = activity->numActive[b];
        }
        else {
            numLoop = numExtended;
        }

        if(thread_gateIdxBuffer) {
#ifdef PV_USE_OPENMP_THREADS
            #pragma omp parallel for
#endif
            for(int i = 0; i < parent->getNumThreads() * post->getNumNeurons(); i++) {
                int ti = i/post->getNumNeurons();
                int ni = i % post->getNumNeurons();
                thread_gateIdxBuffer[ti][ni] = -1;
            }
        }

#ifdef PV_USE_OPENMP_THREADS
        //Clear all gsyn buffers
        if(thread_gSyn) {
            int numNeurons = post->getNumNeurons();
#ifdef PV_USE_OPENMP_THREADS
            #pragma omp parallel for
#endif
            for(int i = 0; i < parent->getNumThreads() * numNeurons; i++) {
                int ti = i/numNeurons;
                int ni = i % numNeurons;
                thread_gSyn[ti][ni] = resetVal;
            }
        }
#endif // PV_USE_OPENMP_THREADS

#ifdef PV_USE_OPENMP_THREADS
        #pragma omp parallel for schedule(static)
#endif
        for (int loopIndex = 0; loopIndex < numLoop; loopIndex++) {
            int kPreExt;
            if(activity->isSparse) {
                kPreExt = activeIndicesBatch[loopIndex];
            }
            else {
                kPreExt = loopIndex;
            }

            float a = activityBatch[kPreExt] * dt_factor;
            //if (a == 0.0f) continue;

            //If we're using thread_gSyn, set this here
            pvdata_t * gSynPatchHead;
            int * gatePatchHead = NULL;
#ifdef PV_USE_OPENMP_THREADS
            if(thread_gSyn) {
                int ti = omp_get_thread_num();
                gSynPatchHead = thread_gSyn[ti];
            }
            else {
                gSynPatchHead = gSynPatchHeadBatch;
            }

            if(needPostIndexLayer) {
                if(thread_gateIdxBuffer) {
                    int ti = omp_get_thread_num();
                    gatePatchHead = thread_gateIdxBuffer[ti];
                }
                else {
                    gatePatchHead = gatePatchHeadBatch;
                }
            }
#else // PV_USE_OPENMP_THREADS
            gSynPatchHead = gSynPatchHeadBatch;
            if(needPostIndexLayer) {
                gatePatchHead = gatePatchHeadBatch;
            }
#endif // PV_USE_OPENMP_THREADS
            //deliverOnePreNeuronActivity(kPreExt, arborID, a, gSynPatchHead, gatePatchHead);

            PVPatch * weights = getWeights(kPreExt, arborID);
            const int nk = weights->nx * fPatchSize();
            const int ny = weights->ny;
            const int sy  = getPostNonextStrides()->sy;       // stride in layer
            pvwdata_t * weightDataStart = NULL;
            pvgsyndata_t * postPatchStart = gSynPatchHead + getGSynPatchStart(kPreExt, arborID);
            int* postGatePatchStart = gatePatchHead + getGSynPatchStart(kPreExt, arborID);

            const int kxPreExt = kxPos(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn + preLoc->halo.up, preLoc->nf);
            const int kyPreExt = kyPos(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn + preLoc->halo.up, preLoc->nf);
            const int kfPre = featureIndex(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn + preLoc->halo.up, preLoc->nf);

            const int kxPreGlobalExt = kxPreExt + preLoc->kx0;
            const int kyPreGlobalExt = kyPreExt + preLoc->ky0;

            const int kPreGlobalExt = kIndex(kxPreGlobalExt, kyPreGlobalExt, kfPre, preLoc->nxGlobal + preLoc->halo.lt + preLoc->halo.rt, preLoc->nyGlobal + preLoc->halo.up + preLoc->halo.dn, preLoc->nf);

            int offset = kfPre;
            int sf = fPatchSize();
            pvwdata_t w = 1.0;
            if(getPvpatchAccumulateType() == ACCUMULATE_SUMPOOLING) {
                float relative_XScale = pow(2, (post->getXScale() - pre->getXScale()));
                float relative_YScale = pow(2, (post->getYScale() - pre->getYScale()));
                w = 1.0/(nxp*nyp*relative_XScale*relative_YScale);
            }
            void* auxPtr = NULL;
            for (int y = 0; y < ny; y++) {
                if(needPostIndexLayer) {
                    auxPtr = (postGatePatchStart+ y*sy + offset);
                }
                (accumulateFunctionPointer)(kPreGlobalExt, nk, postPatchStart + y*sy + offset, a, &w, auxPtr, sf);
            }
        }
#ifdef PV_USE_OPENMP_THREADS
        //Accumulate back into gSyn // Should this be done in HyPerLayer where it can be done once, as opposed to once per connection?
        if(thread_gSyn) {
            pvdata_t * gSynPatchHead = gSynPatchHeadBatch;
            int * gateIdxBuffer = NULL;
            if(needPostIndexLayer && thread_gateIdxBuffer) {
                gateIdxBuffer = gatePatchHeadBatch;
            }
            int numNeurons = post->getNumNeurons();
            //Looping over neurons first to be thread safe
            #pragma omp parallel for
            for(int ni = 0; ni < numNeurons; ni++) {
                //Different for maxpooling
                if(getPvpatchAccumulateType() == ACCUMULATE_MAXPOOLING) {
                    for(int ti = 0; ti < parent->getNumThreads(); ti++) {
                        if(gSynPatchHead[ni] < thread_gSyn[ti][ni]) {
                            gSynPatchHead[ni] = thread_gSyn[ti][ni];
                            if(needPostIndexLayer && thread_gateIdxBuffer) {
                                gateIdxBuffer[ni] = thread_gateIdxBuffer[ti][ni];
                                assert(gateIdxBuffer[ni] >= 0);
                            }
                        }
                    }
                }
                else {
                    for(int ti = 0; ti < parent->getNumThreads(); ti++) {
                        gSynPatchHead[ni] += thread_gSyn[ti][ni];
                    }
                }
            }
        }
#endif
    }
    if(activity->isSparse) {
        pvdata_t * gSyn = post->getChannel(getChannel());
        for (int k=0; k<post->getNumNeuronsAllBatches(); k++) {
            if (gSyn[k]==-INFINITY) {
                gSyn[k] = 0.0f;
            }
        }
    }
    return PV_SUCCESS;
}
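The thread_gSyn logic above follows a standard OpenMP pattern: each thread accumulates into a private buffer, and a second loop, parallel over neurons rather than threads, merges the buffers without write races. The pattern in miniature, stripped of PetaVision types (thread_gSyn here is assumed to hold numThreads buffers of numNeurons floats):

#include <vector>

// Miniature of the thread-private accumulation pattern used above.
// thread_gSyn[ti][ni] is thread ti's private copy of neuron ni's input.
void mergeThreadBuffers(std::vector<std::vector<float>> & thread_gSyn,
      std::vector<float> & gSyn, int numThreads)
{
   const int numNeurons = (int)gSyn.size();
#ifdef _OPENMP
#pragma omp parallel for
#endif
   for (int i = 0; i < numThreads * numNeurons; i++) {
      thread_gSyn[i / numNeurons][i % numNeurons] = 0.0f; // clear private buffers
   }
   // ... each thread accumulates into thread_gSyn[omp_get_thread_num()] ...
#ifdef _OPENMP
#pragma omp parallel for
#endif
   for (int ni = 0; ni < numNeurons; ni++) { // neurons outer, threads inner: no races
      for (int ti = 0; ti < numThreads; ti++) {
         gSyn[ni] += thread_gSyn[ti][ni];
      }
   }
}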
Example #5
int LCALIFLateralKernelConn::update_dW(int axonId) {
   if (parent->simulationTime() < dWUpdateTime) {
      return PV_SUCCESS;
   }
   dWUpdateTime += dWUpdatePeriod;
   int nExt = preSynapticLayer()->getNumExtended();
   int numKernelIndices = getNumDataPatches();
   updateIntegratedSpikeCount();
   float target_rate_sq = getTargetRateKHz()*getTargetRateKHz();
   const pvdata_t * preactbuf = integratedSpikeCount;
   const pvdata_t * postactbuf = integratedSpikeCount;

   int sya = (post->getLayerLoc()->nf * (post->getLayerLoc()->nx + post->getLayerLoc()->halo.lt + post->getLayerLoc()->halo.rt));

   const PVLayerLoc * preloc = pre->getLayerLoc();
   int nxpre = preloc->nx;
   int nypre = preloc->ny;
   int nfpre = preloc->nf;
   int nxglob = preloc->nxGlobal;
   int nyglob = preloc->nyGlobal;
   int kx0 = preloc->kx0;
   int ky0 = preloc->ky0;
   for(int kExt=0; kExt<nExt;kExt++) {
      int xglob = kxPos(kExt, nxpre + preloc->halo.lt + preloc->halo.rt, nypre + preloc->halo.dn + preloc->halo.up, nfpre) + kx0 - preloc->halo.lt;
      int yglob = kyPos(kExt, nxpre + preloc->halo.lt + preloc->halo.rt, nypre + preloc->halo.dn + preloc->halo.up, nfpre) + ky0 - preloc->halo.up;
      if (xglob < 0 || xglob >= nxglob || yglob < 0 || yglob >= nyglob) {
         continue;
      }
      PVPatch * weights = getWeights(kExt,axonId);
      size_t offset = getAPostOffset(kExt, axonId);
      pvdata_t preactrate = preactbuf[kExt]/integrationTimeConstant;
      int ny = weights->ny;
      int nk = weights->nx * nfp;
      pvwdata_t * dwdata = get_dwData(axonId, kExt);
      int lineoffsetw = 0;
      int lineoffseta = 0;
      for( int y=0; y<ny; y++ ) {
         for( int k=0; k<nk; k++ ) {
            int postactindex = offset+lineoffseta+k;
            if (postactindex != kExt) { // Neurons don't inhibit themselves
               pvdata_t postactrate = postactbuf[postactindex]/integrationTimeConstant;
               pvdata_t dw = preactrate*postactrate-target_rate_sq;
               dwdata[lineoffsetw + k] += dw;
            }
         }
         lineoffsetw += syp;
         lineoffseta += sya;
      }
   }
   // Divide each dw by the number of correlations that contributed to that dw (divisorptr was summed over all MPI processes in initialization).
   // Also divide by target_rate_sq to normalize to a dimensionless quantity.
   // The nonlinear filter and the multiplication by dt/tauINH takes place in updateWeights, because the filter has to be applied after reduceKernels
   // and the multiplication by dt/tauINH needs to take place after the filter.
   int patch_size = nxp*nyp*nfp;
   for( int kernelindex=0; kernelindex<numKernelIndices; kernelindex++ ) {
      pvwdata_t * dwpatchdata = get_dwDataHead(axonId,kernelindex);
      float * divisorptr = &interiorCounts[axonId][kernelindex*patch_size];
      for( int n=0; n<patch_size; n++ ) {
         assert(divisorptr[n]>0 || dwpatchdata[n]==0);
         if (divisorptr[n]>0) dwpatchdata[n] /= target_rate_sq * divisorptr[n];
      }
   }

   lastUpdateTime = parent->simulationTime();

   return PV_SUCCESS;
}
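Isolated from the loop above, the per-synapse rule is an anti-Hebbian correlation term: integrated spike counts are converted to rates, and dw measures how far the pre/post correlation sits above the squared target rate (division by target_rate_sq and the interior count happens afterwards). A scalar sketch:

// Scalar sketch of the LCALIF lateral rule above. preCount and postCount are
// integrated spike counts; tauInt is integrationTimeConstant; targetRateKHz
// is getTargetRateKHz().
static inline float lcalifLateral_dW_sketch(float preCount, float postCount,
      float tauInt, float targetRateKHz)
{
   float preRate  = preCount / tauInt;
   float postRate = postCount / tauInt;
   return preRate * postRate - targetRateKHz * targetRateKHz;
}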
Example #6
int TransposePoolingConn::deliverPresynapticPerspective(PVLayerCube const * activity, int arborID) {
   //Check if we need to update based on connection's channel
   if(getChannel() == CHANNEL_NOUPDATE){
      return PV_SUCCESS;
   }
   assert(post->getChannel(getChannel()));

   const PVLayerLoc * preLoc = preSynapticLayer()->getLayerLoc();
   const PVLayerLoc * postLoc = postSynapticLayer()->getLayerLoc();

   assert(arborID >= 0);
   const int numExtended = activity->numItems;

   //Grab postIdxLayer's data
   int* postIdxData = NULL;
   if(pvpatchAccumulateType == ACCUMULATE_MAXPOOLING){
      PoolingIndexLayer* postIndexLayer = originalConn->getPostIndexLayer();
      assert(postIndexLayer);
      //Make sure this layer is an integer layer
      assert(postIndexLayer->getDataType() == PV_INT);
      DataStore * store = parent->icCommunicator()->publisherStore(postIndexLayer->getLayerId());
      int delay = getDelay(arborID);

      //TODO this is currently a hack, need to properly implement data types.
      postIdxData = (int*) store->buffer(LOCAL, delay);
   }

   for(int b = 0; b < parent->getNBatch(); b++){
      pvdata_t * activityBatch = activity->data + b * (preLoc->nx + preLoc->halo.rt + preLoc->halo.lt) * (preLoc->ny + preLoc->halo.up + preLoc->halo.dn) * preLoc->nf;
      pvdata_t * gSynPatchHeadBatch = post->getChannel(getChannel()) + b * postLoc->nx * postLoc->ny * postLoc->nf;
      int * postIdxDataBatch = NULL;
      if(pvpatchAccumulateType == ACCUMULATE_MAXPOOLING){
         postIdxDataBatch = postIdxData + b * originalConn->getPostIndexLayer()->getNumExtended();
      }

      unsigned int * activeIndicesBatch = NULL;
      if(activity->isSparse){
         activeIndicesBatch = activity->activeIndices + b * (preLoc->nx + preLoc->halo.rt + preLoc->halo.lt) * (preLoc->ny + preLoc->halo.up + preLoc->halo.dn) * preLoc->nf;
      }

      int numLoop;
      if(activity->isSparse){
         numLoop = activity->numActive[b];
      }
      else{
         numLoop = numExtended;
      }

#ifdef PV_USE_OPENMP_THREADS
      // Clear all thread gSyn buffers
      if(thread_gSyn){
         int numNeurons = post->getNumNeurons();
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for
#endif
         for(int i = 0; i < parent->getNumThreads() * numNeurons; i++){
            int ti = i/numNeurons;
            int ni = i % numNeurons;
            thread_gSyn[ti][ni] = 0;
         }
      }
#endif // PV_USE_OPENMP_THREADS


#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
      for (int loopIndex = 0; loopIndex < numLoop; loopIndex++) {
         int kPreExt;
         if(activity->isSparse){
            kPreExt = activeIndicesBatch[loopIndex];
         }
         else{
            kPreExt = loopIndex;
         }

         float a = activityBatch[kPreExt];
         if (a == 0.0f) continue;

         //If we're using thread_gSyn, set this here
         pvdata_t * gSynPatchHead;
#ifdef PV_USE_OPENMP_THREADS
         if(thread_gSyn){
            int ti = omp_get_thread_num();
            gSynPatchHead = thread_gSyn[ti];
         }
         else{
            gSynPatchHead = gSynPatchHeadBatch;
         }
#else // PV_USE_OPENMP_THREADS
         gSynPatchHead = gSynPatchHeadBatch;
#endif // PV_USE_OPENMP_THREADS

         const int kxPreExt = kxPos(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn + preLoc->halo.up, preLoc->nf);
         const int kyPreExt = kyPos(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn + preLoc->halo.up, preLoc->nf);
         const int kfPre = featureIndex(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn + preLoc->halo.up, preLoc->nf);

         if(pvpatchAccumulateType == ACCUMULATE_MAXPOOLING){
            const int kxPreGlobalExt = kxPreExt + preLoc->kx0;
            const int kyPreGlobalExt = kyPreExt + preLoc->ky0;
            if(kxPreGlobalExt < preLoc->halo.lt || kxPreGlobalExt >= preLoc->nxGlobal + preLoc->halo.lt ||
               kyPreGlobalExt < preLoc->halo.up || kyPreGlobalExt >= preLoc->nyGlobal + preLoc->halo.up){
               continue;
            }

            // Convert the stored global extended index into a local restricted index
            int postGlobalExtIdx = postIdxDataBatch[kPreExt];

            // If all inputs are zero and input layer is sparse, postGlobalExtIdx will still be -1.
            if(postGlobalExtIdx == -1) { continue; }

            //Make sure the index is in bounds
            assert(postGlobalExtIdx >= 0 && postGlobalExtIdx <
                  (postLoc->nxGlobal + postLoc->halo.lt + postLoc->halo.rt) * 
                  (postLoc->nyGlobal + postLoc->halo.up + postLoc->halo.dn) * 
                  postLoc->nf);

            const int kxPostGlobalExt = kxPos(postGlobalExtIdx, postLoc->nxGlobal + postLoc->halo.lt + postLoc->halo.rt, postLoc->nyGlobal + postLoc->halo.dn + postLoc->halo.up, postLoc->nf);
            const int kyPostGlobalExt = kyPos(postGlobalExtIdx, postLoc->nxGlobal + postLoc->halo.lt + postLoc->halo.rt, postLoc->nyGlobal + postLoc->halo.dn + postLoc->halo.up, postLoc->nf);
            const int kfPost = featureIndex(postGlobalExtIdx, postLoc->nxGlobal + postLoc->halo.lt + postLoc->halo.rt, postLoc->nyGlobal + postLoc->halo.dn + postLoc->halo.up, postLoc->nf);

            const int kxPostLocalRes = kxPostGlobalExt - postLoc->kx0 - postLoc->halo.lt;
            const int kyPostLocalRes = kyPostGlobalExt - postLoc->ky0 - postLoc->halo.up;
            if(kxPostLocalRes < 0 || kxPostLocalRes >= postLoc->nx ||
               kyPostLocalRes < 0 || kyPostLocalRes >= postLoc->ny){
               continue;
            }

            const int kPostLocalRes = kIndex(kxPostLocalRes, kyPostLocalRes, kfPost, postLoc->nx, postLoc->ny, postLoc->nf);
            gSynPatchHeadBatch[kPostLocalRes] = a;
         }
         else{
            PVPatch * weights = getWeights(kPreExt, arborID);
            const int nk = weights->nx * fPatchSize();
            const int ny = weights->ny;
            pvgsyndata_t * postPatchStart = gSynPatchHead + getGSynPatchStart(kPreExt, arborID);
            const int sy  = getPostNonextStrides()->sy;       // stride in layer

            int offset = kfPre;
            int sf = fPatchSize();

            pvwdata_t w = 1.0;
            if(getPvpatchAccumulateType() == ACCUMULATE_SUMPOOLING){
              float relative_XScale = pow(2, (post->getXScale() - pre->getXScale()));
              float relative_YScale = pow(2, (post->getYScale() - pre->getYScale()));
              w = 1.0/(nxp*nyp*relative_XScale*relative_YScale);
            }
            void* auxPtr = NULL;
            for (int y = 0; y < ny; y++) {
               (accumulateFunctionPointer)(0, nk, postPatchStart + y*sy + offset, a, &w, auxPtr, sf);
            }
         }
      }

#ifdef PV_USE_OPENMP_THREADS
      //Set back into gSyn
      if(thread_gSyn){
         pvdata_t * gSynPatchHead = gSynPatchHeadBatch;
         int numNeurons = post->getNumNeurons();
         //Looping over neurons first to be thread safe
#pragma omp parallel for
         for(int ni = 0; ni < numNeurons; ni++){
            for(int ti = 0; ti < parent->getNumThreads(); ti++){
               if(pvpatchAccumulateType == ACCUMULATE_MAXPOOLING){
                  if(gSynPatchHead[ni] < fabs(thread_gSyn[ti][ni])){
                     gSynPatchHead[ni] = thread_gSyn[ti][ni];
                  }
               }
               else{
                  gSynPatchHead[ni] += thread_gSyn[ti][ni];
               }
            }
         }
      }
#endif
   }
   return PV_SUCCESS;
}
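The max-pooling branch hinges on converting a stored global extended index back into a local restricted one. A self-contained sketch of that conversion, assuming PetaVision's row-major layout for kxPos/kyPos/featureIndex/kIndex, with the feature index varying fastest (the helper signatures are simplified here):

// Sketches of the index helpers, assuming features vary fastest, then x, then y.
static inline int kxPosS(int k, int nx, int nf) { return (k / nf) % nx; }
static inline int kyPosS(int k, int nx, int nf) { return k / (nx * nf); }
static inline int featureIndexS(int k, int nf)  { return k % nf; }
static inline int kIndexS(int kx, int ky, int kf, int nx, int nf)
{
   return kf + (kx + ky * nx) * nf;
}

// Global extended -> local restricted, mirroring the max-pooling branch above.
// Returns -1 when the index falls outside this process's restricted region.
static int globalExtToLocalRes(int kGlobalExt, int nxGlobal, int nf,
      int haloLt, int haloRt, int haloUp,
      int kx0, int ky0, int nxLocal, int nyLocal)
{
   int nxExt = nxGlobal + haloLt + haloRt;
   int kx = kxPosS(kGlobalExt, nxExt, nf) - kx0 - haloLt;
   int ky = kyPosS(kGlobalExt, nxExt, nf) - ky0 - haloUp;
   int kf = featureIndexS(kGlobalExt, nf);
   if (kx < 0 || kx >= nxLocal || ky < 0 || ky >= nyLocal) { return -1; }
   return kIndexS(kx, ky, kf, nxLocal, nf);
}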