Example #1
//
// A replacement for globalIndexFromLocal from conversions.h.
// WARNING - any changes in conversions.h should be reflected here.
static inline int globalIndexFromLocal_nompi(int kl, PVLayerLoc loc)
{
   int kxg = loc.kx0 + kxPos(kl, loc.nx, loc.ny, loc.nf);
   int kyg = loc.ky0 + kyPos(kl, loc.nx, loc.ny, loc.nf);
   int  kf = featureIndex(kl, loc.nx, loc.ny, loc.nf);
   return kIndex(kxg, kyg, kf, loc.nxGlobal, loc.nyGlobal, loc.nf);
}
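For reference, the conversions.h helpers used throughout these examples assume the standard PetaVision layout, with features varying fastest, then x, then y. A minimal sketch of that convention (an assumption about conversions.h, written with _sketch suffixes to mark it as illustrative rather than a verbatim copy):

// Sketch of the assumed conversions.h index layout: kf varies fastest, then kx, then ky.
static inline int kIndex_sketch(int kx, int ky, int kf, int nx, int ny, int nf)
{
   return kf + nf * (kx + nx * ky);
}
static inline int kxPos_sketch(int k, int nx, int ny, int nf) { return (k / nf) % nx; }
static inline int kyPos_sketch(int k, int nx, int ny, int nf) { return k / (nx * nf); }
static inline int featureIndex_sketch(int k, int nx, int ny, int nf) { return k % nf; }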
Example #2
int BIDSSensorLayer::updateState(double timef, double dt){
   pvdata_t * output = getCLayer()->V;
   pvdata_t * input = blayer->getCLayer()->activity->data;
   int index;
   //Iterate through post layer
   for (int i = 0; i < nx * ny; i++){
      assert(nf == 1);
      //Iterate through features
//      std::cout << "Node (" << coords[i].xCoord << ", " << coords[i].yCoord << ")\n";
      for (int k = 0; k < nf; k++){
         int x = i % nx;
         int y = i / nx;
         index = kIndex(x, y, k, nx, ny, nf);
         data[i][buf_index] = input[index] - (neutral_val / 256);
//         std::cout << "\tBuf_index: " << buf_index << ": " << data[i][buf_index] << "\n";
         float out = matchFilter(i, (int)(timef * dt));
         output[index] = out * weight;
      }
   }
   //Advance buf_index to the next slot, or wrap to the start of the circular buffer
   if(buf_index < buf_size - 1){
      buf_index++;
   }
   else{
      buf_index = 0;
   }
   HyPerLayer::setActivity();
   return PV_SUCCESS;
}
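matchFilter is defined elsewhere in BIDSSensorLayer; the following is a purely illustrative sketch of how a matched filter over the circular buffer data[i] might look. matchFilterSketch and the template array filter[] are hypothetical names invented here, not the actual members:

// Hypothetical sketch only: correlate one node's circular buffer with a
// template of length buf_size, reading samples oldest-first. The real
// matchFilter is declared in BIDSSensorLayer; filter[] is invented here.
static float matchFilterSketch(float ** data, int node, int buf_index,
                               int buf_size, const float * filter)
{
   float sum = 0.0f;
   for (int j = 0; j < buf_size; j++) {
      int idx = (buf_index + 1 + j) % buf_size; // oldest sample first
      sum += data[node][idx] * filter[j];
   }
   return sum;
}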
Example #3
int ImageTestLayer::updateStateWrapper(double time, double dt)
{
   Image::updateStateWrapper(time, dt);
   const PVLayerLoc * loc = getLayerLoc();
   int nx = loc->nx;
   int ny = loc->ny;
   int nf = loc->nf;
   int nbatch = loc->nbatch;
   for(int b = 0; b < nbatch; b++){
      pvdata_t * dataBatch = data + b * getNumExtended();
      for(int nkRes = 0; nkRes < getNumNeurons(); nkRes++){
         //Calculate extended index
         int nkExt = kIndexExtended(nkRes, nx, ny, nf, loc->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up);  
         //checkVal is this neuron's value, rescaled back to the 0-255 pixel range
         pvdata_t checkVal = dataBatch[nkExt] * 255;

         int kxGlobal = kxPos(nkRes, nx, ny, nf) + loc->kx0;
         int kyGlobal = kyPos(nkRes, nx, ny, nf) + loc->ky0; 
         int kf = featureIndex(nkRes, nx, ny, nf);

         pvdata_t expectedVal = kIndex(kxGlobal, kyGlobal, kf, loc->nxGlobal, loc->nyGlobal, nf);
         if(fabs(checkVal - expectedVal) >= 1e-5){
            std::cout << "ImageFileIO test Expected: " << expectedVal << " Actual: " << checkVal << "\n";
            exit(-1);
         }
      }
   }
   return PV_SUCCESS;
}
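The restricted-to-extended conversion above follows from the same layout; a sketch of kIndexExtended under that assumption, built from the _sketch helpers after Example #1:

// Sketch (same layout assumption): shift a restricted index into the extended
// frame by offsetting x by the left halo and y by the top halo.
static inline int kIndexExtended_sketch(int k, int nx, int ny, int nf,
                                        int lt, int rt, int dn, int up)
{
   int kx = kxPos_sketch(k, nx, ny, nf) + lt;
   int ky = kyPos_sketch(k, nx, ny, nf) + up;
   int kf = featureIndex_sketch(k, nx, ny, nf);
   return kIndex_sketch(kx, ky, kf, nx + lt + rt, ny + dn + up, nf);
}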
Example #4
int KernelProbe::outputState(double timed) {
   InterColComm * icComm = parent->icCommunicator();
   const int rank = icComm->commRank();
   if( rank != 0 ) return PV_SUCCESS;
   assert(getTargetConn()!=NULL);
   int nxp = getTargetHyPerConn()->xPatchSize();
   int nyp = getTargetHyPerConn()->yPatchSize();
   int nfp = getTargetHyPerConn()->fPatchSize();
   int patchSize = nxp*nyp*nfp;

   const pvwdata_t * wdata = getTargetHyPerConn()->get_wDataStart(arborID)+patchSize*kernelIndex;
   const pvwdata_t * dwdata = outputPlasticIncr ?
         getTargetHyPerConn()->get_dwDataStart(arborID)+patchSize*kernelIndex : NULL;
   fprintf(outputstream->fp, "Time %f, Conn \"%s\", nxp=%d, nyp=%d, nfp=%d\n",
           timed, getTargetConn()->getName(),nxp, nyp, nfp);
   for(int f=0; f<nfp; f++) {
      for(int y=0; y<nyp; y++) {
         for(int x=0; x<nxp; x++) {
            int k = kIndex(x,y,f,nxp,nyp,nfp);
            fprintf(outputstream->fp, "    x=%d, y=%d, f=%d (index %d):", x, y, f, k);
            if(getOutputWeights()) {
               fprintf(outputstream->fp, "  weight=%f", (float)wdata[k]);
            }
            if(getOutputPlasticIncr()) {
               fprintf(outputstream->fp, "  dw=%f", (float)dwdata[k]);
            }
            fprintf(outputstream->fp,"\n");
         }
      }
   }

   return PV_SUCCESS;
}
Example #5
int StreamReconLayer::updateState(double timef, double dt) {
    
    update_timer->start();
    
    pvdata_t * V = getV();
    
    int nx = getLayerLoc()->nx;
    int ny = getLayerLoc()->ny;
    int nf = getLayerLoc()->nf;
    
    for (int i = 0; i < nx; i++) {
        int vx = i;
        int gx = i;
        for (int j = 0; j < nf; j ++) {
            int vf = j;
            int gf;
            if (vf + bufferLevel < nf) {
                gf = bufferLevel + j;
            }
            else {
                gf = bufferLevel + j - nf;
            }
            
            int vindex = kIndex(vx, 0, vf, nx, ny, nf);
            int gindex = kIndex(gx, 0, gf, nx, ny, nf);
            
            V[vindex] = GSyn[0][gindex];
        }
    }
    
    //Copy V to A buffer
    PV::HyPerLayer::setActivity();
    
    if (bufferLevel < nf - 1) {
        bufferLevel++;
    }
    else {
        bufferLevel = 0;
    }
    
    update_timer->stop();
    return PV_SUCCESS;
} // end update state
Example #6
int dumponeweight(HyPerConn * conn) {
   int status = PV_SUCCESS;
   bool errorfound = false;
   int rank = conn->getParent()->icCommunicator()->commRank();
   int nxp = conn->xPatchSize();
   int nyp = conn->yPatchSize();
   int nfp = conn->fPatchSize();
   int xcenter = (nxp-1)/2;
   int ycenter = (nyp-1)/2;
   int nxpre = conn->preSynapticLayer()->getLayerLoc()->nxGlobal;
   int nypre = conn->preSynapticLayer()->getLayerLoc()->nyGlobal;
   bool usingMirrorBCs = conn->preSynapticLayer()->useMirrorBCs();

   // If xScaleDiff > 0, it's a many-to-one connection.
   int xScaleDiff = conn->postSynapticLayer()->getXScale() - conn->preSynapticLayer()->getXScale();
   float xFalloff = powf(2,xScaleDiff);
   int yScaleDiff = conn->postSynapticLayer()->getYScale() - conn->preSynapticLayer()->getYScale();
   float yFalloff = powf(2,yScaleDiff);

   for( int p=0; p<conn->getNumDataPatches(); p++ ) {
      pvwdata_t * wgtData = conn->get_wDataHead(0,p); // conn->getKernelPatch(0,p)->data;
      for( int f=0; f<nfp; f++ ) {
         for( int x=0; x<nxp; x++ ) {
            int xoffset = abs((int) floor((x-xcenter)*xFalloff));
            for( int y=0; y<nyp; y++ ) {
               int yoffset = abs((int) floor((y-ycenter)*yFalloff));
               int idx = kIndex(x, y, f, nxp, nyp, nfp);
               //TODO-CER-2014.4.4 - weight conversion
               pvdata_t wgt = wgtData[idx];
               //pvdata_t correct = usingMirrorBCs ? 1 : (nxpre-xoffset)*(nypre-yoffset)/((pvdata_t) (nxpre*nypre));
               //New normalization takes into account if pre is not active
               //The pixel value from the input is actually 127, which we divide by 255.
               //Not exactly .5, a little less
               //Squared because both pre and post grab their activity from the image
               pvdata_t correct = usingMirrorBCs ? pow(float(127)/float(255),2) : (float(127)/float(255)) * .5;
               if( fabs(wgt-correct)>1.0e-5 ) {
                  pvErrorNoExit(errorMessage);
                  if( errorfound == false ) {
                      errorfound = true;
                      for( int k=0; k<72; k++ ) { pvInfo().printf("="); }
                      errorMessage.printf("\n");
                      errorMessage.printf("Rank %d, Connection \"%s\":\n",rank, conn->getName());
                  }
                  errorMessage.printf("Rank %d, Patch %d, x=%d, y=%d, f=%d: weight=%f, correct=%f, off by a factor of %f\n", rank, p, x, y, f, wgt, correct, wgt/correct);
                  status = PV_FAILURE;
               }
            }
         }
      }
   }
   if( status == PV_SUCCESS ) {
      pvInfo().printf("Rank %d, connection \"%s\": Weights are correct.\n", rank, conn->getName());
   }
   return status;
}
Example #7
//Makes a layer whose activity equals the restricted global index, but with spinning order [x, y, f] (x fastest, feature slowest) as opposed to the default [f, x, y] (feature fastest)
int InputLayer::updateState(double timef, double dt){
   //Grab layer size
   const PVLayerLoc* loc = getLayerLoc();
   int nx = loc->nx;
   int ny = loc->ny;
   int nf = loc->nf;
   int nxGlobal = loc->nxGlobal;
   int nyGlobal = loc->nyGlobal;
   int kx0 = loc->kx0;
   int ky0 = loc->ky0;

   for(int b = 0; b < parent->getNBatch(); b++){
      pvdata_t * A = getActivity() + b * getNumExtended();
      //looping over ext
      for(int iY = 0; iY < ny+loc->halo.up+loc->halo.dn; iY++){
         for(int iX = 0; iX < nx+loc->halo.lt+loc->halo.rt; iX++){
            //Calculate x and y global extended
            int xGlobalExt = iX + loc->kx0;
            int yGlobalExt = iY + loc->ky0;
            //Calculate x and y in restricted space
            int xGlobalRes = xGlobalExt - loc->halo.lt;
            int yGlobalRes = yGlobalExt - loc->halo.up;
            //Calculate base value
            //xGlobalRes and yGlobalRes can be negative
            int baseActivityVal = yGlobalRes * nxGlobal + xGlobalRes;

            for(int iFeature = 0; iFeature < nf; iFeature++){
              int ext_idx = kIndex(iX, iY, iFeature, nx+loc->halo.lt+loc->halo.rt, ny+loc->halo.dn+loc->halo.up, nf);
              //Feature gives an offset, since it spins slowest
              int activityVal = baseActivityVal + iFeature * nxGlobal * nyGlobal;
              A[ext_idx] = activityVal;
            }
         }
      }
   }

   ////Printing for double checking 
   //printf("\nOutMat\n");
   ////looping over ext
   //for(int iFeature = 0; iFeature < nf; iFeature++){
   //   for(int iY = 0; iY < ny+loc->halo.up+loc->halo.dn; iY++){
   //      for(int iX = 0; iX < nx+loc->halo.lt+loc->halo.rt; iX++){
   //         int ext_idx = kIndex(iX, iY, iFeature, nx+loc->halo.lt+loc->halo.rt, ny+loc->halo.dn+loc->halo.up, nf);
   //         printf("%03d ", (int)A[ext_idx]);
   //      }
   //      printf("\n");
   //   }
   //   printf("\n\n");
   //}

   return PV_SUCCESS;
}
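Concretely, the two spinning orders contrasted in the comment above differ only in where the feature index contributes; a sketch of the [x, y, f] form, which matches the baseActivityVal arithmetic in the loop (the default [f, x, y] form is the kIndex_sketch after Example #1):

// Sketch: feature-slowest global index, matching activityVal above.
// Each feature contributes a whole nxGlobal*nyGlobal plane.
static inline int indexFeatureSlowest_sketch(int kx, int ky, int kf,
                                             int nxGlobal, int nyGlobal)
{
   return kx + nxGlobal * ky + kf * nxGlobal * nyGlobal;
}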
Example #8
int GatePoolTestLayer::updateState(double timef, double dt) {
    //Do update state of ANN Layer first
    ANNLayer::updateState(timef, dt);

    //Grab layer size
    const PVLayerLoc* loc = getLayerLoc();
    int nx = loc->nx;
    int ny = loc->ny;
    int nxGlobal = loc->nxGlobal;
    int nyGlobal = loc->nyGlobal;
    int nf = loc->nf;
    int kx0 = loc->kx0;
    int ky0 = loc->ky0;

    bool isCorrect = true;
    //Grab the activity layer of current layer
    for(int b = 0; b < loc->nbatch; b++) {
        const pvdata_t * A = getActivity() + b * getNumExtended();
        //We only care about restricted space, but iY and iX are extended
        for(int iY = loc->halo.up; iY < ny + loc->halo.up; iY++) {
            for(int iX = loc->halo.lt; iX < nx + loc->halo.lt; iX++) {
                for(int iFeature = 0; iFeature < nf; iFeature++) {
                    int ext_idx = kIndex(iX, iY, iFeature, nx+loc->halo.lt+loc->halo.rt, ny+loc->halo.dn+loc->halo.up, nf);

                    float actualvalue = A[ext_idx];

                    int xval = (iX + kx0 - loc->halo.lt)/2;
                    int yval = (iY + ky0 - loc->halo.up)/2;
                    assert(xval >= 0 && xval < loc->nxGlobal);
                    assert(yval >= 0 && yval < loc->nyGlobal);

                    float expectedvalue;
                    expectedvalue = iFeature * 64 + yval * 16 + xval * 2 + 4.5;
                    expectedvalue*=4;

                    if(fabs(actualvalue - expectedvalue) >= 1e-4) {
                        pvErrorNoExit() << "Connection " << name << " Mismatch at (" << iX << "," << iY << ") : actual value: " << actualvalue << " Expected value: " << expectedvalue << ".  Discrepancy is a whopping " << actualvalue - expectedvalue << "!  Horrors!" << "\n";
                        isCorrect = false;
                    }
                }
            }
        }
    }
    if(!isCorrect) {
        InterColComm * icComm = parent->icCommunicator();
        MPI_Barrier(icComm->communicator()); // If there is an error, make sure that MPI doesn't kill the run before process 0 reports the error.
        exit(-1);
    }
    return PV_SUCCESS;
}
Example #9
int CPTestInputLayer::initializeV() {
   assert(parent->parameters()->value(name, "restart", 0.0f, false)==0.0f); // initializeV should only be called if restart is false
   const PVLayerLoc * loc = getLayerLoc();
   for (int b = 0; b < parent->getNBatch(); b++){
      pvdata_t * VBatch = getV() + b * getNumNeurons();
      for (int k = 0; k < getNumNeurons(); k++){
         int kx = kxPos(k,loc->nx,loc->ny,loc->nf);
         int ky = kyPos(k,loc->nx,loc->ny,loc->nf);
         int kf = featureIndex(k,loc->nx,loc->ny,loc->nf);
         int kGlobal = kIndex(loc->kx0+kx,loc->ky0+ky,kf,loc->nxGlobal,loc->nyGlobal,loc->nf);
         VBatch[k] = (pvdata_t) kGlobal;
      }
   }
   return PV_SUCCESS;
}
Example #10
int BIDSCloneLayer::mapCoords(){

   const PVLayerLoc origLoc = originalLayer->getCLayer()->loc;

   //Copy restricted clone data to current clayer data
   for(int i = 0; i < numNodes; i++){
      int index = kIndex(coords[i].xCoord, coords[i].yCoord, 0, clayer->loc.nx, clayer->loc.ny, clayer->loc.nf);
      int destIndexEx = kIndexExtended(index, clayer->loc.nx, clayer->loc.ny, clayer->loc.nf, clayer->loc.halo.lt, clayer->loc.halo.rt, clayer->loc.halo.dn, clayer->loc.halo.up);
      int srcIndexEx = kIndexExtended(index, origLoc.nx, origLoc.ny, origLoc.nf, origLoc.halo.lt, origLoc.halo.rt, origLoc.halo.dn, origLoc.halo.up);

      this->clayer->activity->data[destIndexEx] = originalLayer->getCLayer()->activity->data[srcIndexEx] == 0 ? 0:1;
   }
   return PV_SUCCESS;
}
Example #11
int InputLayer::updateState(double timef, double dt){
   //Grab the activity layer of current layer
   pvdata_t * A = getActivity();
   //Grab layer size
   const PVLayerLoc* loc = getLayerLoc();

   int nx = loc->nx;
   int ny = loc->ny;
   int nf = loc->nf;
   int kx0 = loc->kx0;
   int ky0 = loc->ky0;

   assert(nf == 4);
   assert(loc->nxGlobal == 2 && loc->nyGlobal == 2);

   //We only care about restricted space
   for(int iY = loc->halo.up; iY < ny + loc->halo.up; iY++){
      for(int iX = loc->halo.lt; iX < nx + loc->halo.lt; iX++){
         for(int iF = 0; iF < loc->nf; iF++){
            int idx = kIndex(iX, iY, iF, nx+loc->halo.lt+loc->halo.rt, ny+loc->halo.dn+loc->halo.up, nf);
            int xval = iX+kx0-loc->halo.lt;
            int yval = iY+ky0-loc->halo.up;

            if(timef == 10 && xval == 0 && yval == 0 && iF == 0){
               A[idx] = 1;
            }
            else if(timef == 10 && xval == 1 && yval == 0 && iF == 1){
               A[idx] = 1;
            }
            else if(timef == 10 && xval == 0 && yval == 1 && iF == 2){
               A[idx] = 1;
            }
            else if(timef == 10 && xval == 1 && yval == 1 && iF == 3){ 
               A[idx] = 1;
            }
            else{
               A[idx] = 0;
            }
         }
      }
   }

   return PV_SUCCESS;
}
Example #12
/**
 * @timed
 * NOTES:
 *     - Only the activity buffer covers the extended frame - this is the frame that
 * includes boundaries.
 *     - The other dynamic variables (G_E, G_I, V, Vth) cover the "real" or "restricted"
 *     frame.
 */
int PointLIFProbe::writeState(double timed)
{
   if (parent->columnId()==0 && timed >= writeTime) {
      pvAssert(outputStream);
      writeTime += writeStep;
      PVLayerLoc const * loc = getTargetLayer()->getLayerLoc();
      const int k = kIndex(xLoc, yLoc, fLoc, loc->nxGlobal, loc->nyGlobal, loc->nf);
      double * valuesBuffer = getValuesBuffer();
      outputStream->printf("%s t=%.1f %d"
            "G_E=" CONDUCTANCE_PRINT_FORMAT
            " G_I=" CONDUCTANCE_PRINT_FORMAT
            " G_IB=" CONDUCTANCE_PRINT_FORMAT
            " V=" CONDUCTANCE_PRINT_FORMAT
            " Vth=" CONDUCTANCE_PRINT_FORMAT
            " a=%.1f",
            getMessage(), timed, k,
            valuesBuffer[0], valuesBuffer[1], valuesBuffer[2],
            valuesBuffer[3], valuesBuffer[4], valuesBuffer[5]);
      output() << std::endl;
   }
   return PV_SUCCESS;
}
Example #13
int PursuitLayer::updateState(double time, double dt) {
    if (!updateReady) return PV_SUCCESS;
    int nx = getLayerLoc()->nx;
    int ny = getLayerLoc()->ny;
    int nf = getLayerLoc()->nf;
    PVHalo const * halo = &getLayerLoc()->halo;
    pvdata_t * activity = getActivity();
    memset(activity, 0, getNumExtended()*sizeof(*activity));

    int nxy = nx*ny;
    for (int kxy=0; kxy<nxy; kxy++) {
        int kf = foundFeatures[kxy];
        if (kf>=0) {
            int kx = kxPos(kxy,nx,ny,1);
            int ky = kyPos(kxy,nx,ny,1);
            int kex = kIndex(kx+halo->lt, ky+halo->up, kf, nx+halo->lt+halo->rt, ny+halo->dn+halo->up, nf); /* Is this correct? Before splitting x- and y- margin widths, the ny argument was ny*nb, which seems weird. */
            activity[kex] = gSynSparse[kxy];
        }
    }
    //resetGSynBuffers_HyPerLayer(getNumNeurons(), getNumChannels(), GSyn[0]);
    updateReady = false;
    return PV_SUCCESS;
}
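Regarding the "Is this correct?" comment above: under the layout assumed after Example #1, building the extended index directly with kIndex agrees with converting a restricted index through kIndexExtended, so the construction checks out. A small sketch of the identity:

// Sketch: both routes to the extended index agree for in-bounds (kx, ky, kf).
static inline bool extendedIndexAgrees_sketch(int kx, int ky, int kf,
                                              int nx, int ny, int nf,
                                              const PVHalo * halo)
{
   int direct = kIndex(kx + halo->lt, ky + halo->up, kf,
                       nx + halo->lt + halo->rt, ny + halo->dn + halo->up, nf);
   int viaExt = kIndexExtended(kIndex(kx, ky, kf, nx, ny, nf), nx, ny, nf,
                               halo->lt, halo->rt, halo->dn, halo->up);
   return direct == viaExt;
}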
Example #14
int OjaKernelSpikeRateProbe::allocateDataStructures() {
   targetOjaKernelConn = dynamic_cast<OjaKernelConn *>(getTargetConn());
   if (targetOjaKernelConn == NULL) {
      if (getParent()->columnId()==0) {
         fprintf(stderr, "LCATraceProbe error: connection \"%s\" must be an LCALIFLateralConn.\n", getTargetConn()->getName());
      }
      abort();
   }
   HyPerLayer * targetLayer = NULL;
   if (isInputRate) {
      targetLayer = targetOjaKernelConn->preSynapticLayer();
   }
   else {
      targetLayer = targetOjaKernelConn->postSynapticLayer();
   }
   const PVLayerLoc * loc = targetLayer->getLayerLoc();
   int x_local = xg - loc->kx0;
   int y_local = yg - loc->ky0;
   bool inBounds = (x_local >= 0 && x_local < loc->nx && y_local >= 0 && y_local < loc->ny);
   if (inBounds) {
      int krestricted = kIndex(x_local, y_local, feature, loc->nx, loc->ny, loc->nf);
      if (isInputRate) {
         int kextended = kIndexExtended(krestricted, loc->nx, loc->ny, loc->nf, loc->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up);
         spikeRate = &targetOjaKernelConn->getInputFiringRate(arbor)[kextended];
      }
      else {
         spikeRate = &targetOjaKernelConn->getOutputFiringRate()[krestricted];
      }
   }
   else {
      outputstream = NULL;
   }
   //This is now being done in BaseConnectionProbe
   //getTargetConn()->insertProbe(this);

   return PV_SUCCESS;
}
Example #15
int MoviePvpTestLayer::updateStateWrapper(double time, double dt)
{
   MoviePvp::updateStateWrapper(time, dt);
   const PVLayerLoc * loc = getLayerLoc();
   int nx = loc->nx;
   int ny = loc->ny;
   int nf = loc->nf;
   int nbatch = loc->nbatch;

   for(int b = 0; b < nbatch; b++){
      pvdata_t * dataBatch = data + b * getNumExtended();
      int frameIdx = 0;
      if(strcmp(getBatchMethod(), "byImage") == 0){
         frameIdx = (time-1) * nbatch + b;
      }
      else if(strcmp(getBatchMethod(), "byMovie") == 0){
         frameIdx = b * 2 + (time-1);
      }
      for(int nkRes = 0; nkRes < getNumNeurons(); nkRes++){
         //Calculate extended index
         int nkExt = kIndexExtended(nkRes, nx, ny, nf, loc->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up);  
         //checkVal is the value from batch index 0
         pvdata_t checkVal = dataBatch[nkExt];

         int kxGlobal = kxPos(nkRes, nx, ny, nf) + loc->kx0;
         int kyGlobal = kyPos(nkRes, nx, ny, nf) + loc->ky0; 
         int kf = featureIndex(nkRes, nx, ny, nf);

         pvdata_t expectedVal = kIndex(kxGlobal, kyGlobal, kf, loc->nxGlobal, loc->nyGlobal, nf) + frameIdx*192;
         if(fabs(checkVal - expectedVal) >= 1e-5){
            std::cout << "ImageFileIO " << name << " test Expected: " << expectedVal << " Actual: " << checkVal << "\n";
            //exit(-1);
         }
      }
   }
   return PV_SUCCESS;
}
Example #16
/**
 * @timef
 * NOTES:
 *    - kPost, kxPost, kyPost are indices in the restricted post-synaptic layer.
 *
 */
int PostConnProbe::outputState(double timef)
{
   int k, kxPre, kyPre;
   HyPerConn * c = getTargetHyPerConn();
   PVPatch  * w;
   PVPatch *** wPost = c->convertPreSynapticWeights(timef);

   // TODO - WARNING: currently only works if nfPre==0

   const PVLayer * lPre = c->preSynapticLayer()->clayer;
   const PVLayer * lPost = c->postSynapticLayer()->clayer;

   const int nxPre = lPre->loc.nx;
   const int nyPre = lPre->loc.ny;
   const int nfPre = lPre->loc.nf;
   const PVHalo * haloPre = &lPre->loc.halo;

   const int nxPost = lPost->loc.nx;
   const int nyPost = lPost->loc.ny;
   const int nfPost = lPost->loc.nf;
   const PVHalo * haloPost = &lPost->loc.halo;

   // calc kPost if needed
   if (kPost < 0) {
      kPost = kIndex(kxPost, kyPost, kfPost, nxPost, nyPost, nfPost);
   }
   else {
      kxPost = kxPos(kPost, nxPost, nyPost, nfPost);
      kyPost = kyPos(kPost, nxPost, nyPost, nfPost);
      kfPost = featureIndex(kPost, nxPost, nyPost, nfPost);
   }

   c->preSynapticPatchHead(kxPost, kyPost, kfPost, &kxPre, &kyPre);

   const int kxPreEx = kxPre + haloPre->lt;
   const int kyPreEx = kyPre + haloPre->up;

   const int kxPostEx = kxPost + haloPost->lt;
   const int kyPostEx = kyPost + haloPost->up;
   const int kPostEx = kIndex(kxPostEx, kyPostEx, kfPost, nxPost+haloPost->lt+haloPost->rt, nyPost+haloPost->dn+haloPost->up, nfPost);

   const bool postFired = lPost->activity->data[kPostEx] > 0.0;

   w = wPost[getArborID()][kPost];
   pvwdata_t * wPostData = c->getWPostData(getArborID(),kPost);

   const int nw = w->nx * w->ny * nfPost; //w->nf;

   if (wPrev == NULL) {
      wPrev = (pvwdata_t *) calloc(nw, sizeof(pvwdata_t));
      for (k = 0; k < nw; k++) {
         wPrev[k] = wPostData[k]; // This is broken if the patch is shrunken
      }
   }
   if (wActiv == NULL) {
      wActiv = (pvwdata_t *) calloc(nw, sizeof(pvwdata_t));
   }

   k = 0;
   for (int ky = 0; ky < w->ny; ky++) {
      for (int kx = 0; kx < w->nx; kx++) {
         int kPre = kIndex(kx+kxPreEx, ky+kyPreEx, 0, nxPre+haloPre->lt+haloPre->rt, nyPre+haloPre->dn+haloPre->up, nfPre);
         wActiv[k++] = lPre->activity->data[kPre];
      }
   }

   bool changed = false;
   for (k = 0; k < nw; k++) {
      if (wPrev[k] != wPostData[k] || wActiv[k] != 0.0) {
         changed = true;
         break;
      }
   }
   FILE * fp = getStream()->fp;
   if (stdpVars && (postFired || changed)) {
      if (postFired) fprintf(fp, "*");
      else fprintf(fp, " ");
      fprintf(fp, "t=%.1f w%d(%d,%d,%d) prePatchHead(%d,%d): ", timef, kPost, kxPost,
            kyPost, kfPost, kxPre, kyPre);
      if (image) fprintf(fp, "tag==%d ", image->tag());
      fprintf(fp, "\n");
   }
   if (stdpVars && changed) {
      text_write_patch_extra(fp, w, wPostData, wPrev, wActiv, getTargetHyPerConn());
      fflush(fp);
   }

   for (k = 0; k < nw; k++) {
      wPrev[k] = wPostData[k];
   }

   if (outputIndices) {
      fprintf(fp, "w%d(%d,%d,%d) prePatchHead(%d,%d): ", kPost, kxPost, kyPost, kfPost, kxPre, kyPre);
      if(!stdpVars){
        fprintf(fp,"\n");
      }
      const PVLayer * lPre = c->preSynapticLayer()->clayer;
      write_patch_indices(fp, w, &lPre->loc, kxPre, kyPre, 0);
      fflush(fp);
   }

   return 0;
}
Example #17
int BinningLayer::doUpdateState(double timed, double dt, const PVLayerLoc * origLoc, const PVLayerLoc * currLoc, const pvdata_t * origData, pvdata_t * currA, float binMax, float binMin) {
   int status = PV_SUCCESS;
   //update_timer->start();
   int numBins = currLoc->nf;

   int nx = currLoc->nx;
   int ny = currLoc->ny;
   //Check that both layers' halos are the same
   assert(origLoc->halo.lt == currLoc->halo.lt &&
          origLoc->halo.rt == currLoc->halo.rt &&
          origLoc->halo.dn == currLoc->halo.dn &&
          origLoc->halo.up == currLoc->halo.up);
   assert(origLoc->nf == 1);
   PVHalo const * halo = &origLoc->halo;
   float binRange = binMax - binMin;
   float stepSize = float(binRange)/numBins;
   int nbatch = currLoc->nbatch;

   for(int b = 0; b < nbatch; b++){
      const pvdata_t * origDataBatch = origData + b * (origLoc->nx + origLoc->halo.lt + origLoc->halo.rt) * (origLoc->ny + origLoc->halo.up + origLoc->halo.dn) * origLoc->nf;
      pvdata_t * currABatch = currA + b * (currLoc->nx + currLoc->halo.lt + currLoc->halo.rt) * (currLoc->ny + currLoc->halo.up + currLoc->halo.dn) * currLoc->nf;

      // each y value specifies a different target so ok to thread here (sum, sumsq are defined inside loop)
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for
#endif
      for (int iY = 0; iY < (ny+halo->dn+halo->up); iY++){
         for (int iX = 0; iX < (nx+halo->lt+halo->rt); iX++){
            int origIdx = kIndex(iX, iY, 0, nx+halo->lt+halo->rt, ny+halo->dn+halo->up, origLoc->nf);
            float inVal = origDataBatch[origIdx];
            //If inVal is out of bounds in either binMax or binMin, set the value to be the maximum or minimum val
            if(inVal < binMin){
               inVal = binMin;
            }
            if(inVal > binMax){
               inVal = binMax;
            }

            if(zeroDCR && inVal == 0){
               for(int iF = 0; iF < numBins; iF++){
                  int currIdx = kIndex(iX, iY, iF, nx+halo->lt+halo->rt, ny+halo->dn+halo->up, numBins);
                  currABatch[currIdx] = 0;
               }
            }
            else{
               //A sigma of zero means only the centered bin value should get input
               int featureIdx = round((inVal-binMin)/stepSize);

               for(int iF = 0; iF < numBins; iF++){
                  if(binSigma == 0){
                     int currIdx = kIndex(iX, iY, iF, nx+halo->lt+halo->rt, ny+halo->dn+halo->up, numBins);
                     if(iF == featureIdx){
                        currABatch[currIdx] = 1;
                     }
                     //Resetting value
                     else{
                        if(zeroNeg){
                           currABatch[currIdx] = 0;
                        }
                        else{
                           currABatch[currIdx] = -1;
                        }
                     }
                  }
                  else{
                     //Calculate the center value for featureIdx (the bin the value falls into when sigma is zero)
                     float mean;
                     if(normalDist){
                        mean = featureIdx * stepSize + (stepSize/2);
                     }
                     else{
                        mean = featureIdx;
                     }
                     //Possible bins
                     int intSigma = ceil(binSigma);
                     int currIdx = kIndex(iX, iY, iF, nx+halo->lt+halo->rt, ny+halo->dn+halo->up, numBins);
                     if(iF >= featureIdx-intSigma && iF <= featureIdx+intSigma){
                        //Get center of that aBin for the x pos of the normal dist
                        float xVal;
                        if(normalDist){
                           xVal = iF * stepSize + (stepSize/2);
                        }
                        else{
                           xVal = iF;
                        }
                        //Calculate normal dist
                        float outVal = calcNormDist(xVal, mean, binSigma);
                        //Put into activity buffer
                        currABatch[currIdx] = outVal;
                     }
                     //Resetting value
                     else{
                        if(zeroNeg){
                           currABatch[currIdx] = 0;
                        }
                        else{
                           currABatch[currIdx] = -1;
                        }
                     }
                  }
               }
            }
         }
      }
   }
   //update_timer->stop();
   return status;
}
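calcNormDist is defined elsewhere in BinningLayer; the following is a plausible sketch, assuming it evaluates an unnormalized Gaussian at xVal (the exact normalization is an assumption):

// Hypothetical sketch: Gaussian bin weighting centered on `mean` with width
// sigma. The real calcNormDist may normalize differently.
static float calcNormDist_sketch(float xVal, float mean, float sigma)
{
   float d = (xVal - mean) / sigma;
   return expf(-0.5f * d * d);
}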
Example #18
int PursuitLayer::recvSynapticInput(HyPerConn * conn, const PVLayerCube * activity, int arborID) {
    if (parent->simulationTime()<nextUpdate) return PV_SUCCESS;
    nextUpdate += updatePeriod;
    recvsyn_timer->start();

    assert(arborID >= 0);

    if (conn->usingSharedWeights() == false) {
        fprintf(stderr, "Error: PursuitLayer can only be the postsynaptic layer of a connection using shared weights (this condition should be removed eventually).\n");
        abort();
    }

    HyPerLayer * pre = conn->preSynapticLayer();
    const PVLayerLoc * pre_loc = pre->getLayerLoc();
    if (pre_loc->nx != getLayerLoc()->nx || pre_loc->ny != getLayerLoc()->ny) {
        fprintf(stderr, "Error: PursuitLayer requires incoming connections to be one-to-one.\n");
        abort();
    }


#ifdef DEBUG_OUTPUT
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    //printf("[%d]: HyPerLayr::recvSyn: neighbor=%d num=%d actv=%p this=%p conn=%p\n", rank, neighbor, numExtended, activity, this, conn);
    printf("[%d]: HyPerLayr::recvSyn: neighbor=%d num=%d actv=%p this=%p conn=%p\n", rank, 0, numExtended, activity, this, conn);
    fflush(stdout);
#endif // DEBUG_OUTPUT


    const int numExtended = activity->numItems;
    for (int kPre = 0; kPre < numExtended; kPre++) {
        float a = activity->data[kPre];
        if (a == 0.0f) continue;

        PVPatch * weights = conn->getWeights(kPre, arborID);

        // WARNING - assumes every value in weights maps into a valid value in GSyn
        //         - assumes patch stride sf is 1

        int nk  = conn->fPatchSize() * weights->nx;
        int ny  = weights->ny;
        int sy  = conn->getPostNonextStrides()->sy; // stride in layer
        int syw = conn->yPatchStride();             // stride in patch
        pvdata_t * gSynPatchHead = this->getChannel(conn->getChannel());
        pvdata_t * gSynPatchStart = gSynPatchHead + conn->getGSynPatchStart(kPre, arborID);
        pvwdata_t * data = conn->get_wData(arborID,kPre);
        for (int y = 0; y < ny; y++) {
            (conn->accumulateFunctionPointer)(nk, gSynPatchStart + y*sy, a, data + y*syw, NULL);
        }
    }

    // Set |w(:,:,f)|^2.  Since this is a one-to-one connection with one presynaptic feature,
    // only have to do once for each feature of a single (x,y) site and then copy.
    int nxp = conn->xPatchSize();
    int nyp = conn->yPatchSize();
    int nfp = conn->fPatchSize();
    int num_weights = nxp*nyp*nfp;
    assert(zUnitCellSize(pre->getXScale(), getXScale())==1);
    assert(zUnitCellSize(pre->getYScale(), getYScale())==1);
    assert(conn->getNumDataPatches()==1);

    for (int kf=0; kf<nfp; kf++) {
        pvwdata_t * weight = conn->get_wDataHead(arborID, 0);
        pvdata_t sum = 0.0;
        for (int k=0; k<num_weights; k+=nfp) {
            pvwdata_t w = weight[k + kf]; // Assumes stride in features is 1.
            //TODO-CER-2014.4.4 - convert weights
            sum += w*w;
        }
        wnormsq[kf] = sum;
    }

    pvdata_t * gSynStart = GSyn[conn->getChannel()];

    int nx = getLayerLoc()->nx;
    int ny = getLayerLoc()->ny;
    int nxy = nx*ny;

    // TODO: Can I compute energyDropsBestFeature and minLocationsBestFeature without storing all the energyDrops and minimumLocations?

    for (int kxy=0; kxy<nxy; kxy++) {
        for (int kf=0; kf<nfp; kf++) {
            int k=kxy*nfp+kf; // Assumes stride in features is 1.
            minimumLocations[k] = gSynStart[k]/wnormsq[kf];
            energyDrops[k] = -gSynStart[k]*minimumLocations[k]/2;
        }
    }

    for (int kxy=0; kxy<nxy; kxy++) {
        minFeatures[kxy] = -1;
        energyDropsBestFeature[kxy] = FLT_MAX;
        int index0 = kxy*nfp; // assumes stride in features is 1.
        if (foundFeatures[kxy]>=0) {
            energyDropsBestFeature[kxy] = energyDrops[kxy*nfp+foundFeatures[kxy]];
            minFeatures[kxy] = foundFeatures[kxy];
        }
        else {
            for (int kf=0; kf<nfp; kf++) {
                if (energyDrops[index0+kf] < energyDropsBestFeature[kxy]) {
                    minFeatures[kxy] = kf;
                    energyDropsBestFeature[kxy] = energyDrops[index0+kf];
                }
            }
        }
    }

    for (int kxy=0; kxy<nxy; kxy++) {
        assert(minFeatures[kxy]>=0 && minFeatures[kxy]<nfp);
        int baseindex = kxy*nfp;
        minLocationsBestFeature[kxy] = minimumLocations[baseindex+minFeatures[kxy]];
    }

    bool mask[nxy];
    memset(mask, false, nxy*sizeof(*mask));

    pvdata_t smallestEnergyDrop;
    int minloc;

    // Greedy loop: recompute the constrained minima, pick the site with the smallest
    // remaining energy drop, and mask out its neighborhood; stops when no finite drop remains.
    while (constrainMinima(), minloc = filterMinEnergies(mask, &smallestEnergyDrop), smallestEnergyDrop<FLT_MAX) {
        assert(foundFeatures[minloc]<0 || foundFeatures[minloc]==minFeatures[minloc]);
        foundFeatures[minloc] = minFeatures[minloc];
        gSynSparse[minloc] += minLocationsBestFeature[minloc];
        if (gSynSparse[minloc] < 1e-4) {
            gSynSparse[minloc]=0;
            foundFeatures[minloc] = -1;
        }

        int minlocx = kxPos(minloc,nx,ny,1);
        int maskstartx = minlocx-(nxp-1);
        if (maskstartx<0) maskstartx=0;
        int maskstopx = minlocx+nxp;
        if (maskstopx>nx) maskstopx=nx;
        int minlocy = kyPos(minloc,nx,ny,1);
        int maskstarty = minlocy-(nyp-1);
        if (maskstarty<0) maskstarty=0;
        int maskstopy = minlocy+nyp;
        if (maskstopy>ny) maskstopy=ny;
        for (int ky=maskstarty; ky<maskstopy; ky++) {
            for (int kx=maskstartx; kx<maskstopx; kx++) {
                mask[kIndex(kx,ky,0,nx,ny,1)]=true;
            }
        }
    }


    recvsyn_timer->stop();

    updateReady = true;

    return 0;
}
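A note on the quantities computed above, consistent with greedy matching pursuit: writing $g_f$ for the gSyn inner product $\langle x, w_f\rangle$ and $\lVert w_f\rVert^2$ for wnormsq[kf], minimumLocations and energyDrops correspond to

$$ a_f^* = \frac{g_f}{\lVert w_f\rVert^2}, \qquad \Delta E_f = -\frac{g_f\, a_f^*}{2} = -\frac{g_f^2}{2\,\lVert w_f\rVert^2}, $$

so filterMinEnergies greedily selects the site and feature with the largest achievable energy decrease.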
Example #19
int PoolingConn::deliverPostsynapticPerspective(PVLayerCube const * activity, int arborID) {
    //Check channel number for noupdate
    if(getChannel() == CHANNEL_NOUPDATE) {
        return PV_SUCCESS;
    }
    assert(post->getChannel(getChannel()));

    assert(arborID >= 0);
    //Get number of neurons restricted target
    const int numPostRestricted = post->getNumNeurons();

    float dt_factor = getConvertToRateDeltaTimeFactor();

    const PVLayerLoc * sourceLoc = preSynapticLayer()->getLayerLoc();
    const PVLayerLoc * targetLoc = post->getLayerLoc();

    const int sourceNx = sourceLoc->nx;
    const int sourceNy = sourceLoc->ny;
    const int sourceNf = sourceLoc->nf;
    const int targetNx = targetLoc->nx;
    const int targetNy = targetLoc->ny;
    const int targetNf = targetLoc->nf;

    const PVHalo * sourceHalo = &sourceLoc->halo;
    const PVHalo * targetHalo = &targetLoc->halo;

    //get source layer's extended y stride
    int sy  = (sourceNx+sourceHalo->lt+sourceHalo->rt)*sourceNf;

    //The start of the gsyn buffer
    pvdata_t * gSynPatchHead = post->getChannel(this->getChannel());

    clearGateIdxBuffer();
    int* gatePatchHead = NULL;
    if(needPostIndexLayer) {
        gatePatchHead = postIndexLayer->getChannel(CHANNEL_EXC);
    }


    long * startSourceExtBuf = getPostToPreActivity();
    if(!startSourceExtBuf) {
        std::cout << "HyPerLayer::recvFromPost error getting preToPostActivity from connection. Is shrink_patches on?\n";
        exit(EXIT_FAILURE);
    }

    float resetVal = 0;
    if(getPvpatchAccumulateType() == ACCUMULATE_MAXPOOLING) {
        resetVal = -INFINITY;
    }


    for(int b = 0; b < parent->getNBatch(); b++) {
#ifdef PV_USE_OPENMP_THREADS
        #pragma omp parallel for
#endif
        for (int kTargetRes = 0; kTargetRes < numPostRestricted; kTargetRes++) {
            pvdata_t * activityBatch = activity->data + b * (sourceNx + sourceHalo->rt + sourceHalo->lt) * (sourceNy + sourceHalo->up + sourceHalo->dn) * sourceNf;
            pvdata_t * gSynPatchHeadBatch = gSynPatchHead + b * targetNx * targetNy * targetNf;

            //Change restricted to extended post neuron
            int kTargetExt = kIndexExtended(kTargetRes, targetNx, targetNy, targetNf, targetHalo->lt, targetHalo->rt, targetHalo->dn, targetHalo->up);

            //Read from buffer
            long startSourceExt = startSourceExtBuf[kTargetRes];

            //Calculate target's start of gsyn
            pvdata_t * gSynPatchPos = gSynPatchHeadBatch + kTargetRes;
            //Initialize to resetVal: -infinity for max pooling, 0 otherwise
            *gSynPatchPos = resetVal;

            int* gatePatchPos = NULL;
            if(needPostIndexLayer) {
                gatePatchPos = gatePatchHead + b * postIndexLayer->getNumNeurons() + kTargetRes;
                //Initialize gatePatchPos as a negative number
                *gatePatchPos = -1;
            }

            float* activityStartBuf = &(activityBatch[startSourceExt]);

            //No weights in pooling
            int sf = postConn->fPatchSize();
            int yPatchSize = postConn->yPatchSize();
            int numPerStride = postConn->xPatchSize() * postConn->fPatchSize();

            const PVLayerLoc * postLoc = post->getLayerLoc();
            const int kfPost = featureIndex(kTargetExt, postLoc->nx + postLoc->halo.lt + postLoc->halo.rt, postLoc->ny + postLoc->halo.dn + postLoc->halo.up, postLoc->nf);
            int offset = kfPost;

            pvwdata_t w = 1.0;
            if(getPvpatchAccumulateType() == ACCUMULATE_SUMPOOLING) {
                float relative_XScale = pow(2, (post->getXScale() - pre->getXScale()));
                float relative_YScale = pow(2, (post->getYScale() - pre->getYScale()));
                w = 1.0/(nxp*nyp*relative_XScale*relative_YScale);
            }

            for (int ky = 0; ky < yPatchSize; ky++) {
                int kPreExt = startSourceExt + ky*sy+offset;
                const int kxPreExt = kxPos(kPreExt, sourceLoc->nx + sourceLoc->halo.lt + sourceLoc->halo.rt, sourceLoc->ny + sourceLoc->halo.dn + sourceLoc->halo.up, sourceLoc->nf);
                const int kyPreExt = kyPos(kPreExt, sourceLoc->nx + sourceLoc->halo.lt + sourceLoc->halo.rt, sourceLoc->ny + sourceLoc->halo.dn + sourceLoc->halo.up, sourceLoc->nf);
                const int kfPre = featureIndex(kPreExt, sourceLoc->nx + sourceLoc->halo.lt + sourceLoc->halo.rt, sourceLoc->ny + sourceLoc->halo.dn + sourceLoc->halo.up, sourceLoc->nf);
                const int kxPreGlobalExt = kxPreExt + sourceLoc->kx0;
                const int kyPreGlobalExt = kyPreExt + sourceLoc->ky0;
                const int kPreGlobalExt = kIndex(kxPreGlobalExt, kyPreGlobalExt, kfPre, sourceLoc->nxGlobal + sourceLoc->halo.lt + sourceLoc->halo.rt, sourceLoc->nyGlobal + sourceLoc->halo.up + sourceLoc->halo.dn, sourceLoc->nf);

                float * activityY = &(activityStartBuf[ky*sy+offset]);

                (accumulateFunctionFromPostPointer)(kPreGlobalExt, numPerStride, gSynPatchPos, activityY, &w, dt_factor, gatePatchPos, sf);
            }
        }
    }
    return PV_SUCCESS;
}
Example #20
int PointLIFProbe::calcValues(double timevalue) {
   // TODO: Reduce duplicated code between PointProbe::calcValues and PointLIFProbe::calcValues.
   assert(this->getNumValues()==NUMBER_OF_VALUES);
   LIF * LIF_layer = dynamic_cast<LIF *>(getTargetLayer());
   assert(LIF_layer != NULL);
   pvconductance_t const * G_E  = LIF_layer->getConductance(CHANNEL_EXC) + batchLoc * LIF_layer->getNumNeurons();
   pvconductance_t const * G_I  = LIF_layer->getConductance(CHANNEL_INH) + batchLoc * LIF_layer->getNumNeurons();
   pvconductance_t const * G_IB = LIF_layer->getConductance(CHANNEL_INHB) + batchLoc * LIF_layer->getNumNeurons();
   pvdata_t const * V = getTargetLayer()->getV();
   pvdata_t const * Vth  = LIF_layer->getVth();
   pvdata_t const * activity = getTargetLayer()->getLayerData();
   assert(V && activity && G_E && G_I && G_IB && Vth);
   double * valuesBuffer = this->getValuesBuffer();
   //We need to calculate which mpi process contains the target point, and send that info to the root process
   //Each process calculates local index
   const PVLayerLoc * loc = getTargetLayer()->getLayerLoc();
   //Calculate local coords from global
   const int kx0 = loc->kx0;
   const int ky0 = loc->ky0;
   const int kb0 = loc->kb0;
   const int nx = loc->nx;
   const int ny = loc->ny;
   const int nf = loc->nf;
   const int nbatch = loc->nbatch;
   const int xLocLocal = xLoc - kx0;
   const int yLocLocal = yLoc - ky0;
   const int nbatchLocal = batchLoc - kb0;
   
   //if in bounds
   if( xLocLocal >= 0 && xLocLocal < nx &&
       yLocLocal >= 0 && yLocLocal < ny &&
       nbatchLocal >= 0 && nbatchLocal < nbatch){
      //Send V and A to root
      const int k = kIndex(xLocLocal, yLocLocal, fLoc, nx, ny, nf);
      const int kbatch = k + nbatchLocal*getTargetLayer()->getNumNeurons();
      valuesBuffer[0] = G_E[kbatch];
      valuesBuffer[1] = G_I[kbatch];
      valuesBuffer[2] = G_IB[kbatch];
      valuesBuffer[3] = V[kbatch];
      valuesBuffer[4] = Vth[kbatch];
      const int kex = kIndexExtended(k, nx, ny, nf, loc->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up);
      valuesBuffer[5] = activity[kex + nbatchLocal * getTargetLayer()->getNumExtended()];
      //If not in root process, send to root process
      if(parent->columnId()!=0){
         MPI_Send(valuesBuffer, NUMBER_OF_VALUES, MPI_DOUBLE, 0, 0, parent->icCommunicator()->communicator());
      }
   }

   //Root process
   if(parent->columnId()==0){
      //Calculate which rank target neuron is
      //TODO we need to calculate rank from batch as well
      int xRank = xLoc/nx;
      int yRank = yLoc/ny;

      int srcRank = rankFromRowAndColumn(yRank, xRank, parent->icCommunicator()->numCommRows(), parent->icCommunicator()->numCommColumns());

      //If srcRank is not root process, MPI_Recv from that rank
      if(srcRank != 0){
         MPI_Recv(valuesBuffer, NUMBER_OF_VALUES, MPI_DOUBLE, srcRank, 0, parent->icCommunicator()->communicator(), MPI_STATUS_IGNORE);
      }
   }
   return PV_SUCCESS;
}
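rankFromRowAndColumn comes from PetaVision's communicator utilities; a sketch of the row-major layout it is assumed to implement (the bounds check is illustrative):

// Sketch, assuming MPI ranks are laid out row-major over the column grid.
static int rankFromRowAndColumn_sketch(int row, int col, int numRows, int numCols)
{
   if (row < 0 || row >= numRows || col < 0 || col >= numCols) return -1;
   return row * numCols + col;
}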
Example #21
int BackwardsBatchNorm::updateState(double timef, double dt) {
    int status = PV_SUCCESS;

    //We are filling this activity buffer
    pvdata_t * thisA = clayer->activity->data;
    assert(thisA);

    //We need the normalized input vals, orig input vals, and the input gradients
    const pvdata_t * inputGradA = originalLayer->getCLayer()->activity->data;
    const pvdata_t * forwardA = forwardLayer->getCLayer()->activity->data;
    const pvdata_t * origInputA = forwardLayer->getOriginalLayer()->getCLayer()->activity->data;
    assert(inputGradA && forwardA && origInputA);

    //Get locs for all buffers
    const PVLayerLoc * thisLoc = getLayerLoc();
    const PVLayerLoc * inputGradLoc = originalLayer->getLayerLoc();
    const PVLayerLoc * forwardLoc = forwardLayer->getLayerLoc();
    const PVLayerLoc * origInputLoc = forwardLayer->getOriginalLayer()->getLayerLoc();

    int nbatch = thisLoc->nbatch;

    //All nx, ny, and nf should be the same
    int nx = thisLoc->nx;
    int ny = thisLoc->ny;
    int nf = thisLoc->nf;

    //Get buffer margins here
    int xThisMargin = thisLoc->halo.lt + thisLoc->halo.rt;
    int yThisMargin = thisLoc->halo.up + thisLoc->halo.dn;
    int xInputGradMargin = inputGradLoc->halo.lt + inputGradLoc->halo.rt;
    int yInputGradMargin = inputGradLoc->halo.up + inputGradLoc->halo.dn;
    int xForwardMargin = forwardLoc->halo.lt + forwardLoc->halo.rt;
    int yForwardMargin = forwardLoc->halo.up + forwardLoc->halo.dn;
    int xOrigInputMargin = origInputLoc->halo.lt + origInputLoc->halo.rt;
    int yOrigInputMargin = origInputLoc->halo.up + origInputLoc->halo.dn;

    //We also need various mean and var buffers from the forward layer
    const float* batchMean = forwardLayer->getBatchMean();
    const float* batchVar = forwardLayer->getBatchVar();
    float*       batchMeanShift = forwardLayer->getBatchMeanShift();
    float*       batchVarShift = forwardLayer->getBatchVarShift();
    float        epsilon = forwardLayer->getEpsilon();

    //Total number of neurons to divide by for each feature
    float normVal = parent->getNBatchGlobal() * thisLoc->nyGlobal * thisLoc->nxGlobal;

    //We're accumulating into delta buffers, so clear
    clearDelta();

    //Ioffe et al., Batch Normalization

    //Calculate deltaVar
    //TODO parallelize over threads
    for(int iF = 0; iF < nf; iF++) {
        float secondTerm = -.5*(powf(batchVar[iF] + epsilon, -1.5));
        for(int b = 0; b < nbatch; b++) {
            const pvdata_t* batchOrigInputA = origInputA + b * forwardLayer->getOriginalLayer()->getNumExtended();
            const pvdata_t* batchInputGradA = inputGradA + b * originalLayer->getNumExtended();
            for(int iY = 0; iY < ny; iY++) {
                for(int iX = 0; iX < nx; iX++) {
                    int kExtOrigInput = kIndex(iX, iY, iF, nx+xOrigInputMargin, ny+yOrigInputMargin, nf);
                    int kExtInputGrad = kIndex(iX, iY, iF, nx+xInputGradMargin, ny+yInputGradMargin, nf);
                    float deltaNorm = batchInputGradA[kExtInputGrad] * batchVarShift[iF];
                    deltaVar[iF] += deltaNorm * (batchOrigInputA[kExtOrigInput] - batchMean[iF]);
                }
            }
        }
        //Multiply deltaVar by secondTerm
        deltaVar[iF] = deltaVar[iF] * secondTerm;
    }

    //Reduce deltaVar
#ifdef PV_USE_MPI
    MPI_Allreduce(MPI_IN_PLACE, deltaVar, nf, MPI_FLOAT, MPI_SUM, parent->icCommunicator()->globalCommunicator());
#endif // PV_USE_MPI

    //Calculate deltaMean
    //Calculate first term first
    //TODO parallelize over threads
    for(int iF = 0; iF < nf; iF++) {
        float multiplier = -1.0/(sqrtf(batchVar[iF]+epsilon));
        for(int b = 0; b < nbatch; b++) {
            const pvdata_t* batchInputGradA = inputGradA + b * originalLayer->getNumExtended();
            for(int iY = 0; iY < ny; iY++) {
                for(int iX = 0; iX < nx; iX++) {
                    int kExtInputGrad = kIndex(iX, iY, iF, nx+xInputGradMargin, ny+yInputGradMargin, nf);
                    float deltaNorm = batchInputGradA[kExtInputGrad] * batchVarShift[iF];
                    deltaMean[iF] += deltaNorm * multiplier;
                }
            }
        }
    }
    //Reduce deltaMean across mpi
#ifdef PV_USE_MPI
    MPI_Allreduce(MPI_IN_PLACE, deltaMean, nf, MPI_FLOAT, MPI_SUM, parent->icCommunicator()->globalCommunicator());
#endif // PV_USE_MPI

    //Calculate second term
    //TODO parallelize over threads
    for(int iF = 0; iF < nf; iF++) {
        float tmpMean = 0;
        for(int b = 0; b < nbatch; b++) {
            const pvdata_t* batchOrigInputA = origInputA + b * forwardLayer->getOriginalLayer()->getNumExtended();
            for(int iY = 0; iY < ny; iY++) {
                for(int iX = 0; iX < nx; iX++) {
                    int kExtOrigInput = kIndex(iX, iY, iF, nx+xOrigInputMargin, ny+yOrigInputMargin, nf);
                    tmpMean += -2 * (batchOrigInputA[kExtOrigInput] - batchMean[iF]);
                }
            }
        }
        //Reduce tmpMean
#ifdef PV_USE_MPI
        MPI_Allreduce(MPI_IN_PLACE, &tmpMean, 1, MPI_FLOAT, MPI_SUM, parent->icCommunicator()->globalCommunicator());
#endif // PV_USE_MPI
        tmpMean = tmpMean / normVal;
        //Add second term to first term
        deltaMean[iF] += deltaVar[iF] * tmpMean;
    }

    //No more sums, go with efficient loop
    //TODO Is the efficient loop better for optimization, or should we put
    //features on the outermost loop to precalculate per-feature constants?
    for(int b = 0; b < nbatch; b++) {
        const pvdata_t* batchOrigInputA = origInputA + b * forwardLayer->getOriginalLayer()->getNumExtended();
        const pvdata_t* batchInputGradA = inputGradA + b * originalLayer->getNumExtended();
        pvdata_t* batchThisA = thisA + b * getNumExtended();
#ifdef PV_USE_OPENMP_THREADS
        #pragma omp parallel for collapse(3)
#endif
        for(int iY = 0; iY < ny; iY++) {
            for(int iX = 0; iX < nx; iX++) {
                for(int iF = 0; iF < nf; iF++) {
                    int kExtOrigInput = kIndex(iX, iY, iF, nx+xOrigInputMargin, ny+yOrigInputMargin, nf);
                    int kExtInputGrad = kIndex(iX, iY, iF, nx+xInputGradMargin, ny+yInputGradMargin, nf);
                    int kExtThis = kIndex(iX, iY, iF, nx+xThisMargin, ny+yThisMargin, nf);
                    float deltaNorm = batchInputGradA[kExtInputGrad] * batchVarShift[iF];
                    float firstTerm = deltaNorm/sqrtf(batchVar[iF] + epsilon);
                    float secondTerm = deltaVar[iF] * (2*(batchOrigInputA[kExtOrigInput] - batchMean[iF])/normVal);
                    float thirdTerm = deltaMean[iF]/normVal;
                    batchThisA[kExtThis] = firstTerm + secondTerm + thirdTerm;
                }
            }
        }
    }

    //We calculate delta varShift and deltaMeanShift here
    //TODO parallelize over threads
    //Since we're summing into delta*shift buffers, we have to sequentialize over features
    for(int iF = 0; iF < nf; iF++) {
        for(int b = 0; b < nbatch; b++) {
            const pvdata_t* batchForwardA = forwardA + b * forwardLayer->getNumExtended();
            const pvdata_t* batchInputGradA = inputGradA + b * originalLayer->getNumExtended();
            for(int iY = 0; iY < ny; iY++) {
                for(int iX = 0; iX < nx; iX++) {
                    int kExtInputGrad = kIndex(iX, iY, iF, nx+xInputGradMargin, ny+yInputGradMargin, nf);
                    int kExtForwardA = kIndex(iX, iY, iF, nx+xForwardMargin, ny + yForwardMargin, nf);
                    deltaVarShift[iF] += batchInputGradA[kExtInputGrad] * batchForwardA[kExtForwardA];
                    deltaMeanShift[iF] += batchInputGradA[kExtInputGrad];
                }
            }
        }
    }

    //Reduce delta*Shift across all mpi
#ifdef PV_USE_MPI
    MPI_Allreduce(MPI_IN_PLACE, deltaVarShift, nf, MPI_FLOAT, MPI_SUM, parent->icCommunicator()->globalCommunicator());
    MPI_Allreduce(MPI_IN_PLACE, deltaMeanShift, nf, MPI_FLOAT, MPI_SUM, parent->icCommunicator()->globalCommunicator());
#endif // PV_USE_MPI

    //TODO implement learning rule for meanShift and varShift

    return status;
}
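For reference, the accumulations above line up with the standard batch-normalization backward pass of Ioffe & Szegedy (2015). With $\hat{x}$ the normalized input, $\gamma$ = batchVarShift (so $\partial L/\partial \hat{x}_i$ is the deltaNorm in the code), and $N$ = normVal:

$$ \frac{\partial L}{\partial \sigma^2} = \sum_i \frac{\partial L}{\partial \hat{x}_i}\,(x_i - \mu)\cdot\left(-\tfrac{1}{2}\right)(\sigma^2 + \epsilon)^{-3/2} $$

$$ \frac{\partial L}{\partial \mu} = \sum_i \frac{\partial L}{\partial \hat{x}_i}\cdot\frac{-1}{\sqrt{\sigma^2 + \epsilon}} \;+\; \frac{\partial L}{\partial \sigma^2}\cdot\frac{\sum_i -2(x_i - \mu)}{N} $$

$$ \frac{\partial L}{\partial x_i} = \frac{\partial L}{\partial \hat{x}_i}\cdot\frac{1}{\sqrt{\sigma^2 + \epsilon}} \;+\; \frac{\partial L}{\partial \sigma^2}\cdot\frac{2(x_i - \mu)}{N} \;+\; \frac{\partial L}{\partial \mu}\cdot\frac{1}{N} $$

These are, respectively, the deltaVar, deltaMean, and final activity-buffer updates computed by the three loops above.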
Example #22
void MLPOutputLayer::multiclassNonlocalStats(){
   const PVLayerLoc * loc = getLayerLoc();
   int nx = loc->nx;
   int ny = loc->ny;
   int nf = loc->nf;
   int numNeurons = getNumNeurons();
   pvdata_t * A = getCLayer()->activity->data;
   pvdata_t * gtA = gtLayer->getCLayer()->activity->data;
   float sumsq = 0;
   //Winner take all in the output layer
   int currNumRight = 0;
   int currNumWrong = 0;
   assert(classBuffer);
   //Clear classBuffer
   for(int i = 0; i < nf; i++){
      classBuffer[i] = 0;
   }
   //Only go through restricted
   //Calculate the sum squared error
   for(int ni = 0; ni < numNeurons; ni++){
      int nExt = kIndexExtended(ni, nx, ny, nf, loc->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up);
      int fi = featureIndex(nExt, nx+loc->halo.lt+loc->halo.rt, ny+loc->halo.dn+loc->halo.up, nf);
      //Sum over x and y direction
      classBuffer[fi] += A[nExt];
      sumsq += pow(A[nExt] - gtA[nExt], 2);
   }
   //Normalize classBuffer to find mean
   for(int i = 0; i < nf; i++){
      classBuffer[i] /= nx*ny;
   }
   //Reduce all classBuffers through a mean
#ifdef PV_USE_MPI
   MPI_Allreduce(MPI_IN_PLACE, &sumsq, 1, MPI_FLOAT, MPI_SUM, parent->icCommunicator()->communicator());
   MPI_Allreduce(MPI_IN_PLACE, classBuffer, nf, MPI_FLOAT, MPI_SUM, parent->icCommunicator()->communicator());
   //Normalize classBuffer across processors
   for(int i = 0; i < nf; i++){
      classBuffer[i] /= parent->icCommunicator()->commSize();
   }
#endif // PV_USE_MPI
   //Find max
   float estMaxF = -1000;
   int estMaxFi = -1;
   float actualMaxF = -1000;
   int actualMaxFi = -1;
   for(int i = 0; i < nf; i++){
      if(classBuffer[i] >= estMaxF){
         estMaxF = classBuffer[i];
         estMaxFi = i;
      }
      int nExt = kIndex(loc->halo.lt, loc->halo.up, i, nx+loc->halo.lt+loc->halo.rt, ny+loc->halo.dn+loc->halo.up, nf);
      if(gtA[nExt] >= actualMaxF){
         actualMaxF = gtA[nExt];
         actualMaxFi = i;
      }
   }
   //Calculate stats
   //Found winning feature, compare to ground truth
   if(estMaxFi == actualMaxFi){
      currNumRight++;
   }
   else{
      currNumWrong++;
   }
#ifdef PV_USE_MPI
   MPI_Allreduce(MPI_IN_PLACE, &currNumRight, 1, MPI_INT, MPI_SUM, parent->icCommunicator()->communicator());
   MPI_Allreduce(MPI_IN_PLACE, &currNumWrong, 1, MPI_INT, MPI_SUM, parent->icCommunicator()->communicator());
#endif // PV_USE_MPI
   numRight += currNumRight;
   numWrong += currNumWrong;
   progressNumRight += currNumRight;
   progressNumWrong += currNumWrong;
   //Print if needed
   float timef = parent->simulationTime();
   if(timef >= nextStatProgress){
      //Update nextStatProgress
      nextStatProgress += statProgressPeriod;
      if (parent->columnId()==0) {
         float totalScore = 100*float(numRight)/float(numRight+numWrong);
         float progressScore = 100*float(progressNumRight)/float(progressNumRight+progressNumWrong);
         fprintf(stdout, "time:%f  layer:\"%s\"  total:%f%%  progressStep:%f%%  energy:%f\n", timef, name, totalScore, progressScore, sumsq/2);
      }
      //Reset progressStats
      progressNumRight = 0;
      progressNumWrong = 0;
   }
}
Example #23
int TransposePoolingConn::deliverPresynapticPerspective(PVLayerCube const * activity, int arborID) {
   //Check if we need to update based on connection's channel
   if(getChannel() == CHANNEL_NOUPDATE){
      return PV_SUCCESS;
   }
   assert(post->getChannel(getChannel()));

   const PVLayerLoc * preLoc = preSynapticLayer()->getLayerLoc();
   const PVLayerLoc * postLoc = postSynapticLayer()->getLayerLoc();

   assert(arborID >= 0);
   const int numExtended = activity->numItems;

   //Grab postIdxLayer's data
   int* postIdxData = NULL;
   if(pvpatchAccumulateType == ACCUMULATE_MAXPOOLING){
      PoolingIndexLayer* postIndexLayer = originalConn->getPostIndexLayer();
      assert(postIndexLayer);
      //Make sure this layer is an integer layer
      assert(postIndexLayer->getDataType() == PV_INT);
      DataStore * store = parent->icCommunicator()->publisherStore(postIndexLayer->getLayerId());
      int delay = getDelay(arborID);

      //TODO this is currently a hack, need to properly implement data types.
      postIdxData = (int*) store->buffer(LOCAL, delay);
   }

   for(int b = 0; b < parent->getNBatch(); b++){
      pvdata_t * activityBatch = activity->data + b * (preLoc->nx + preLoc->halo.rt + preLoc->halo.lt) * (preLoc->ny + preLoc->halo.up + preLoc->halo.dn) * preLoc->nf;
      pvdata_t * gSynPatchHeadBatch = post->getChannel(getChannel()) + b * postLoc->nx * postLoc->ny * postLoc->nf;
      int * postIdxDataBatch = NULL;
      if(pvpatchAccumulateType == ACCUMULATE_MAXPOOLING){
         postIdxDataBatch = postIdxData + b * originalConn->getPostIndexLayer()->getNumExtended();
      }

      unsigned int * activeIndicesBatch = NULL;
      if(activity->isSparse){
         activeIndicesBatch = activity->activeIndices + b * (preLoc->nx + preLoc->halo.rt + preLoc->halo.lt) * (preLoc->ny + preLoc->halo.up + preLoc->halo.dn) * preLoc->nf;
      }

      int numLoop;
      if(activity->isSparse){
         numLoop = activity->numActive[b];
      }
      else{
         numLoop = numExtended;
      }

#ifdef PV_USE_OPENMP_THREADS
      //Clear all thread gsyn buffer
      if(thread_gSyn){
         int numNeurons = post->getNumNeurons();
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for
#endif
         for(int i = 0; i < parent->getNumThreads() * numNeurons; i++){
            int ti = i/numNeurons;
            int ni = i % numNeurons;
            thread_gSyn[ti][ni] = 0;
         }
      }
#endif // PV_USE_OPENMP_THREADS


#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for schedule(static)
#endif
      for (int loopIndex = 0; loopIndex < numLoop; loopIndex++) {
         int kPreExt;
         if(activity->isSparse){
            kPreExt = activeIndicesBatch[loopIndex];
         }
         else{
            kPreExt = loopIndex;
         }

         float a = activityBatch[kPreExt];
         if (a == 0.0f) continue;

         //If we're using thread_gSyn, set this here
         pvdata_t * gSynPatchHead;
#ifdef PV_USE_OPENMP_THREADS
         if(thread_gSyn){
            int ti = omp_get_thread_num();
            gSynPatchHead = thread_gSyn[ti];
         }
         else{
            gSynPatchHead = gSynPatchHeadBatch;
         }
#else // PV_USE_OPENMP_THREADS
         gSynPatchHead = gSynPatchHeadBatch;
#endif // PV_USE_OPENMP_THREADS

         const int kxPreExt = kxPos(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn + preLoc->halo.up, preLoc->nf);
         const int kyPreExt = kyPos(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn + preLoc->halo.up, preLoc->nf);
         const int kfPre = featureIndex(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn + preLoc->halo.up, preLoc->nf);

         if(pvpatchAccumulateType == ACCUMULATE_MAXPOOLING){
            const int kxPreGlobalExt = kxPreExt + preLoc->kx0;
            const int kyPreGlobalExt = kyPreExt + preLoc->ky0;
            if(kxPreGlobalExt < preLoc->halo.lt || kxPreGlobalExt >= preLoc->nxGlobal + preLoc->halo.lt ||
               kyPreGlobalExt < preLoc->halo.up || kyPreGlobalExt >= preLoc->nyGlobal + preLoc->halo.up){
               continue;
            }

            //Convert stored global extended index into local extended index
            int postGlobalExtIdx = postIdxDataBatch[kPreExt];

            // If the input layer is sparse and every input to this location was zero, postGlobalExtIdx will still hold its initial value of -1.
            if(postGlobalExtIdx == -1) { continue; }

            //Make sure the index is in bounds
            assert(postGlobalExtIdx >= 0 && postGlobalExtIdx <
                  (postLoc->nxGlobal + postLoc->halo.lt + postLoc->halo.rt) * 
                  (postLoc->nyGlobal + postLoc->halo.up + postLoc->halo.dn) * 
                  postLoc->nf);

            const int kxPostGlobalExt = kxPos(postGlobalExtIdx, postLoc->nxGlobal + postLoc->halo.lt + postLoc->halo.rt, postLoc->nyGlobal + postLoc->halo.dn + postLoc->halo.up, postLoc->nf);
            const int kyPostGlobalExt = kyPos(postGlobalExtIdx, postLoc->nxGlobal + postLoc->halo.lt + postLoc->halo.rt, postLoc->nyGlobal + postLoc->halo.dn + postLoc->halo.up, postLoc->nf);
            const int kfPost = featureIndex(postGlobalExtIdx, postLoc->nxGlobal + postLoc->halo.lt + postLoc->halo.rt, postLoc->nyGlobal + postLoc->halo.dn + postLoc->halo.up, postLoc->nf);

            const int kxPostLocalRes = kxPostGlobalExt - postLoc->kx0 - postLoc->halo.lt;
            const int kyPostLocalRes = kyPostGlobalExt - postLoc->ky0 - postLoc->halo.up;
            if(kxPostLocalRes < 0 || kxPostLocalRes >= postLoc->nx ||
               kyPostLocalRes < 0 || kyPostLocalRes >= postLoc->ny){
               continue;
            }
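
            // Conversion worked example (illustrative numbers): with
            // postLoc->kx0 = 8 and postLoc->halo.lt = 2, a global extended x
            // index of 12 maps to local restricted x = 12 - 8 - 2 = 2; positions
            // that land outside [0, nx) live on another MPI rank and were
            // skipped just above.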

            const int kPostLocalRes = kIndex(kxPostLocalRes, kyPostLocalRes, kfPost, postLoc->nx, postLoc->ny, postLoc->nf);
            gSynPatchHead[kPostLocalRes] = a; // write via gSynPatchHead so the per-thread buffer is used under OpenMP
         }
         else{
            PVPatch * weights = getWeights(kPreExt, arborID);
            const int nk = weights->nx * fPatchSize();
            const int ny = weights->ny;
            pvgsyndata_t * postPatchStart = gSynPatchHead + getGSynPatchStart(kPreExt, arborID);
            const int sy  = getPostNonextStrides()->sy;       // stride in layer

            int offset = kfPre;
            int sf = fPatchSize();

            pvwdata_t w = 1.0;
            if(getPvpatchAccumulateType() == ACCUMULATE_SUMPOOLING){
              float relative_XScale = pow(2, (post->getXScale() - pre->getXScale()));
              float relative_YScale = pow(2, (post->getYScale() - pre->getYScale()));
              w = 1.0/(nxp*nyp*relative_XScale*relative_YScale);
            }
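
            // Normalization worked example (illustrative numbers): for a 2x2
            // pooling patch (nxp = nyp = 2) with pre and post at the same scale
            // (relative_XScale = relative_YScale = 1), the weight is
            // w = 1/(2*2*1*1) = 0.25, so sum pooling averages the four
            // contributing inputs.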
            void* auxPtr = NULL;
            for (int y = 0; y < ny; y++) {
               (accumulateFunctionPointer)(0, nk, postPatchStart + y*sy + offset, a, &w, auxPtr, sf);
            }
         }
      }

#ifdef PV_USE_OPENMP_THREADS
      //Set back into gSyn
      if(thread_gSyn){
         pvdata_t * gSynPatchHead = gSynPatchHeadBatch;
         int numNeurons = post->getNumNeurons();
         //Looping over neurons first to be thread safe
#pragma omp parallel for
         for(int ni = 0; ni < numNeurons; ni++){
            for(int ti = 0; ti < parent->getNumThreads(); ti++){
               if(pvpatchAccumulateType == ACCUMULATE_MAXPOOLING){
                  if(gSynPatchHead[ni] < fabs(thread_gSyn[ti][ni])){
                     gSynPatchHead[ni] = thread_gSyn[ti][ni];
                  }
               }
               else{
                  gSynPatchHead[ni] += thread_gSyn[ti][ni];
               }
            }
         }
      }
#endif
   }
   return PV_SUCCESS;
}
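
// Illustrative sketch only (not part of the PetaVision source): a minimal
// accumulate callback with the same argument order as the
// accumulateFunctionPointer call above, assuming pvgsyndata_t and pvwdata_t
// are float typedefs as elsewhere in these examples. For sum pooling it adds
// w[0]-weighted presynaptic activity into every sf-strided element of one
// patch row; kPreGlobalExt and auxPtr are only needed by the max-pooling
// variant, which records the winning pre index through auxPtr.
static void sumpooling_accumulate_sketch(int kPreGlobalExt, int nk, float * v,
      float a, float * w, void * auxPtr, int sf)
{
   (void) kPreGlobalExt;
   (void) auxPtr; // unused for plain sum pooling
   for (int k = 0; k < nk; k += sf) {
      v[k] += a * w[0];
   }
}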
int LCALIFLateralKernelConn::allocateDataStructures() {
   int status = HyPerConn::allocateDataStructures();

   // Neurons don't inhibit themselves, only their neighbors; set self-interaction weights to zero.
   assert(nxp % 2 == 1 && nyp % 2 == 1 && getNumDataPatches()==nfp);
   for (int k=0; k<getNumDataPatches(); k++) {
      int n = kIndex((nxp-1)/2, (nyp-1)/2, k, nxp, nyp, nfp);
      get_wDataHead(0, k)[n] = 0.0f;
   }
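
   // Center-index worked example (illustrative numbers): for 5x5 patches
   // (nxp = nyp = 5) the center is ((5-1)/2, (5-1)/2) = (2, 2), so feature k
   // zeroes weight index kIndex(2, 2, k, 5, 5, nfp) = (2*5 + 2)*nfp + k
   // = 12*nfp + k.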

   integratedSpikeCountCube = pvcube_new(pre->getLayerLoc(), pre->getNumExtended());
   integratedSpikeCount = integratedSpikeCountCube->data;
   for (int k=0; k<pre->getNumExtended(); k++) {
      integratedSpikeCount[k] = integrationTimeConstant*getTargetRateKHz(); // Spike counts initialized to equilibrium value
   }
   mpi_datatype = Communicator::newDatatypes(pre->getLayerLoc());
   if (mpi_datatype==NULL) {
      fprintf(stderr, "LCALIFLateralKernelConn \"%s\" error creating mpi_datatype\n", name);
      abort();
   }

   // Compute the number of times each patch contributes to dw, for proper averaging.
   int num_arbors = numberOfAxonalArborLists();
   interiorCounts = (float **) calloc(num_arbors, sizeof(float *));
   if (interiorCounts==NULL) {
      fprintf(stderr, "LCALIFLateralKernelConn::initialize \"%s\" error: unable to allocate memory for interiorCounts pointer\n", name);
   }
   interiorCounts[0] = (float *) calloc(num_arbors*getNumDataPatches()*nxp*nyp*nfp, sizeof(float)); // one slab per arbor; see the per-arbor offsets below
   if (interiorCounts[0]==NULL) {
      fprintf(stderr, "LCALIFLateralKernelConn::initialize \"%s\" error: unable to allocate memory for interiorCounts\n", name);
   }
   for (int arbor=1; arbor<num_arbors; arbor++) {
      interiorCounts[arbor] = interiorCounts[0]+arbor*getNumDataPatches()*nxp*nyp*nfp;
   }

   const PVLayerLoc * preloc = pre->getLayerLoc();
   int nxpre = preloc->nx;
   int nypre = preloc->ny;
   int nfpre = preloc->nf;

   int nExt = pre->getNumExtended();
   int sya = getPostExtStrides()->sy;
   int nxglob = preloc->nxGlobal;
   int nyglob = preloc->nyGlobal;
   int kx0 = preloc->kx0;
   int ky0 = preloc->ky0;
   for (int arbor=0; arbor<numberOfAxonalArborLists(); arbor++) {
      for(int kExt=0; kExt<nExt;kExt++) {
         int xglob = kxPos(kExt, nxpre + preloc->halo.lt + preloc->halo.rt, nypre + preloc->halo.dn + preloc->halo.up, nfpre) + kx0 - preloc->halo.lt;
         int yglob = kyPos(kExt, nxpre + preloc->halo.lt + preloc->halo.rt, nypre + preloc->halo.dn + preloc->halo.up, nfpre) + ky0 - preloc->halo.up;
         if (xglob < 0 || xglob >= nxglob || yglob < 0 || yglob >= nyglob) {
            continue;
         }
         PVPatch * weights = getWeights(kExt,arbor);
         int offset = (int) getAPostOffset(kExt, arbor);
         int ny = weights->ny;
         int nk = weights->nx * nfp;
         int interiorCountOffset = get_wData(arbor, kExt)-get_wDataStart(arbor);
         int lineoffsetw = 0;
         int lineoffseta = 0;
         for( int y=0; y<ny; y++ ) {
            for( int k=0; k<nk; k++ ) {
               int postactindex = offset+lineoffseta+k;
               if (postactindex != kExt) { // Neurons don't inhibit themselves
                  interiorCounts[arbor][interiorCountOffset + lineoffsetw + k]++;
               }
            }
            lineoffsetw += syp;
            lineoffseta += sya;
         }
      }
   }
   int bufsize = numberOfAxonalArborLists() * getNumDataPatches() * nxp * nyp * nfp;
// TODO-CER-2014.3.26 - Ensure that reduction is done when not using MPI
#ifdef PV_USE_MPI
   MPI_Allreduce(MPI_IN_PLACE, interiorCounts[0], bufsize, MPI_FLOAT, MPI_SUM, parent->icCommunicator()->communicator());
#endif
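
   // With MPI, MPI_IN_PLACE makes every rank use interiorCounts[0] as both the
   // send and receive buffer, summing the counts element-wise across ranks. In
   // a single-process build the local counts are already complete, so no extra
   // reduction appears to be needed (presumably the point of the TODO above).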

   return status;
}
Example #25
int PointProbe::calcValues(double timevalue) {
   assert(this->getNumValues()==2);
   double * valuesBuffer = this->getValuesBuffer();
   //We need to calculate which mpi process contains the target point, and send that info to the root process
   //Each process calculates local index
   const PVLayerLoc * loc = getTargetLayer()->getLayerLoc();
   //Calculate local coordinates from global ones
   const int kx0 = loc->kx0;
   const int ky0 = loc->ky0;
   const int kb0 = loc->kb0;
   const int nx = loc->nx;
   const int ny = loc->ny;
   const int nf = loc->nf;
   const int nbatch = loc->nbatch;
   const int xLocLocal = xLoc - kx0;
   const int yLocLocal = yLoc - ky0;
   const int nbatchLocal = batchLoc - kb0;
   
   //if in bounds
   if( xLocLocal >= 0 && xLocLocal < nx &&
       yLocLocal >= 0 && yLocLocal < ny &&
       nbatchLocal >= 0 && nbatchLocal < nbatch){
      const pvdata_t * V = getTargetLayer()->getV();
      const pvdata_t * activity = getTargetLayer()->getLayerData();
      //Send V and A to root
      const int k = kIndex(xLocLocal, yLocLocal, fLoc, nx, ny, nf);
      if(V){
         valuesBuffer[0] = V[k + nbatchLocal*getTargetLayer()->getNumNeurons()];
      }
      else {
         valuesBuffer[0] = 0.0;
      }
      if(activity){
         const int kex = kIndexExtended(k, nx, ny, nf, loc->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up);
         valuesBuffer[1] = activity[kex + nbatchLocal * getTargetLayer()->getNumExtended()];
      }
      else {
         valuesBuffer[1] = 0.0;
      }
      //If not in root process, send to root process
      if(parent->columnId()!=0){
         MPI_Send(&valuesBuffer, 2, MPI_DOUBLE, 0, 0, parent->icCommunicator()->communicator());
      }
   }

   //Root process
   if(parent->columnId()==0){
      //Calculate which rank target neuron is
      //TODO we need to calculate rank from batch as well
      int xRank = xLoc/nx;
      int yRank = yLoc/ny;

      int srcRank = rankFromRowAndColumn(yRank, xRank, parent->icCommunicator()->numCommRows(), parent->icCommunicator()->numCommColumns());
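
      //Rank-lookup worked example (illustrative numbers): with nx = 32 columns
      //per process, a target at xLoc = 70 falls in column rank 70/32 = 2;
      //rankFromRowAndColumn then linearizes (yRank, xRank), assuming every
      //process owns an identical nx-by-ny tile.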

      //If srcRank is not root process, MPI_Recv from that rank
      if(srcRank != 0){
         MPI_Recv(&valuesBuffer, 2, MPI_DOUBLE, srcRank, 0, parent->icCommunicator()->communicator(), MPI_STATUS_IGNORE);
      }
   }
   return PV_SUCCESS;
}
Example #26
int PoolingConn::deliverPresynapticPerspective(PVLayerCube const * activity, int arborID) {

    //Check if we need to update based on connection's channel
    if(getChannel() == CHANNEL_NOUPDATE) {
        return PV_SUCCESS;
    }
    assert(post->getChannel(getChannel()));

    float dt_factor;
    if (getPvpatchAccumulateType()==ACCUMULATE_STOCHASTIC) {
        dt_factor = getParent()->getDeltaTime();
    }
    else {
        dt_factor = getConvertToRateDeltaTimeFactor();
    }

    const PVLayerLoc * preLoc = preSynapticLayer()->getLayerLoc();
    const PVLayerLoc * postLoc = postSynapticLayer()->getLayerLoc();

    assert(arborID >= 0);
    const int numExtended = activity->numItems;

    float resetVal = 0;
    if(getPvpatchAccumulateType() == ACCUMULATE_MAXPOOLING) {
        resetVal = -INFINITY;
        float* gSyn = post->getChannel(getChannel());
        //gSyn is a restricted (non-extended) buffer
#ifdef PV_USE_OPENMP_THREADS
        #pragma omp parallel for
#endif
        for(int i = 0; i < post->getNumNeuronsAllBatches(); i++) {
            gSyn[i] = resetVal;
        }

    }
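    // Seeding with -INFINITY lets the first compared value win the running max;
    // slots that no presynaptic patch reaches keep -INFINITY and, for sparse
    // inputs, are reset to zero in the cleanup at the end of this method.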


    clearGateIdxBuffer();

    for(int b = 0; b < parent->getNBatch(); b++) {
        pvdata_t * activityBatch = activity->data + b * (preLoc->nx + preLoc->halo.rt + preLoc->halo.lt) * (preLoc->ny + preLoc->halo.up + preLoc->halo.dn) * preLoc->nf;
        pvdata_t * gSynPatchHeadBatch = post->getChannel(getChannel()) + b * postLoc->nx * postLoc->ny * postLoc->nf;
        int* gatePatchHeadBatch = NULL;
        if(needPostIndexLayer) {
            gatePatchHeadBatch = postIndexLayer->getChannel(CHANNEL_EXC) + b * postIndexLayer->getNumNeurons();
        }

        unsigned int * activeIndicesBatch = NULL;
        if(activity->isSparse) {
            activeIndicesBatch = activity->activeIndices + b * (preLoc->nx + preLoc->halo.rt + preLoc->halo.lt) * (preLoc->ny + preLoc->halo.up + preLoc->halo.dn) * preLoc->nf;
        }
        int numLoop;
        if(activity->isSparse) {
            numLoop = activity->numActive[b];
        }
        else {
            numLoop = numExtended;
        }

        if(thread_gateIdxBuffer) {
#ifdef PV_USE_OPENMP_THREADS
            #pragma omp parallel for
#endif
            for(int i = 0; i < parent->getNumThreads() * post->getNumNeurons(); i++) {
                int ti = i/post->getNumNeurons();
                int ni = i % post->getNumNeurons();
                thread_gateIdxBuffer[ti][ni] = -1;
            }
        }

#ifdef PV_USE_OPENMP_THREADS
        //Clear all gsyn buffers
        if(thread_gSyn) {
            int numNeurons = post->getNumNeurons();
            #pragma omp parallel for
            for(int i = 0; i < parent->getNumThreads() * numNeurons; i++) {
                int ti = i/numNeurons;
                int ni = i % numNeurons;
                thread_gSyn[ti][ni] = resetVal;
            }
        }
#endif // PV_USE_OPENMP_THREADS

#ifdef PV_USE_OPENMP_THREADS
        #pragma omp parallel for schedule(static)
#endif
        for (int loopIndex = 0; loopIndex < numLoop; loopIndex++) {
            int kPreExt;
            if(activity->isSparse) {
                kPreExt = activeIndicesBatch[loopIndex];
            }
            else {
                kPreExt = loopIndex;
            }

            float a = activityBatch[kPreExt] * dt_factor;
            //if (a == 0.0f) continue;

            //If we're using thread_gSyn, set this here
            pvdata_t * gSynPatchHead;
            //float * gatePatchHead = NULL;
            int * gatePatchHead = NULL;
#ifdef PV_USE_OPENMP_THREADS
            if(thread_gSyn) {
                int ti = omp_get_thread_num();
                gSynPatchHead = thread_gSyn[ti];
            }
            else {
                gSynPatchHead = gSynPatchHeadBatch;
            }

            if(needPostIndexLayer) {
                if(thread_gateIdxBuffer) {
                    int ti = omp_get_thread_num();
                    gatePatchHead = thread_gateIdxBuffer[ti];
                }
                else {
                    gatePatchHead = gatePatchHeadBatch;
                }
            }
#else // PV_USE_OPENMP_THREADS
            gSynPatchHead = gSynPatchHeadBatch;
            if(needPostIndexLayer) {
                gatePatchHead = gatePatchHeadBatch;
            }
#endif // PV_USE_OPENMP_THREADS
            //deliverOnePreNeuronActivity(kPreExt, arborID, a, gSynPatchHead, gatePatchHead);

            PVPatch * weights = getWeights(kPreExt, arborID);
            const int nk = weights->nx * fPatchSize();
            const int ny = weights->ny;
            const int sy  = getPostNonextStrides()->sy;       // stride in layer
            pvwdata_t * weightDataStart = NULL;
            pvgsyndata_t * postPatchStart = gSynPatchHead + getGSynPatchStart(kPreExt, arborID);
            int* postGatePatchStart = gatePatchHead ? gatePatchHead + getGSynPatchStart(kPreExt, arborID) : NULL; // avoid pointer arithmetic on NULL when no gate layer is used
            //float* postGatePatchStart = gatePatchHead + getGSynPatchStart(kPreExt, arborID);

            const int kxPreExt = kxPos(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn + preLoc->halo.up, preLoc->nf);
            const int kyPreExt = kyPos(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn + preLoc->halo.up, preLoc->nf);
            const int kfPre = featureIndex(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn + preLoc->halo.up, preLoc->nf);

            const int kxPreGlobalExt = kxPreExt + preLoc->kx0;
            const int kyPreGlobalExt = kyPreExt + preLoc->ky0;

            const int kPreGlobalExt = kIndex(kxPreGlobalExt, kyPreGlobalExt, kfPre, preLoc->nxGlobal + preLoc->halo.lt + preLoc->halo.rt, preLoc->nyGlobal + preLoc->halo.up + preLoc->halo.dn, preLoc->nf);
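
            // kPreGlobalExt is presumably what max pooling records in the gate
            // layer: the accumulate callback takes it as its first argument and,
            // on a new maximum, can store it through auxPtr so that transpose
            // connections can recover which pre neuron won the max.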

            int offset = kfPre;
            int sf = fPatchSize();
            pvwdata_t w = 1.0;
            if(getPvpatchAccumulateType() == ACCUMULATE_SUMPOOLING) {
                float relative_XScale = pow(2, (post->getXScale() - pre->getXScale()));
                float relative_YScale = pow(2, (post->getYScale() - pre->getYScale()));
                w = 1.0/(nxp*nyp*relative_XScale*relative_YScale);
            }
            void* auxPtr = NULL;
            for (int y = 0; y < ny; y++) {
                if(needPostIndexLayer) {
                    auxPtr = (postGatePatchStart+ y*sy + offset);
                }
                (accumulateFunctionPointer)(kPreGlobalExt, nk, postPatchStart + y*sy + offset, a, &w, auxPtr, sf);
            }
        }
#ifdef PV_USE_OPENMP_THREADS
        //Accumulate back into gSyn // Should this be done in HyPerLayer where it can be done once, as opposed to once per connection?
        if(thread_gSyn) {
            pvdata_t * gSynPatchHead = gSynPatchHeadBatch;
            //float* gateIdxBuffer = postIndexLayer->getChannel(CHANNEL_EXC);
            int * gateIdxBuffer = NULL;
            if(needPostIndexLayer && thread_gateIdxBuffer) {
                gateIdxBuffer = gatePatchHeadBatch;
            }
            int numNeurons = post->getNumNeurons();
            //Looping over neurons first to be thread safe
            #pragma omp parallel for
            for(int ni = 0; ni < numNeurons; ni++) {
                //Different for maxpooling
                if(getPvpatchAccumulateType() == ACCUMULATE_MAXPOOLING) {
                    for(int ti = 0; ti < parent->getNumThreads(); ti++) {
                        if(gSynPatchHead[ni] < thread_gSyn[ti][ni]) {
                            gSynPatchHead[ni] = thread_gSyn[ti][ni];
                            if(needPostIndexLayer && thread_gateIdxBuffer) {
                                gateIdxBuffer[ni] = thread_gateIdxBuffer[ti][ni];
                                assert(gateIdxBuffer[ni] >= 0);
                            }
                        }
                    }
                }
                else {
                    for(int ti = 0; ti < parent->getNumThreads(); ti++) {
                        gSynPatchHead[ni] += thread_gSyn[ti][ni];
                    }
                }
            }
        }
#endif
    }
    if(activity->isSparse) {
        pvdata_t * gSyn = post->getChannel(getChannel());
        for (int k=0; k<post->getNumNeuronsAllBatches(); k++) {
            if (gSyn[k]==-INFINITY) {
                gSyn[k] = 0.0f;
            }
        }
    }
    return PV_SUCCESS;
}