// // A replacement for globalIndexFromLocal from conversions.h. // WARNING - any changes in conversions.h should be reflected here. static inline int globalIndexFromLocal_nompi(int kl, PVLayerLoc loc) { int kxg = loc.kx0 + kxPos(kl, loc.nx, loc.ny, loc.nf); int kyg = loc.ky0 + kyPos(kl, loc.nx, loc.ny, loc.nf); int kf = featureIndex(kl, loc.nx, loc.ny, loc.nf); return kIndex(kxg, kyg, kf, loc.nxGlobal, loc.nyGlobal, loc.nf); }
int BIDSSensorLayer::updateState(double timef, double dt){ pvdata_t * output = getCLayer()->V; pvdata_t * input = blayer->getCLayer()->activity->data; int index; //Iterate through post layer for (int i = 0; i < nx * ny; i++){ assert(nf == 1); //Iterate through features // std::cout << "Node (" << coords[i].xCoord << ", " << coords[i].yCoord << ")\n"; for (int k = 0; k < nf; k++){ int x = i % nx; int y = (int) floor(i/nx); index = kIndex(x, y, k, nx, ny, nf); data[i][buf_index] = input[index] - (neutral_val / 256); // std::cout << "\tBuf_index: " << buf_index << ": " << data[i][buf_index] << "\n"; //Next buf index, or reset if at end float out = matchFilter(i, (int)(timef * dt)); output[index] = out * weight; } } if(buf_index < buf_size - 1){ buf_index++; } else{ buf_index = 0; } HyPerLayer::setActivity(); return PV_SUCCESS; }
int ImageTestLayer::updateStateWrapper(double time, double dt) { Image::updateStateWrapper(time, dt); const PVLayerLoc * loc = getLayerLoc(); int nx = loc->nx; int ny = loc->ny; int nf = loc->nf; int nbatch = loc->nbatch; for(int b = 0; b < nbatch; b++){ pvdata_t * dataBatch = data + b * getNumExtended(); for(int nkRes = 0; nkRes < getNumNeurons(); nkRes++){ //Calculate extended index int nkExt = kIndexExtended(nkRes, nx, ny, nf, loc->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up); //checkVal is the value from batch index 0 pvdata_t checkVal = dataBatch[nkExt] * 255; int kxGlobal = kxPos(nkRes, nx, ny, nf) + loc->kx0; int kyGlobal = kyPos(nkRes, nx, ny, nf) + loc->ky0; int kf = featureIndex(nkRes, nx, ny, nf); pvdata_t expectedVal = kIndex(kxGlobal, kyGlobal, kf, loc->nxGlobal, loc->nyGlobal, nf); if(fabs(checkVal - expectedVal) >= 1e-5){ std::cout << "ImageFileIO test Expected: " << expectedVal << " Actual: " << checkVal << "\n"; exit(-1); } } } return PV_SUCCESS; }
int KernelProbe::outputState(double timed) { InterColComm * icComm = parent->icCommunicator(); const int rank = icComm->commRank(); if( rank != 0 ) return PV_SUCCESS; assert(getTargetConn()!=NULL); int nxp = getTargetHyPerConn()->xPatchSize(); int nyp = getTargetHyPerConn()->yPatchSize(); int nfp = getTargetHyPerConn()->fPatchSize(); int patchSize = nxp*nyp*nfp; const pvwdata_t * wdata = getTargetHyPerConn()->get_wDataStart(arborID)+patchSize*kernelIndex; const pvwdata_t * dwdata = outputPlasticIncr ? getTargetHyPerConn()->get_dwDataStart(arborID)+patchSize*kernelIndex : NULL; fprintf(outputstream->fp, "Time %f, Conn \"%s\", nxp=%d, nyp=%d, nfp=%d\n", timed, getTargetConn()->getName(),nxp, nyp, nfp); for(int f=0; f<nfp; f++) { for(int y=0; y<nyp; y++) { for(int x=0; x<nxp; x++) { int k = kIndex(x,y,f,nxp,nyp,nfp); fprintf(outputstream->fp, " x=%d, y=%d, f=%d (index %d):", x, y, f, k); if(getOutputWeights()) { fprintf(outputstream->fp, " weight=%f", (float)wdata[k]); } if(getOutputPlasticIncr()) { fprintf(outputstream->fp, " dw=%f", (float)dwdata[k]); } fprintf(outputstream->fp,"\n"); } } } return PV_SUCCESS; }
int StreamReconLayer::updateState(double timef, double dt) { update_timer->start(); pvdata_t * V = getV(); int nx = getLayerLoc()->nx; int ny = getLayerLoc()->ny; int nf = getLayerLoc()->nf; for (int i = 0; i < nx; i++) { int vx = i; int gx = i; for (int j = 0; j < nf; j ++) { int vf = j; int gf; if (vf + bufferLevel < nf) { gf = bufferLevel + j; } else { gf = bufferLevel + j - nf; } int vindex = kIndex(vx, 0, vf, nx, ny, nf); int gindex = kIndex(gx, 0, gf, nx, ny, nf); V[vindex] = GSyn[0][gindex]; } } //Copy V to A buffer PV::HyPerLayer::setActivity(); if (bufferLevel < nf - 1) { bufferLevel++; } else { bufferLevel = 0; } update_timer->stop(); return PV_SUCCESS; } // end update state
/**
 * Checks every weight of every data patch (arbor 0) of conn against the single
 * value the test expects, and reports each mismatch.
 *
 * Expected value (same for every weight):
 *  - with mirror boundary conditions on the pre layer: (127/255)^2
 *  - otherwise:                                        (127/255) * 0.5
 * The pixel value from the input is actually 127, divided by 255, so it is a
 * little less than 0.5. It is squared in the mirror case because both pre and
 * post grab their activity from the image. This normalization takes into
 * account that pre may not be active.
 *
 * An older formula depended on the pre layer's global size, the patch offset
 * from center and the pre/post scale difference. It survived only as a
 * commented-out line, so the locals that fed it have been removed, and the
 * loop-invariant expected value is now computed once.
 *
 * @param conn  connection whose weights are verified
 * @return PV_SUCCESS if all weights are within 1e-5 of the expected value,
 *         PV_FAILURE otherwise
 */
int dumponeweight(HyPerConn * conn) {
   int status = PV_SUCCESS;
   bool errorfound = false;
   const int rank = conn->getParent()->icCommunicator()->commRank();
   const int nxp = conn->xPatchSize();
   const int nyp = conn->yPatchSize();
   const int nfp = conn->fPatchSize();
   const bool usingMirrorBCs = conn->preSynapticLayer()->useMirrorBCs();

   // Loop-invariant: the expected weight does not depend on patch or position.
   const pvdata_t correct = usingMirrorBCs ? pow(float(127)/float(255),2) : (float(127)/float(255)) * .5;

   for( int p=0; p<conn->getNumDataPatches(); p++ ) {
      pvwdata_t * wgtData = conn->get_wDataHead(0,p);
      for( int f=0; f<nfp; f++ ) {
         for( int x=0; x<nxp; x++ ) {
            for( int y=0; y<nyp; y++ ) {
               int idx = kIndex(x, y, f, nxp, nyp, nfp);
               //TODO-CER-2014.4.4 - weight conversion
               pvdata_t wgt = wgtData[idx];
               if( fabs(wgt-correct)>1.0e-5 ) {
                  pvErrorNoExit(errorMessage);
                  // The banner and connection header are printed only for the first mismatch.
                  if( errorfound == false ) {
                     errorfound = true;
                     for( int k=0; k<72; k++ ) {
                        pvInfo().printf("=");
                     }
                     errorMessage.printf("\n");
                     errorMessage.printf("Rank %d, Connection \"%s\":\n",rank, conn->getName());
                  }
                  errorMessage.printf("Rank %d, Patch %d, x=%d, y=%d, f=%d: weight=%f, correct=%f, off by a factor of %f\n", rank, p, x, y, f, wgt, correct, wgt/correct);
                  status = PV_FAILURE;
               }
            }
         }
      }
   }
   if( status == PV_SUCCESS ) {
      pvInfo().printf("Rank %d, connection \"%s\": Weights are correct.\n", rank, conn->getName());
   }
   return status;
}
//Makes a layer such that the restricted space is the index, but with spinning order be [x, y, f] as opposed to [f, x, y] int InputLayer::updateState(double timef, double dt){ //Grab layer size const PVLayerLoc* loc = getLayerLoc(); int nx = loc->nx; int ny = loc->ny; int nf = loc->nf; int nxGlobal = loc->nxGlobal; int nyGlobal = loc->nyGlobal; int kx0 = loc->kx0; int ky0 = loc->ky0; for(int b = 0; b < parent->getNBatch(); b++){ pvdata_t * A = getActivity() + b * getNumExtended(); //looping over ext for(int iY = 0; iY < ny+loc->halo.up+loc->halo.dn; iY++){ for(int iX = 0; iX < nx+loc->halo.lt+loc->halo.rt; iX++){ //Calculate x and y global extended int xGlobalExt = iX + loc->kx0; int yGlobalExt = iY + loc->ky0; //Calculate x and y in restricted space int xGlobalRes = xGlobalExt - loc->halo.lt; int yGlobalRes = yGlobalExt - loc->halo.up; //Calculate base value //xGlobal and yGlobalRes can be negative int baseActivityVal = yGlobalRes * nxGlobal + xGlobalRes; for(int iFeature = 0; iFeature < nf; iFeature++){ int ext_idx = kIndex(iX, iY, iFeature, nx+loc->halo.lt+loc->halo.rt, ny+loc->halo.dn+loc->halo.up, nf); //Feature gives an offset, since it spins slowest int activityVal = baseActivityVal + iFeature * nxGlobal * nyGlobal; A[ext_idx] = activityVal; } } } } ////Printing for double checking //printf("\nOutMat\n"); ////looping over ext //for(int iFeature = 0; iFeature < nf; iFeature++){ // for(int iY = 0; iY < ny+loc->halo.up+loc->halo.dn; iY++){ // for(int iX = 0; iX < nx+loc->halo.lt+loc->halo.rt; iX++){ // int ext_idx = kIndex(iX, iY, iFeature, nx+loc->halo.lt+loc->halo.rt, ny+loc->halo.dn+loc->halo.up, nf); // printf("%03d ", (int)A[ext_idx]); // } // printf("\n"); // } // printf("\n\n"); //} return PV_SUCCESS; }
int GatePoolTestLayer::updateState(double timef, double dt) { //Do update state of ANN Layer first ANNLayer::updateState(timef, dt); //Grab layer size const PVLayerLoc* loc = getLayerLoc(); int nx = loc->nx; int ny = loc->ny; int nxGlobal = loc->nxGlobal; int nyGlobal = loc->nyGlobal; int nf = loc->nf; int kx0 = loc->kx0; int ky0 = loc->ky0; bool isCorrect = true; //Grab the activity layer of current layer for(int b = 0; b < loc->nbatch; b++) { const pvdata_t * A = getActivity() + b * getNumExtended(); //We only care about restricted space, but iY and iX are extended for(int iY = loc->halo.up; iY < ny + loc->halo.up; iY++) { for(int iX = loc->halo.lt; iX < nx + loc->halo.lt; iX++) { for(int iFeature = 0; iFeature < nf; iFeature++) { int ext_idx = kIndex(iX, iY, iFeature, nx+loc->halo.lt+loc->halo.rt, ny+loc->halo.dn+loc->halo.up, nf); float actualvalue = A[ext_idx]; int xval = (iX + kx0 - loc->halo.lt)/2; int yval = (iY + ky0 - loc->halo.up)/2; assert(xval >= 0 && xval < loc->nxGlobal); assert(yval >= 0 && yval < loc->nxGlobal); float expectedvalue; expectedvalue = iFeature * 64 + yval * 16 + xval * 2 + 4.5; expectedvalue*=4; if(fabs(actualvalue - expectedvalue) >= 1e-4) { pvErrorNoExit() << "Connection " << name << " Mismatch at (" << iX << "," << iY << ") : actual value: " << actualvalue << " Expected value: " << expectedvalue << ". Discrepancy is a whopping " << actualvalue - expectedvalue << "! Horrors!" << "\n"; isCorrect = false; } } } } } if(!isCorrect) { InterColComm * icComm = parent->icCommunicator(); MPI_Barrier(icComm->communicator()); // If there is an error, make sure that MPI doesn't kill the run before process 0 reports the error. exit(-1); } return PV_SUCCESS; }
int CPTestInputLayer::initializeV() { assert(parent->parameters()->value(name, "restart", 0.0f, false)==0.0f); // initializeV should only be called if restart is false const PVLayerLoc * loc = getLayerLoc(); for (int b = 0; b < parent->getNBatch(); b++){ pvdata_t * VBatch = getV() + b * getNumNeurons(); for (int k = 0; k < getNumNeurons(); k++){ int kx = kxPos(k,loc->nx,loc->nx,loc->nf); int ky = kyPos(k,loc->nx,loc->ny,loc->nf); int kf = featureIndex(k,loc->nx,loc->ny,loc->nf); int kGlobal = kIndex(loc->kx0+kx,loc->ky0+ky,kf,loc->nxGlobal,loc->nyGlobal,loc->nf); VBatch[k] = (pvdata_t) kGlobal; } } return PV_SUCCESS; }
int BIDSCloneLayer::mapCoords(){ //Copy restricted clone data to current clayer data for(int i = 0; i < numNodes; i++){ } const PVLayerLoc origLoc = originalLayer->getCLayer()->loc; for(int i = 0; i < numNodes; i++){ int index = kIndex(coords[i].xCoord, coords[i].yCoord, 0, clayer->loc.nx, clayer->loc.ny, clayer->loc.nf); int destIndexEx = kIndexExtended(index, clayer->loc.nx, clayer->loc.ny, clayer->loc.nf, clayer->loc.halo.lt, clayer->loc.halo.rt, clayer->loc.halo.dn, clayer->loc.halo.up); int srcIndexEx = kIndexExtended(index, origLoc.nx, origLoc.ny, origLoc.nf, origLoc.halo.lt, origLoc.halo.rt, origLoc.halo.dn, origLoc.halo.up); this->clayer->activity->data[destIndexEx] = originalLayer->getCLayer()->activity->data[srcIndexEx] == 0 ? 0:1; } return PV_SUCCESS; }
int InputLayer::updateState(double timef, double dt){ //Grab the activity layer of current layer pvdata_t * A = getActivity(); //Grab layer size const PVLayerLoc* loc = getLayerLoc(); int nx = loc->nx; int ny = loc->ny; int nf = loc->nf; int kx0 = loc->kx0; int ky0 = loc->ky0; assert(nf == 4); assert(loc->nxGlobal == 2 && loc->nyGlobal == 2); //We only care about restricted space for(int iY = loc->halo.up; iY < ny + loc->halo.up; iY++){ for(int iX = loc->halo.lt; iX < nx + loc->halo.lt; iX++){ for(int iF = 0; iF < loc->nf; iF++){ int idx = kIndex(iX, iY, iF, nx+loc->halo.lt+loc->halo.rt, ny+loc->halo.dn+loc->halo.up, nf); int xval = iX+kx0-loc->halo.lt; int yval = iY+ky0-loc->halo.up; if(timef == 10 && xval == 0 && yval == 0 && iF == 0){ A[idx] = 1; } else if(timef == 10 && xval == 1 && yval == 0 && iF == 1){ A[idx] = 1; } else if(timef == 10 && xval == 0 && yval == 1 && iF == 2){ A[idx] = 1; } else if(timef == 10 && xval == 1 && yval == 1 && iF == 3){ A[idx] = 1; } else{ A[idx] = 0; } } } } return PV_SUCCESS; }
/** * @time * @l * @k * @kex * NOTES: * - Only the activity buffer covers the extended frame - this is the frame that * includes boundaries. * - The other dynamic variables (G_E, G_I, V, Vth) cover the "real" or "restricted" * frame. */ int PointLIFProbe::writeState(double timed) { if (parent->columnId()==0 && timed >= writeTime) { pvAssert(outputStream); writeTime += writeStep; PVLayerLoc const * loc = getTargetLayer()->getLayerLoc(); const int k = kIndex(xLoc, yLoc, fLoc, loc->nxGlobal, loc->nyGlobal, loc->nf); double * valuesBuffer = getValuesBuffer(); outputStream->printf("%s t=%.1f %d" "G_E=" CONDUCTANCE_PRINT_FORMAT " G_I=" CONDUCTANCE_PRINT_FORMAT " G_IB=" CONDUCTANCE_PRINT_FORMAT " V=" CONDUCTANCE_PRINT_FORMAT " Vth=" CONDUCTANCE_PRINT_FORMAT " a=%.1f", getMessage(), timed, k, valuesBuffer[0], valuesBuffer[1], valuesBuffer[2], valuesBuffer[3], valuesBuffer[4], valuesBuffer[5]); output() << std::endl; } return PV_SUCCESS; }
int PursuitLayer::updateState(double time, double dt) { if (!updateReady) return PV_SUCCESS; int nx = getLayerLoc()->nx; int ny = getLayerLoc()->ny; int nf = getLayerLoc()->nf; PVHalo const * halo = &getLayerLoc()->halo; pvdata_t * activity = getActivity(); memset(activity, 0, getNumExtended()*sizeof(*activity)); int nxy = nx*ny; for (int kxy=0; kxy<nxy; kxy++) { int kf = foundFeatures[kxy]; if (kf>=0) { int kx = kxPos(kxy,nx,ny,1); int ky = kyPos(kxy,nx,ny,1); int kex = kIndex(kx+halo->lt, ky+halo->up, kf, nx+halo->lt+halo->rt, ny+halo->dn+halo->up, nf); /* Is this correct? Before splitting x- and y- margin widths, the ny argument was ny*nb, which seems weird. */ activity[kex] = gSynSparse[kxy]; } } //resetGSynBuffers_HyPerLayer(getNumNeurons(), getNumChannels(), GSyn[0]); updateReady = false; return PV_SUCCESS; }
int OjaKernelSpikeRateProbe::allocateDataStructures() { targetOjaKernelConn = dynamic_cast<OjaKernelConn *>(getTargetConn()); if (targetOjaKernelConn == NULL) { if (getParent()->columnId()==0) { fprintf(stderr, "LCATraceProbe error: connection \"%s\" must be an LCALIFLateralConn.\n", getTargetConn()->getName()); } abort(); } HyPerLayer * targetLayer = NULL; if (isInputRate) { targetLayer = targetOjaKernelConn->preSynapticLayer(); } else { targetLayer = targetOjaKernelConn->postSynapticLayer(); } const PVLayerLoc * loc = targetLayer->getLayerLoc(); int x_local = xg - loc->kx0; int y_local = yg - loc->ky0; bool inBounds = (x_local >= 0 && x_local < loc->nx && y_local >= 0 && y_local < loc->ny); if(inBounds ) { // if inBounds int krestricted = kIndex(x_local, y_local, feature, loc->nx, loc->ny, loc->nf); if (isInputRate) { int kextended = kIndexExtended(krestricted, loc->nx, loc->ny, loc->nf, loc->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up); spikeRate = &targetOjaKernelConn->getInputFiringRate(arbor)[kextended]; } else { spikeRate = &targetOjaKernelConn->getOutputFiringRate()[krestricted]; } } else { outputstream = NULL; } //This is now being done in BaseConnectionProbe //getTargetConn()->insertProbe(this); return PV_SUCCESS; }
int MoviePvpTestLayer::updateStateWrapper(double time, double dt) { MoviePvp::updateStateWrapper(time, dt); const PVLayerLoc * loc = getLayerLoc(); int nx = loc->nx; int ny = loc->ny; int nf = loc->nf; int nbatch = loc->nbatch; for(int b = 0; b < nbatch; b++){ pvdata_t * dataBatch = data + b * getNumExtended(); int frameIdx; if(strcmp(getBatchMethod(), "byImage") == 0){ frameIdx = (time-1) * nbatch + b; } else if(strcmp(getBatchMethod(), "byMovie") == 0){ frameIdx = b * 2 + (time-1); } for(int nkRes = 0; nkRes < getNumNeurons(); nkRes++){ //Calculate extended index int nkExt = kIndexExtended(nkRes, nx, ny, nf, loc->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up); //checkVal is the value from batch index 0 pvdata_t checkVal = dataBatch[nkExt]; int kxGlobal = kxPos(nkRes, nx, ny, nf) + loc->kx0; int kyGlobal = kyPos(nkRes, nx, ny, nf) + loc->ky0; int kf = featureIndex(nkRes, nx, ny, nf); pvdata_t expectedVal = kIndex(kxGlobal, kyGlobal, kf, loc->nxGlobal, loc->nyGlobal, nf) + frameIdx*192; if(fabs(checkVal - expectedVal) >= 1e-5){ std::cout << "ImageFileIO " << name << " test Expected: " << expectedVal << " Actual: " << checkVal << "\n"; //exit(-1); } } } return PV_SUCCESS; }
/**
 * Reports the postsynaptic-perspective weight patch of one post neuron.
 *
 * The probed neuron is given either by kPost or, when kPost < 0, by
 * (kxPost, kyPost, kfPost); whichever form is missing is derived from the other.
 * The function caches the patch's weights (wPrev) and the presynaptic activity
 * under the patch (wActiv) between calls, and writes to the probe's stream:
 *   - when stdpVars is set and the post neuron fired or something changed:
 *     a header line ('*' prefix if the post neuron fired, ' ' otherwise);
 *   - when stdpVars is set and something changed: the patch via text_write_patch_extra;
 *   - when outputIndices is set: the patch indices via write_patch_indices.
 * "Changed" means a weight differs from the cached copy or any cached
 * presynaptic activity is nonzero.
 *
 * @param timef  simulation time; passed to convertPreSynapticWeights and printed
 * @return 0
 *
 * NOTES:
 *     - kPost, kxPost, kyPost are indices in the restricted post-synaptic layer.
 *
 */
int PostConnProbe::outputState(double timef)
{
   int k, kxPre, kyPre;
   HyPerConn * c = getTargetHyPerConn();
   PVPatch * w;
   PVPatch *** wPost = c->convertPreSynapticWeights(timef);

   // TODO - WARNING: currently only works if nfPre==0

   const PVLayer * lPre = c->preSynapticLayer()->clayer;
   const PVLayer * lPost = c->postSynapticLayer()->clayer;

   const int nxPre = lPre->loc.nx;
   const int nyPre = lPre->loc.ny;
   const int nfPre = lPre->loc.nf;
   const PVHalo * haloPre = &lPre->loc.halo;

   const int nxPost = lPost->loc.nx;
   const int nyPost = lPost->loc.ny;
   const int nfPost = lPost->loc.nf;
   const PVHalo * haloPost = &lPost->loc.halo;

   // calc kPost if needed
   if (kPost < 0) {
      kPost = kIndex(kxPost, kyPost, kfPost, nxPost, nyPost, nfPost);
   }
   else {
      // kPost was given: derive the coordinates from it instead.
      kxPost = kxPos(kPost, nxPost, nyPost, nfPost);
      kyPost = kyPos(kPost, nxPost, nyPost, nfPost);
      kfPost = featureIndex(kPost, nxPost, nyPost, nfPost);
   }

   // Restricted pre-layer coordinates of the head of the patch seen by the post neuron.
   c->preSynapticPatchHead(kxPost, kyPost, kfPost, &kxPre, &kyPre);

   // Shift restricted coordinates into the extended frames of the activity buffers.
   const int kxPreEx = kxPre + haloPre->lt;
   const int kyPreEx = kyPre + haloPre->up;

   const int kxPostEx = kxPost + haloPost->lt;
   const int kyPostEx = kyPost + haloPost->up;
   const int kPostEx = kIndex(kxPostEx, kyPostEx, kfPost, nxPost+haloPost->lt+haloPost->rt, nyPost+haloPost->dn+haloPost->up, nfPost);

   const bool postFired = lPost->activity->data[kPostEx] > 0.0;

   w = wPost[getArborID()][kPost];
   pvwdata_t * wPostData = c->getWPostData(getArborID(),kPost);

   const int nw = w->nx * w->ny * nfPost; //w->nf;

   // First call: allocate the weight cache and seed it with the current weights.
   // NOTE(review): the calloc results are not checked for NULL.
   if (wPrev == NULL) {
      wPrev = (pvwdata_t *) calloc(nw, sizeof(pvwdata_t));
      for (k = 0; k < nw; k++) {
         wPrev[k] = wPostData[k]; // This is broken if the patch is shrunken
      }
   }
   if (wActiv == NULL) {
      wActiv = (pvwdata_t *) calloc(nw, sizeof(pvwdata_t));
   }

   // Gather presynaptic activity under the patch. Only feature 0 is read, so only the
   // first w->nx * w->ny entries of wActiv are written here.
   k = 0;
   for (int ky = 0; ky < w->ny; ky++) {
      for (int kx = 0; kx < w->nx; kx++) {
         int kPre = kIndex(kx+kxPreEx, ky+kyPreEx, 0, nxPre+haloPre->lt+haloPre->rt, nyPre+haloPre->dn+haloPre->up, nfPre);
         wActiv[k++] = lPre->activity->data[kPre];
      }
   }

   // Something "changed" if a weight differs from the cache or any gathered activity is nonzero.
   bool changed = false;
   for (k = 0; k < nw; k++) {
      if (wPrev[k] != wPostData[k] || wActiv[k] != 0.0) {
         changed = true;
         break;
      }
   }

   FILE * fp = getStream()->fp;
   if (stdpVars && (postFired || changed)) {
      if (postFired) fprintf(fp, "*");
      else fprintf(fp, " ");
      fprintf(fp, "t=%.1f w%d(%d,%d,%d) prePatchHead(%d,%d): ", timef, kPost, kxPost, kyPost, kfPost, kxPre, kyPre);
      if (image) fprintf(fp, "tag==%d ", image->tag());
      fprintf(fp, "\n");
   }
   if (stdpVars && changed) {
      text_write_patch_extra(fp, w, wPostData, wPrev, wActiv, getTargetHyPerConn());
      fflush(fp);
   }

   // Refresh the weight cache for the next call.
   for (k = 0; k < nw; k++) {
      wPrev[k] = wPostData[k];
   }

   if (outputIndices) {
      fprintf(fp, "w%d(%d,%d,%d) prePatchHead(%d,%d): ", kPost, kxPost, kyPost, kfPost, kxPre, kyPre);
      if(!stdpVars){
         fprintf(fp,"\n");
      }
      // NOTE(review): this declaration shadows the outer lPre with the same value.
      const PVLayer * lPre = c->preSynapticLayer()->clayer;
      write_patch_indices(fp, w, &lPre->loc, kxPre, kyPre, 0);
      fflush(fp);
   }

   return 0;
}
int BinningLayer::doUpdateState(double timed, double dt, const PVLayerLoc * origLoc, const PVLayerLoc * currLoc, const pvdata_t * origData, pvdata_t * currA, float binMax, float binMin) { int status = PV_SUCCESS; //update_timer->start(); int numBins = currLoc->nf; int nx = currLoc->nx; int ny = currLoc->ny; //Check that both nb are the same assert(origLoc->halo.lt == currLoc->halo.lt && origLoc->halo.rt == currLoc->halo.rt && origLoc->halo.dn == currLoc->halo.dn && origLoc->halo.up == currLoc->halo.up); assert(origLoc->nf == 1); PVHalo const * halo = &origLoc->halo; float binRange = binMax - binMin; float stepSize = float(binRange)/numBins; int nbatch = currLoc->nbatch; for(int b = 0; b < nbatch; b++){ const pvdata_t * origDataBatch = origData + b * (origLoc->nx + origLoc->halo.lt + origLoc->halo.rt) * (origLoc->ny + origLoc->halo.up + origLoc->halo.dn) * origLoc->nf; pvdata_t * currABatch = currA + b * (currLoc->nx + currLoc->halo.lt + currLoc->halo.rt) * (currLoc->ny + currLoc->halo.up + currLoc->halo.dn) * currLoc->nf; // each y value specifies a different target so ok to thread here (sum, sumsq are defined inside loop) #ifdef PV_USE_OPENMP_THREADS #pragma omp parallel for #endif for (int iY = 0; iY < (ny+halo->dn+halo->up); iY++){ for (int iX = 0; iX < (nx+halo->lt+halo->rt); iX++){ int origIdx = kIndex(iX, iY, 0, nx+halo->lt+halo->rt, ny+halo->dn+halo->up, origLoc->nf); float inVal = origDataBatch[origIdx]; //If inVal is out of bounds in either binMax or binMin, set the value to be the maximum or minimum val if(inVal < binMin){ inVal = binMin; } if(inVal > binMax){ inVal = binMax; } if(zeroDCR && inVal == 0){ for(int iF = 0; iF < numBins; iF++){ int currIdx = kIndex(iX, iY, iF, nx+halo->lt+halo->rt, ny+halo->dn+halo->up, numBins); currABatch[currIdx] = 0; } } else{ //A sigma of zero means only the centered bin value should get input int featureIdx = round((inVal-binMin)/stepSize); for(int iF = 0; iF < numBins; iF++){ if(binSigma == 0){ int currIdx = kIndex(iX, 
iY, iF, nx+halo->lt+halo->rt, ny+halo->dn+halo->up, numBins); if(iF == featureIdx){ currABatch[currIdx] = 1; } //Resetting value else{ if(zeroNeg){ currABatch[currIdx] = 0; } else{ currABatch[currIdx] = -1; } } } else{ //Calculate center value for featureIdx (the bin that the value belongs to without a sigma) is binning float mean; if(normalDist){ mean = featureIdx * stepSize + (stepSize/2); } else{ mean = featureIdx; } //Possible bins int intSigma = ceil(binSigma); int currIdx = kIndex(iX, iY, iF, nx+halo->lt+halo->rt, ny+halo->dn+halo->up, numBins); if(iF >= featureIdx-intSigma && iF <= featureIdx+intSigma){ //Get center of that aBin for the x pos of the normal dist float xVal; if(normalDist){ xVal = iF * stepSize + (stepSize/2); } else{ xVal = iF; } //Calculate normal dist float outVal = calcNormDist(xVal, mean, binSigma); //Put into activity buffer currABatch[currIdx] = outVal; } //Resetting value else{ if(zeroNeg){ currABatch[currIdx] = 0; } else{ currABatch[currIdx] = -1; } } } } } } } } //update_timer->stop(); return status; }
/**
 * Accumulates synaptic input from conn and then runs one round of greedy
 * feature selection over the layer.
 *
 * Steps, in order:
 *   1. Return immediately if the simulation time has not reached nextUpdate;
 *      otherwise advance nextUpdate by updatePeriod.
 *   2. Abort unless conn uses shared weights and the pre layer has the same
 *      nx and ny as this layer.
 *   3. For every nonzero presynaptic activity, accumulate its weight patch
 *      into this layer's channel buffer, row by row.
 *   4. Compute wnormsq[f], the sum of squared weights of each feature.
 *   5. For every site and feature compute minimumLocations = gSyn / wnormsq
 *      and energyDrops = -gSyn * minimumLocations / 2.
 *   6. For every site pick the feature with the smallest energy drop, unless
 *      the site already has a found feature, in which case that one is kept.
 *   7. Repeatedly call constrainMinima() and filterMinEnergies(); each selected
 *      site gets its feature recorded and its gSynSparse increased, and a
 *      neighborhood of (2*nxp-1) x (2*nyp-1) sites around it (clipped to the
 *      layer) is masked. The loop ends when filterMinEnergies reports FLT_MAX.
 *   8. Set updateReady.
 *
 * @param conn      incoming connection
 * @param activity  presynaptic activity cube; its data and numItems are read
 * @param arborID   arbor to deliver; must be >= 0
 * @return PV_SUCCESS if it is not yet time to update, 0 otherwise
 */
int PursuitLayer::recvSynapticInput(HyPerConn * conn, const PVLayerCube * activity, int arborID)
{
   if (parent->simulationTime()<nextUpdate) return PV_SUCCESS;
   nextUpdate += updatePeriod;
   recvsyn_timer->start();

   assert(arborID >= 0);

   if (conn->usingSharedWeights() == false) {
      fprintf(stderr, "Error: PursuitLayer can only be the postsynaptic layer of a connection using shared weights (this condition should be removed eventually).\n");
      abort();
   }

   HyPerLayer * pre = conn->preSynapticLayer();
   const PVLayerLoc * pre_loc = pre->getLayerLoc();
   if (pre_loc->nx != getLayerLoc()->nx || pre_loc->ny != getLayerLoc()->ny) {
      fprintf(stderr, "Error: PursuitLayer requires incoming connections to be one-to-one.\n");
      abort();
   }

   // NOTE(review): the debug block below uses numExtended before the local of that
   // name is declared - confirm what it resolves to when DEBUG_OUTPUT is defined.
#ifdef DEBUG_OUTPUT
   int rank;
   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   //printf("[%d]: HyPerLayr::recvSyn: neighbor=%d num=%d actv=%p this=%p conn=%p\n", rank, neighbor, numExtended, activity, this, conn);
   printf("[%d]: HyPerLayr::recvSyn: neighbor=%d num=%d actv=%p this=%p conn=%p\n", rank, 0, numExtended, activity, this, conn);
   fflush(stdout);
#endif // DEBUG_OUTPUT

   // Step 3: accumulate weighted presynaptic activity into the channel buffer.
   const int numExtended = activity->numItems;
   for (int kPre = 0; kPre < numExtended; kPre++) {
      float a = activity->data[kPre];
      if (a == 0.0f) continue;

      PVPatch * weights = conn->getWeights(kPre, arborID);

      // WARNING - assumes every value in weights maps into a valid value in GSyn
      //         - assumes patch stride sf is 1

      int nk = conn->fPatchSize() * weights->nx;
      int ny = weights->ny;
      int sy = conn->getPostNonextStrides()->sy; // stride in layer
      int syw = conn->yPatchStride(); // stride in patch
      pvdata_t * gSynPatchHead = this->getChannel(conn->getChannel());
      pvdata_t * gSynPatchStart = gSynPatchHead + conn->getGSynPatchStart(kPre, arborID);
      pvwdata_t * data = conn->get_wData(arborID,kPre);
      // One accumulate call per patch row.
      for (int y = 0; y < ny; y++) {
         (conn->accumulateFunctionPointer)(nk, gSynPatchStart + y*sy, a, data + y*syw, NULL);
      }
   }

   // Set |w(:,:,f)|^2. Since this is a one-to-one connection with one presynaptic feature,
   // only have to do once for each feature of a single (x,y) site and then copy.
   int nxp = conn->xPatchSize();
   int nyp = conn->yPatchSize();
   int nfp = conn->fPatchSize();
   int num_weights = nxp*nyp*nfp;
   assert(zUnitCellSize(pre->getXScale(), getXScale())==1);
   assert(zUnitCellSize(pre->getYScale(), getYScale())==1);
   assert(conn->getNumDataPatches()==1);

   for (int kf=0; kf<nfp; kf++) {
      pvwdata_t * weight = conn->get_wDataHead(arborID, 0);
      pvdata_t sum = 0.0;
      for (int k=0; k<num_weights; k+=nfp) {
         pvwdata_t w = weight[k + kf]; // Assumes stride in features is 1.
         //TODO-CER-2014.4.4 - convert weights
         sum += w*w;
      }
      wnormsq[kf] = sum;
   }

   pvdata_t * gSynStart = GSyn[conn->getChannel()];

   int nx = getLayerLoc()->nx;
   int ny = getLayerLoc()->ny;
   int nxy = nx*ny;

   // TODO: Can I compute energyDropsBestFeature and minLocationsBestFeature without storing all the energyDrops and minimumLocations?

   // Step 5: per-site, per-feature minimizer and the energy change it yields.
   for (int kxy=0; kxy<nxy; kxy++) {
      for (int kf=0; kf<nfp; kf++) {
         int k=kxy*nfp+kf; // Assumes stride in features is 1.
         minimumLocations[k] = gSynStart[k]/wnormsq[kf];
         energyDrops[k] = -gSynStart[k]*minimumLocations[k]/2;
      }
   }

   // Step 6: best feature per site; a previously found feature takes precedence.
   for (int kxy=0; kxy<nxy; kxy++) {
      minFeatures[kxy] = -1;
      energyDropsBestFeature[kxy] = FLT_MAX;
      int index0 = kxy*nfp; // assumes stride in features is 1.
      if (foundFeatures[kxy]>=0) {
         energyDropsBestFeature[kxy] = energyDrops[kxy*nfp+foundFeatures[kxy]];
         minFeatures[kxy] = foundFeatures[kxy];
      }
      else {
         for (int kf=0; kf<nfp; kf++) {
            if (energyDrops[index0+kf] < energyDropsBestFeature[kxy]) {
               minFeatures[kxy] = kf;
               energyDropsBestFeature[kxy] = energyDrops[index0+kf];
            }
         }
      }
   }

   for (int kxy=0; kxy<nxy; kxy++) {
      assert(minFeatures[kxy]>=0 && minFeatures[kxy]<nfp);
      int baseindex = kxy*nfp;
      minLocationsBestFeature[kxy] = minimumLocations[baseindex+minFeatures[kxy]];
   }

   // NOTE(review): variable-length array; a compiler extension in C++.
   bool mask[nxy];
   memset(mask, false, nxy*sizeof(*mask));

   pvdata_t smallestEnergyDrop;
   int minloc;

   // Step 7. The comma expression runs constrainMinima(), then picks the next site via
   // filterMinEnergies(), and only the final comparison decides whether to continue.
   while (constrainMinima(), minloc = filterMinEnergies(mask, &smallestEnergyDrop), smallestEnergyDrop<FLT_MAX) {
      assert(foundFeatures[minloc]<0 || foundFeatures[minloc]==minFeatures[minloc]);
      foundFeatures[minloc] = minFeatures[minloc];
      gSynSparse[minloc] += minLocationsBestFeature[minloc];
      // Coefficients that fall below 1e-4 are dropped and the site is released.
      if (gSynSparse[minloc] < 1e-4) {
         gSynSparse[minloc]=0;
         foundFeatures[minloc] = -1;
      }

      // Mask every site within one patch width/height of the selected site, clipped to the layer.
      int minlocx = kxPos(minloc,nx,ny,1);
      int maskstartx = minlocx-(nxp-1);
      if (maskstartx<0) maskstartx=0;
      int maskstopx = minlocx+nxp;
      if (maskstopx>nx) maskstopx=nx;

      int minlocy = kyPos(minloc,nx,ny,1);
      int maskstarty = minlocy-(nyp-1);
      if (maskstarty<0) maskstarty=0;
      int maskstopy = minlocy+nyp;
      if (maskstopy>ny) maskstopy=ny;

      for (int ky=maskstarty; ky<maskstopy; ky++) {
         for (int kx=maskstartx; kx<maskstopx; kx++) {
            mask[kIndex(kx,ky,0,nx,ny,1)]=true;
         }
      }
   }

   recvsyn_timer->stop();

   updateReady = true;

   return 0;
}
/**
 * Delivers pooled presynaptic activity to the postsynaptic layer, iterating
 * over postsynaptic neurons.
 *
 * Returns at once when the connection's channel is CHANNEL_NOUPDATE. Otherwise,
 * for every batch element and every restricted post neuron:
 *   - the neuron's GSyn entry is reset (0, or -INFINITY for max pooling);
 *   - if needPostIndexLayer is set, the matching gate entry is reset to -1;
 *   - for each of the yPatchSize rows of the neuron's receptive field,
 *     accumulateFunctionFromPostPointer is called with that row of presynaptic
 *     activity, the weight w, dt_factor, the gate pointer and the feature stride.
 * w is 1, except for sum pooling, where it is
 * 1 / (nxp * nyp * 2^(post x scale - pre x scale) * 2^(post y scale - pre y scale)).
 *
 * Exits the process if getPostToPreActivity() returns NULL.
 *
 * @param activity  presynaptic activity cube (extended, all batch elements)
 * @param arborID   arbor index; must be >= 0
 * @return PV_SUCCESS
 */
int PoolingConn::deliverPostsynapticPerspective(PVLayerCube const * activity, int arborID)
{
   //Check channel number for noupdate
   if(getChannel() == CHANNEL_NOUPDATE) {
      return PV_SUCCESS;
   }
   assert(post->getChannel(getChannel()));

   assert(arborID >= 0);
   //Get number of neurons restricted target
   const int numPostRestricted = post->getNumNeurons();

   float dt_factor = getConvertToRateDeltaTimeFactor();

   const PVLayerLoc * sourceLoc = preSynapticLayer()->getLayerLoc();
   const PVLayerLoc * targetLoc = post->getLayerLoc();

   const int sourceNx = sourceLoc->nx;
   const int sourceNy = sourceLoc->ny;
   const int sourceNf = sourceLoc->nf;
   const int targetNx = targetLoc->nx;
   const int targetNy = targetLoc->ny;
   const int targetNf = targetLoc->nf;

   const PVHalo * sourceHalo = &sourceLoc->halo;
   const PVHalo * targetHalo = &targetLoc->halo;

   //get source layer's extended y stride
   int sy = (sourceNx+sourceHalo->lt+sourceHalo->rt)*sourceNf;

   //The start of the gsyn buffer
   pvdata_t * gSynPatchHead = post->getChannel(this->getChannel());

   clearGateIdxBuffer();
   int* gatePatchHead = NULL;
   if(needPostIndexLayer) {
      gatePatchHead = postIndexLayer->getChannel(CHANNEL_EXC);
   }

   // Per post neuron: offset of the first presynaptic (extended) neuron of its receptive field.
   long * startSourceExtBuf = getPostToPreActivity();
   if(!startSourceExtBuf) {
      std::cout << "HyPerLayer::recvFromPost error getting preToPostActivity from connection. Is shrink_patches on?\n";
      exit(EXIT_FAILURE);
   }

   // Max pooling starts from -infinity so that any activity replaces it.
   float resetVal = 0;
   if(getPvpatchAccumulateType() == ACCUMULATE_MAXPOOLING) {
      resetVal = -INFINITY;
   }

   for(int b = 0; b < parent->getNBatch(); b++) {
#ifdef PV_USE_OPENMP_THREADS
#pragma omp parallel for
#endif
      for (int kTargetRes = 0; kTargetRes < numPostRestricted; kTargetRes++) {
         // Start of this batch element in the extended pre activity and the restricted post GSyn.
         pvdata_t * activityBatch = activity->data + b * (sourceNx + sourceHalo->rt + sourceHalo->lt) * (sourceNy + sourceHalo->up + sourceHalo->dn) * sourceNf;
         pvdata_t * gSynPatchHeadBatch = gSynPatchHead + b * targetNx * targetNy * targetNf;

         //Change restricted to extended post neuron
         int kTargetExt = kIndexExtended(kTargetRes, targetNx, targetNy, targetNf, targetHalo->lt, targetHalo->rt, targetHalo->dn, targetHalo->up);

         //Read from buffer
         long startSourceExt = startSourceExtBuf[kTargetRes];

         //Calculate target's start of gsyn
         pvdata_t * gSynPatchPos = gSynPatchHeadBatch + kTargetRes;
         //Initialize patch to the reset value (0, or -INFINITY for max pooling)
         *gSynPatchPos = resetVal;

         int* gatePatchPos = NULL;
         if(needPostIndexLayer) {
            gatePatchPos = gatePatchHead + b * postIndexLayer->getNumNeurons() + kTargetRes;
            //Initialize gatePatchPos as a negative number
            *gatePatchPos = -1;
         }

         float* activityStartBuf = &(activityBatch[startSourceExt]);

         pvwdata_t * weightY = NULL; //No weights in pooling
         int sf = postConn->fPatchSize();
         int yPatchSize = postConn->yPatchSize();
         int numPerStride = postConn->xPatchSize() * postConn->fPatchSize();

         // The post neuron's feature selects which presynaptic feature is pooled:
         // it is used as the starting offset into every activity row below.
         const PVLayerLoc * postLoc = post->getLayerLoc();
         const int kfPost = featureIndex(kTargetExt, postLoc->nx + postLoc->halo.lt + postLoc->halo.rt, postLoc->ny + postLoc->halo.dn + postLoc->halo.up, postLoc->nf);
         int offset = kfPost;

         pvwdata_t w = 1.0;
         if(getPvpatchAccumulateType() == ACCUMULATE_SUMPOOLING) {
            float relative_XScale = pow(2, (post->getXScale() - pre->getXScale()));
            float relative_YScale = pow(2, (post->getYScale() - pre->getYScale()));
            w = 1.0/(nxp*nyp*relative_XScale*relative_YScale);
         }

         for (int ky = 0; ky < yPatchSize; ky++) {
            // First presynaptic (extended, local) neuron of this patch row.
            int kPreExt = startSourceExt + ky*sy+offset;
            const int kxPreExt = kxPos(kPreExt, sourceLoc->nx + sourceLoc->halo.lt + sourceLoc->halo.rt, sourceLoc->ny + sourceLoc->halo.dn + sourceLoc->halo.up, sourceLoc->nf);
            const int kyPreExt = kyPos(kPreExt, sourceLoc->nx + sourceLoc->halo.lt + sourceLoc->halo.rt, sourceLoc->ny + sourceLoc->halo.dn + sourceLoc->halo.up, sourceLoc->nf);
            const int kfPre = featureIndex(kPreExt, sourceLoc->nx + sourceLoc->halo.lt + sourceLoc->halo.rt, sourceLoc->ny + sourceLoc->halo.dn + sourceLoc->halo.up, sourceLoc->nf);
            // Same neuron, indexed in the global extended frame.
            const int kxPreGlobalExt = kxPreExt + sourceLoc->kx0;
            const int kyPreGlobalExt = kyPreExt + sourceLoc->ky0;
            const int kPreGlobalExt = kIndex(kxPreGlobalExt, kyPreGlobalExt, kfPre, sourceLoc->nxGlobal + sourceLoc->halo.lt + sourceLoc->halo.rt, sourceLoc->nyGlobal + sourceLoc->halo.up + sourceLoc->halo.dn, sourceLoc->nf);

            float * activityY = &(activityStartBuf[ky*sy+offset]);

            (accumulateFunctionFromPostPointer)(kPreGlobalExt, numPerStride, gSynPatchPos, activityY, &w, dt_factor, gatePatchPos, sf);
         }
      }
   }
   return PV_SUCCESS;
}
int PointLIFProbe::calcValues(double timevalue) { // TODO: Reduce duplicated code between PointProbe::calcValues and PointLIFProbe::calcValues. assert(this->getNumValues()==NUMBER_OF_VALUES); LIF * LIF_layer = dynamic_cast<LIF *>(getTargetLayer()); assert(LIF_layer != NULL); pvconductance_t const * G_E = LIF_layer->getConductance(CHANNEL_EXC) + batchLoc * LIF_layer->getNumNeurons(); pvconductance_t const * G_I = LIF_layer->getConductance(CHANNEL_INH) + batchLoc * LIF_layer->getNumNeurons(); pvconductance_t const * G_IB = LIF_layer->getConductance(CHANNEL_INHB) + batchLoc * LIF_layer->getNumNeurons(); pvdata_t const * V = getTargetLayer()->getV(); pvdata_t const * Vth = LIF_layer->getVth(); pvdata_t const * activity = getTargetLayer()->getLayerData(); assert(V && activity && G_E && G_I && G_IB && Vth); double * valuesBuffer = this->getValuesBuffer(); //We need to calculate which mpi process contains the target point, and send that info to the root process //Each process calculates local index const PVLayerLoc * loc = getTargetLayer()->getLayerLoc(); //Calculate local cords from global const int kx0 = loc->kx0; const int ky0 = loc->ky0; const int kb0 = loc->kb0; const int nx = loc->nx; const int ny = loc->ny; const int nf = loc->nf; const int nbatch = loc->nbatch; const int xLocLocal = xLoc - kx0; const int yLocLocal = yLoc - ky0; const int nbatchLocal = batchLoc - kb0; //if in bounds if( xLocLocal >= 0 && xLocLocal < nx && yLocLocal >= 0 && yLocLocal < ny && nbatchLocal >= 0 && nbatchLocal < nbatch){ const pvdata_t * V = getTargetLayer()->getV(); const pvdata_t * activity = getTargetLayer()->getLayerData(); //Send V and A to root const int k = kIndex(xLocLocal, yLocLocal, fLoc, nx, ny, nf); const int kbatch = k + nbatchLocal*getTargetLayer()->getNumNeurons(); valuesBuffer[0] = G_E[kbatch]; valuesBuffer[1] = G_I[kbatch]; valuesBuffer[2] = G_IB[kbatch]; valuesBuffer[3] = V[kbatch]; valuesBuffer[4] = Vth[kbatch]; const int kex = kIndexExtended(k, nx, ny, nf, 
loc->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up); valuesBuffer[5] = activity[kex + nbatchLocal * getTargetLayer()->getNumExtended()]; //If not in root process, send to root process if(parent->columnId()!=0){ MPI_Send(valuesBuffer, NUMBER_OF_VALUES, MPI_DOUBLE, 0, 0, parent->icCommunicator()->communicator()); } } //Root process if(parent->columnId()==0){ //Calculate which rank target neuron is //TODO we need to calculate rank from batch as well int xRank = xLoc/nx; int yRank = yLoc/ny; int srcRank = rankFromRowAndColumn(yRank, xRank, parent->icCommunicator()->numCommRows(), parent->icCommunicator()->numCommColumns()); //If srcRank is not root process, MPI_Recv from that rank if(srcRank != 0){ MPI_Recv(valuesBuffer, NUMBER_OF_VALUES, MPI_DOUBLE, srcRank, 0, parent->icCommunicator()->communicator(), MPI_STATUS_IGNORE); } } return PV_SUCCESS; }
int BackwardsBatchNorm::updateState(double timef, double dt) { int status = PV_SUCCESS; //We are filling this activity buffer pvdata_t * thisA = clayer->activity->data; assert(thisA); //We need the normalized input vals, orig input vals, and the input gradients const pvdata_t * inputGradA = originalLayer->getCLayer()->activity->data; const pvdata_t * forwardA = forwardLayer->getCLayer()->activity->data; const pvdata_t * origInputA = forwardLayer->getOriginalLayer()->getCLayer()->activity->data; assert(inputGradA && forwardA && origInputA); //Get locs for all buffers const PVLayerLoc * thisLoc = getLayerLoc(); const PVLayerLoc * inputGradLoc = originalLayer->getLayerLoc(); const PVLayerLoc * forwardLoc = forwardLayer->getLayerLoc(); const PVLayerLoc * origInputLoc = forwardLayer->getOriginalLayer()->getLayerLoc(); int nbatch = thisLoc->nbatch; //All nx, ny, and nf should be the same int nx = thisLoc->nx; int ny = thisLoc->ny; int nf = thisLoc->nf; //Get buffer margins here int xThisMargin = thisLoc->halo.lt + thisLoc->halo.rt; int yThisMargin = thisLoc->halo.up + thisLoc->halo.dn; int xInputGradMargin = inputGradLoc->halo.lt + inputGradLoc->halo.rt; int yInputGradMargin = inputGradLoc->halo.up + inputGradLoc->halo.dn; int xForwardMargin = forwardLoc->halo.lt + forwardLoc->halo.rt; int yForwardMargin = forwardLoc->halo.up + forwardLoc->halo.dn; int xOrigInputMargin = origInputLoc->halo.lt + origInputLoc->halo.rt; int yOrigInputMargin = origInputLoc->halo.up + origInputLoc->halo.dn; //We also need various mean and var buffers from the forward layer const float* batchMean = forwardLayer->getBatchMean(); const float* batchVar = forwardLayer->getBatchVar(); float* batchMeanShift = forwardLayer->getBatchMeanShift(); float* batchVarShift = forwardLayer->getBatchVarShift(); float epsilon = forwardLayer->getEpsilon(); //Total number of neurons to divide by for each feature float normVal = parent->getNBatchGlobal() * thisLoc->nyGlobal * thisLoc->nxGlobal; //We're accumulating 
into delta buffers, so clear clearDelta(); //Ioffe et. al. Batch Normalization //Calculate deltaVar //TODO parallize over threads for(int iF = 0; iF < nf; iF++) { float secondTerm = -.5*(powf(batchVar[iF] + epsilon, -1.5)); for(int b = 0; b < nbatch; b++) { const pvdata_t* batchOrigInputA = origInputA + b * forwardLayer->getOriginalLayer()->getNumExtended(); const pvdata_t* batchInputGradA = inputGradA + b * originalLayer->getNumExtended(); for(int iY = 0; iY < ny; iY++) { for(int iX = 0; iX < nx; iX++) { int kExtOrigInput = kIndex(iX, iY, iF, nx+xOrigInputMargin, ny+yOrigInputMargin, nf); int kExtInputGrad = kIndex(iX, iY, iF, nx+xInputGradMargin, ny+yInputGradMargin, nf); float deltaNorm = batchInputGradA[kExtInputGrad] * batchVarShift[iF]; deltaVar[iF] += deltaNorm * (batchOrigInputA[kExtOrigInput] - batchMean[iF]); } } } //Multiply deltaVar by secondTerm deltaVar[iF] = deltaVar[iF] * secondTerm; } //Reduce deltaVar #ifdef PV_USE_MPI MPI_Allreduce(MPI_IN_PLACE, deltaVar, nf, MPI_FLOAT, MPI_SUM, parent->icCommunicator()->globalCommunicator()); #endif // PV_USE_MPI //Calculate deltaMean //Calculate first term first //TODO parallize over threads for(int iF = 0; iF < nf; iF++) { float multiplier = -1.0/(sqrtf(batchVar[iF]+epsilon)); for(int b = 0; b < nbatch; b++) { const pvdata_t* batchInputGradA = inputGradA + b * originalLayer->getNumExtended(); for(int iY = 0; iY < ny; iY++) { for(int iX = 0; iX < nx; iX++) { int kExtInputGrad = kIndex(iX, iY, iF, nx+xInputGradMargin, ny+yInputGradMargin, nf); float deltaNorm = batchInputGradA[kExtInputGrad] * batchVarShift[iF]; deltaMean[iF] += deltaNorm * multiplier; } } } } //Reduce deltaMean across mpi #ifdef PV_USE_MPI MPI_Allreduce(MPI_IN_PLACE, deltaMean, nf, MPI_FLOAT, MPI_SUM, parent->icCommunicator()->globalCommunicator()); #endif // PV_USE_MPI //Calculate second term //TODO parallize over threads for(int iF = 0; iF < nf; iF++) { float tmpMean = 0; for(int b = 0; b < nbatch; b++) { const pvdata_t* batchOrigInputA = 
origInputA + b * forwardLayer->getOriginalLayer()->getNumExtended(); for(int iY = 0; iY < ny; iY++) { for(int iX = 0; iX < nx; iX++) { int kExtOrigInput = kIndex(iX, iY, iF, nx+xOrigInputMargin, ny+yOrigInputMargin, nf); tmpMean += -2 * (batchOrigInputA[kExtOrigInput] - batchMean[iF]); } } } //Reduce tmpMean #ifdef PV_USE_MPI MPI_Allreduce(MPI_IN_PLACE, &tmpMean, 1, MPI_FLOAT, MPI_SUM, parent->icCommunicator()->globalCommunicator()); #endif // PV_USE_MPI tmpMean = tmpMean / normVal; //Add second term to first term deltaMean[iF] += deltaVar[iF] * tmpMean; } //No more sums, go with efficient loop //TODO Is the efficient loop better for optimization or do we put //features on the outer most loop for precalculation of constants over features? for(int b = 0; b < nbatch; b++) { const pvdata_t* batchOrigInputA = origInputA + b * forwardLayer->getOriginalLayer()->getNumExtended(); const pvdata_t* batchInputGradA = inputGradA + b * originalLayer->getNumExtended(); pvdata_t* batchThisA = thisA + b * getNumExtended(); #ifdef PV_USE_OPENMP_THREADS #pragma omp parallel for collapse(3) #endif for(int iY = 0; iY < ny; iY++) { for(int iX = 0; iX < nx; iX++) { for(int iF = 0; iF < nf; iF++) { int kExtOrigInput = kIndex(iX, iY, iF, nx+xOrigInputMargin, ny+yOrigInputMargin, nf); int kExtInputGrad = kIndex(iX, iY, iF, nx+xInputGradMargin, ny+yInputGradMargin, nf); int kExtThis = kIndex(iX, iY, iF, nx+xThisMargin, ny+yThisMargin, nf); float deltaNorm = batchInputGradA[kExtInputGrad] * batchVarShift[iF]; float firstTerm = deltaNorm/sqrtf(batchVar[iF] + epsilon); float secondTerm = deltaVar[iF] * (2*(batchOrigInputA[kExtOrigInput] - batchMean[iF])/normVal); float thirdTerm = deltaMean[iF]/normVal; batchThisA[kExtThis] = firstTerm + secondTerm + thirdTerm; } } } } //We calculate delta varShift and deltaMeanShift here //TODO parallize over threads //Since we're summing into delta*shift buffers, we have to sequentialize over features for(int iF = 0; iF < nf; iF++) { for(int b = 0; b < 
nbatch; b++) { const pvdata_t* batchForwardA = forwardA + b * forwardLayer->getNumExtended(); const pvdata_t* batchInputGradA = inputGradA + b * originalLayer->getNumExtended(); for(int iY = 0; iY < ny; iY++) { for(int iX = 0; iX < nx; iX++) { int kExtInputGrad = kIndex(iX, iY, iF, nx+xInputGradMargin, ny+yInputGradMargin, nf); int kExtForwardA = kIndex(iX, iY, iF, nx+xForwardMargin, ny + yForwardMargin, nf); deltaVarShift[iF] += batchInputGradA[kExtInputGrad] * batchForwardA[kExtForwardA]; deltaMeanShift[iF] += batchInputGradA[kExtInputGrad]; } } } } //Reduce delta*Shift across all mpi #ifdef PV_USE_MPI MPI_Allreduce(MPI_IN_PLACE, deltaVarShift, nf, MPI_FLOAT, MPI_SUM, parent->icCommunicator()->globalCommunicator()); MPI_Allreduce(MPI_IN_PLACE, deltaMeanShift, nf, MPI_FLOAT, MPI_SUM, parent->icCommunicator()->globalCommunicator()); #endif // PV_USE_MPI //TODO implement learning rule for meanShift and varShift return status; }
void MLPOutputLayer::multiclassNonlocalStats(){ const PVLayerLoc * loc = getLayerLoc(); int nx = loc->nx; int ny = loc->ny; int nf = loc->nf; int numNeurons = getNumNeurons(); pvdata_t * A = getCLayer()->activity->data; pvdata_t * gtA = gtLayer->getCLayer()->activity->data; float sumsq = 0; //Winner take all in the output layer int currNumRight = 0; int currNumWrong = 0; assert(classBuffer); //Clear classBuffer for(int i = 0; i < nf; i++){ classBuffer[i] = 0; } //Only go through restricted //Calculate the sum squared error for(int ni = 0; ni < numNeurons; ni++){ int nExt = kIndexExtended(ni, nx, ny, nf, loc->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up); int fi = featureIndex(nExt, nx+loc->halo.lt+loc->halo.rt, ny+loc->halo.dn+loc->halo.up, nf); //Sum over x and y direction classBuffer[fi] += A[nExt]; sumsq += pow(A[nExt] - gtA[nExt], 2); } //Normalize classBuffer to find mean for(int i = 0; i < nf; i++){ classBuffer[i] /= nx*ny; } //Reduce all classBuffers through a mean #ifdef PV_USE_MPI MPI_Allreduce(MPI_IN_PLACE, &sumsq, 1, MPI_FLOAT, MPI_SUM, parent->icCommunicator()->communicator()); MPI_Allreduce(MPI_IN_PLACE, classBuffer, nf, MPI_FLOAT, MPI_SUM, parent->icCommunicator()->communicator()); //Normalize classBuffer across processors for(int i = 0; i < nf; i++){ classBuffer[i] /= parent->icCommunicator()->commSize(); } #endif // PV_USE_MPI //Find max float estMaxF = -1000; int estMaxFi = -1; float actualMaxF = -1000; int actualMaxFi = -1; for(int i = 0; i < nf; i++){ if(classBuffer[i] >= estMaxF){ estMaxF = classBuffer[i]; estMaxFi = i; } int nExt = kIndex(loc->halo.lt, loc->halo.up, i, nx+loc->halo.lt+loc->halo.rt, ny+loc->halo.dn+loc->halo.up, nf); if(gtA[nExt] >= actualMaxF){ actualMaxF = gtA[nExt]; actualMaxFi = i; } } //Calculate stats //Found winning feature, compare to ground truth if(estMaxFi == actualMaxFi){ currNumRight++; } else{ currNumWrong++; } #ifdef PV_USE_MPI MPI_Allreduce(MPI_IN_PLACE, &currNumRight, 1, MPI_INT, MPI_SUM, 
parent->icCommunicator()->communicator()); MPI_Allreduce(MPI_IN_PLACE, &currNumWrong, 1, MPI_INT, MPI_SUM, parent->icCommunicator()->communicator()); #endif // PV_USE_MPI numRight += currNumRight; numWrong += currNumWrong; progressNumRight += currNumRight; progressNumWrong += currNumWrong; //Print if need float timef = parent->simulationTime(); if(timef >= nextStatProgress){ //Update nextStatProgress nextStatProgress += statProgressPeriod; if (parent->columnId()==0) { float totalScore = 100*float(numRight)/float(numRight+numWrong); float progressScore = 100*float(progressNumRight)/float(progressNumRight+progressNumWrong); fprintf(stdout, "time:%f layer:\"%s\" total:%f%% progressStep:%f%% energy:%f\n", timef, name, totalScore, progressScore, sumsq/2); } //Reset progressStats progressNumRight = 0; progressNumWrong = 0; } }
int TransposePoolingConn::deliverPresynapticPerspective(PVLayerCube const * activity, int arborID) { //Check if we need to update based on connection's channel if(getChannel() == CHANNEL_NOUPDATE){ return PV_SUCCESS; } assert(post->getChannel(getChannel())); const PVLayerLoc * preLoc = preSynapticLayer()->getLayerLoc(); const PVLayerLoc * postLoc = postSynapticLayer()->getLayerLoc(); assert(arborID >= 0); const int numExtended = activity->numItems; //Grab postIdxLayer's data int* postIdxData = NULL; if(pvpatchAccumulateType == ACCUMULATE_MAXPOOLING){ PoolingIndexLayer* postIndexLayer = originalConn->getPostIndexLayer(); assert(postIndexLayer); //Make sure this layer is an integer layer assert(postIndexLayer->getDataType() == PV_INT); DataStore * store = parent->icCommunicator()->publisherStore(postIndexLayer->getLayerId()); int delay = getDelay(arborID); //TODO this is currently a hack, need to properly implement data types. postIdxData = (int*) store->buffer(LOCAL, delay); } for(int b = 0; b < parent->getNBatch(); b++){ pvdata_t * activityBatch = activity->data + b * (preLoc->nx + preLoc->halo.rt + preLoc->halo.lt) * (preLoc->ny + preLoc->halo.up + preLoc->halo.dn) * preLoc->nf; pvdata_t * gSynPatchHeadBatch = post->getChannel(getChannel()) + b * postLoc->nx * postLoc->ny * postLoc->nf; int * postIdxDataBatch = NULL; if(pvpatchAccumulateType == ACCUMULATE_MAXPOOLING){ postIdxDataBatch = postIdxData + b * originalConn->getPostIndexLayer()->getNumExtended(); } unsigned int * activeIndicesBatch = NULL; if(activity->isSparse){ activeIndicesBatch = activity->activeIndices + b * (preLoc->nx + preLoc->halo.rt + preLoc->halo.lt) * (preLoc->ny + preLoc->halo.up + preLoc->halo.dn) * preLoc->nf; } int numLoop; if(activity->isSparse){ numLoop = activity->numActive[b]; } else{ numLoop = numExtended; } #ifdef PV_USE_OPENMP_THREADS //Clear all thread gsyn buffer if(thread_gSyn){ int numNeurons = post->getNumNeurons(); #ifdef PV_USE_OPENMP_THREADS #pragma omp parallel for #endif 
for(int i = 0; i < parent->getNumThreads() * numNeurons; i++){ int ti = i/numNeurons; int ni = i % numNeurons; thread_gSyn[ti][ni] = 0; } } #endif // PV_USE_OPENMP_THREADS #ifdef PV_USE_OPENMP_THREADS #pragma omp parallel for schedule(static) #endif for (int loopIndex = 0; loopIndex < numLoop; loopIndex++) { int kPreExt; if(activity->isSparse){ kPreExt = activeIndicesBatch[loopIndex]; } else{ kPreExt = loopIndex; } float a = activityBatch[kPreExt]; if (a == 0.0f) continue; //If we're using thread_gSyn, set this here pvdata_t * gSynPatchHead; #ifdef PV_USE_OPENMP_THREADS if(thread_gSyn){ int ti = omp_get_thread_num(); gSynPatchHead = thread_gSyn[ti]; } else{ gSynPatchHead = gSynPatchHeadBatch; } #else // PV_USE_OPENMP_THREADS gSynPatchHead = gSynPatchHeadBatch; #endif // PV_USE_OPENMP_THREADS const int kxPreExt = kxPos(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn + preLoc->halo.up, preLoc->nf); const int kyPreExt = kyPos(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn + preLoc->halo.up, preLoc->nf); const int kfPre = featureIndex(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn + preLoc->halo.up, preLoc->nf); if(pvpatchAccumulateType == ACCUMULATE_MAXPOOLING){ const int kxPreGlobalExt = kxPreExt + preLoc->kx0; const int kyPreGlobalExt = kyPreExt + preLoc->ky0; if(kxPreGlobalExt < preLoc->halo.lt || kxPreGlobalExt >= preLoc->nxGlobal + preLoc->halo.lt || kyPreGlobalExt < preLoc->halo.up || kyPreGlobalExt >= preLoc->nyGlobal + preLoc->halo.up){ continue; } //Convert stored global extended index into local extended index int postGlobalExtIdx = postIdxDataBatch[kPreExt]; // If all inputs are zero and input layer is sparse, postGlobalExtIdx will still be -1. 
if(postGlobalExtIdx == -1) { continue; } //Make sure the index is in bounds assert(postGlobalExtIdx >= 0 && postGlobalExtIdx < (postLoc->nxGlobal + postLoc->halo.lt + postLoc->halo.rt) * (postLoc->nyGlobal + postLoc->halo.up + postLoc->halo.dn) * postLoc->nf); const int kxPostGlobalExt = kxPos(postGlobalExtIdx, postLoc->nxGlobal + postLoc->halo.lt + postLoc->halo.rt, postLoc->nyGlobal + postLoc->halo.dn + postLoc->halo.up, postLoc->nf); const int kyPostGlobalExt = kyPos(postGlobalExtIdx, postLoc->nxGlobal + postLoc->halo.lt + postLoc->halo.rt, postLoc->nyGlobal + postLoc->halo.dn + postLoc->halo.up, postLoc->nf); const int kfPost = featureIndex(postGlobalExtIdx, postLoc->nxGlobal + postLoc->halo.lt + postLoc->halo.rt, postLoc->nyGlobal + postLoc->halo.dn + postLoc->halo.up, postLoc->nf); const int kxPostLocalRes = kxPostGlobalExt - postLoc->kx0 - postLoc->halo.lt; const int kyPostLocalRes = kyPostGlobalExt - postLoc->ky0 - postLoc->halo.up; if(kxPostLocalRes < 0 || kxPostLocalRes >= postLoc->nx|| kyPostLocalRes < 0 || kyPostLocalRes >= postLoc->ny){ continue; } const int kPostLocalRes = kIndex(kxPostLocalRes, kyPostLocalRes, kfPost, postLoc->nx, postLoc->ny, postLoc->nf); gSynPatchHeadBatch[kPostLocalRes] = a; } else{ PVPatch * weights = getWeights(kPreExt, arborID); const int nk = weights->nx * fPatchSize(); const int ny = weights->ny; pvgsyndata_t * postPatchStart = gSynPatchHead + getGSynPatchStart(kPreExt, arborID); const int sy = getPostNonextStrides()->sy; // stride in layer int offset = kfPre; int sf = fPatchSize(); pvwdata_t w = 1.0; if(getPvpatchAccumulateType() == ACCUMULATE_SUMPOOLING){ float relative_XScale = pow(2, (post->getXScale() - pre->getXScale())); float relative_YScale = pow(2, (post->getYScale() - pre->getYScale())); w = 1.0/(nxp*nyp*relative_XScale*relative_YScale); } void* auxPtr = NULL; for (int y = 0; y < ny; y++) { (accumulateFunctionPointer)(0, nk, postPatchStart + y*sy + offset, a, &w, auxPtr, sf); } } } #ifdef PV_USE_OPENMP_THREADS 
//Set back into gSyn if(thread_gSyn){ pvdata_t * gSynPatchHead = gSynPatchHeadBatch; int numNeurons = post->getNumNeurons(); //Looping over neurons first to be thread safe #pragma omp parallel for for(int ni = 0; ni < numNeurons; ni++){ for(int ti = 0; ti < parent->getNumThreads(); ti++){ if(pvpatchAccumulateType == ACCUMULATE_MAXPOOLING){ if(gSynPatchHead[ni] < fabs(thread_gSyn[ti][ni])){ gSynPatchHead[ni] = thread_gSyn[ti][ni]; } } else{ gSynPatchHead[ni] += thread_gSyn[ti][ni]; } } } } #endif } return PV_SUCCESS; }
int LCALIFLateralKernelConn::allocateDataStructures() { int status = HyPerConn::allocateDataStructures(); // Neurons don't inhibit themselves, only their neighbors; set self-interaction weights to mmzero. assert(nxp % 2 == 1 && nyp % 2 == 1 && getNumDataPatches()==nfp); for (int k=0; k<getNumDataPatches(); k++) { int n = kIndex((nxp-1)/2, (nyp-1)/2, k, nxp, nyp, nfp); get_wDataHead(0, k)[n] = 0.0f; } integratedSpikeCountCube = pvcube_new(pre->getLayerLoc(), pre->getNumExtended()); integratedSpikeCount = integratedSpikeCountCube->data; for (int k=0; k<pre->getNumExtended(); k++) { integratedSpikeCount[k] = integrationTimeConstant*getTargetRateKHz(); // Spike counts initialized to equilibrium value } mpi_datatype = Communicator::newDatatypes(pre->getLayerLoc()); if (mpi_datatype==NULL) { fprintf(stderr, "LCALIFLateralKernelConn \"%s\" error creating mpi_datatype\n", name); abort(); } // Compute the number of times each patch contributes to dw, for proper averaging. int num_arbors = numberOfAxonalArborLists(); interiorCounts = (float **) calloc(num_arbors, sizeof(float *)); if (interiorCounts==NULL) { fprintf(stderr, "LCALIFLateralKernelConn::initialize \"%s\" error: unable to allocate memory for interiorCounts pointer\n", name); } interiorCounts[0] = (float *) calloc(getNumDataPatches()*nxp*nyp*nfp, sizeof(float)); if (interiorCounts[0]==NULL) { fprintf(stderr, "LCALIFLateralKernelConn::initialize \"%s\" error: unable to allocate memory for interiorCounts\n", name); } for (int arbor=1; arbor<num_arbors; arbor++) { interiorCounts[arbor] = interiorCounts[0]+arbor*getNumDataPatches()*nxp*nyp*nfp; } const PVLayerLoc * preloc = pre->getLayerLoc(); int nxpre = preloc->nx; int nypre = preloc->ny; int nfpre = preloc->nf; int nExt = pre->getNumExtended(); int sya = getPostExtStrides()->sy; int nxglob = preloc->nxGlobal; int nyglob = preloc->nyGlobal; int kx0 = preloc->kx0; int ky0 = preloc->ky0; for (int arbor=0; arbor<numberOfAxonalArborLists(); arbor++) { for(int kExt=0; 
kExt<nExt;kExt++) { int xglob = kxPos(kExt, nxpre + preloc->halo.lt + preloc->halo.rt, nypre + preloc->halo.dn + preloc->halo.up, nfpre) + kx0 - preloc->halo.lt; int yglob = kyPos(kExt, nypre + preloc->halo.lt + preloc->halo.rt, nypre + preloc->halo.dn + preloc->halo.up, nfpre) + ky0 - preloc->halo.up; if (xglob < 0 || xglob >= nxglob || yglob < 0 || yglob >= nyglob) { continue; } PVPatch * weights = getWeights(kExt,arbor); int offset = (int) getAPostOffset(kExt, arbor); int ny = weights->ny; int nk = weights->nx * nfp; int interiorCountOffset = get_wData(arbor, kExt)-get_wDataStart(arbor); int lineoffsetw = 0; int lineoffseta = 0; for( int y=0; y<ny; y++ ) { for( int k=0; k<nk; k++ ) { int postactindex = offset+lineoffseta+k; if (postactindex != kExt) { // Neurons don't inhibit themselves interiorCounts[arbor][interiorCountOffset + lineoffsetw + k]++; } } lineoffsetw += syp; lineoffseta += sya; } } } int bufsize = numberOfAxonalArborLists() * getNumDataPatches() * nxp * nyp * nfp; // TODO-CER-2014.3.26 - Ensure that reduction is done when not using MPI #ifdef PV_USE_MPI MPI_Allreduce(MPI_IN_PLACE, interiorCounts[0], bufsize, MPI_FLOAT, MPI_SUM, parent->icCommunicator()->communicator()); #endif return status; }
int PointProbe::calcValues(double timevalue) { assert(this->getNumValues()==2); double * valuesBuffer = this->getValuesBuffer(); //We need to calculate which mpi process contains the target point, and send that info to the root process //Each process calculates local index const PVLayerLoc * loc = getTargetLayer()->getLayerLoc(); //Calculate local cords from global const int kx0 = loc->kx0; const int ky0 = loc->ky0; const int kb0 = loc->kb0; const int nx = loc->nx; const int ny = loc->ny; const int nf = loc->nf; const int nbatch = loc->nbatch; const int xLocLocal = xLoc - kx0; const int yLocLocal = yLoc - ky0; const int nbatchLocal = batchLoc - kb0; //if in bounds if( xLocLocal >= 0 && xLocLocal < nx && yLocLocal >= 0 && yLocLocal < ny && nbatchLocal >= 0 && nbatchLocal < nbatch){ const pvdata_t * V = getTargetLayer()->getV(); const pvdata_t * activity = getTargetLayer()->getLayerData(); //Send V and A to root const int k = kIndex(xLocLocal, yLocLocal, fLoc, nx, ny, nf); if(V){ valuesBuffer[0] = V[k + nbatchLocal*getTargetLayer()->getNumNeurons()]; } else { valuesBuffer[0] = 0.0; } if(activity){ const int kex = kIndexExtended(k, nx, ny, nf, loc->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up); valuesBuffer[1] = activity[kex + nbatchLocal * getTargetLayer()->getNumExtended()]; } else { valuesBuffer[1] = 0.0; } //If not in root process, send to root process if(parent->columnId()!=0){ MPI_Send(&valuesBuffer, 2, MPI_DOUBLE, 0, 0, parent->icCommunicator()->communicator()); } } //Root process if(parent->columnId()==0){ //Calculate which rank target neuron is //TODO we need to calculate rank from batch as well int xRank = xLoc/nx; int yRank = yLoc/ny; int srcRank = rankFromRowAndColumn(yRank, xRank, parent->icCommunicator()->numCommRows(), parent->icCommunicator()->numCommColumns()); //If srcRank is not root process, MPI_Recv from that rank if(srcRank != 0){ MPI_Recv(&valuesBuffer, 2, MPI_DOUBLE, srcRank, 0, parent->icCommunicator()->communicator(), MPI_STATUS_IGNORE); 
} } return PV_SUCCESS; }
int PoolingConn::deliverPresynapticPerspective(PVLayerCube const * activity, int arborID) { //Check if we need to update based on connection's channel if(getChannel() == CHANNEL_NOUPDATE) { return PV_SUCCESS; } assert(post->getChannel(getChannel())); float dt_factor; if (getPvpatchAccumulateType()==ACCUMULATE_STOCHASTIC) { dt_factor = getParent()->getDeltaTime(); } else { dt_factor = getConvertToRateDeltaTimeFactor(); } const PVLayerLoc * preLoc = preSynapticLayer()->getLayerLoc(); const PVLayerLoc * postLoc = postSynapticLayer()->getLayerLoc(); assert(arborID >= 0); const int numExtended = activity->numItems; float resetVal = 0; if(getPvpatchAccumulateType() == ACCUMULATE_MAXPOOLING) { resetVal = -INFINITY; float* gSyn = post->getChannel(getChannel()); //gSyn is res #ifdef PV_USE_OPENMP_THREADS #pragma omp parallel for #endif for(int i = 0; i < post->getNumNeuronsAllBatches(); i++) { gSyn[i] = resetVal; } } clearGateIdxBuffer(); for(int b = 0; b < parent->getNBatch(); b++) { pvdata_t * activityBatch = activity->data + b * (preLoc->nx + preLoc->halo.rt + preLoc->halo.lt) * (preLoc->ny + preLoc->halo.up + preLoc->halo.dn) * preLoc->nf; pvdata_t * gSynPatchHeadBatch = post->getChannel(getChannel()) + b * postLoc->nx * postLoc->ny * postLoc->nf; int* gatePatchHeadBatch = NULL; if(needPostIndexLayer) { gatePatchHeadBatch = postIndexLayer->getChannel(CHANNEL_EXC) + b * postIndexLayer->getNumNeurons(); } unsigned int * activeIndicesBatch = NULL; if(activity->isSparse) { activeIndicesBatch = activity->activeIndices + b * (preLoc->nx + preLoc->halo.rt + preLoc->halo.lt) * (preLoc->ny + preLoc->halo.up + preLoc->halo.dn) * preLoc->nf; } int numLoop; if(activity->isSparse) { numLoop = activity->numActive[b]; } else { numLoop = numExtended; } if(thread_gateIdxBuffer) { #ifdef PV_USE_OPENMP_THREADS #pragma omp parallel for #endif for(int i = 0; i < parent->getNumThreads() * post->getNumNeurons(); i++) { int ti = i/post->getNumNeurons(); int ni = i % post->getNumNeurons(); 
thread_gateIdxBuffer[ti][ni] = -1; } } #ifdef PV_USE_OPENMP_THREADS //Clear all gsyn buffers if(thread_gSyn) { int numNeurons = post->getNumNeurons(); #ifdef PV_USE_OPENMP_THREADS #pragma omp parallel for #endif for(int i = 0; i < parent->getNumThreads() * numNeurons; i++) { int ti = i/numNeurons; int ni = i % numNeurons; thread_gSyn[ti][ni] = resetVal; } } #endif // PV_USE_OPENMP_THREADS #ifdef PV_USE_OPENMP_THREADS #pragma omp parallel for schedule(static) #endif for (int loopIndex = 0; loopIndex < numLoop; loopIndex++) { int kPreExt; if(activity->isSparse) { kPreExt = activeIndicesBatch[loopIndex]; } else { kPreExt = loopIndex; } float a = activityBatch[kPreExt] * dt_factor; //if (a == 0.0f) continue; //If we're using thread_gSyn, set this here pvdata_t * gSynPatchHead; //float * gatePatchHead = NULL; int * gatePatchHead = NULL; #ifdef PV_USE_OPENMP_THREADS if(thread_gSyn) { int ti = omp_get_thread_num(); gSynPatchHead = thread_gSyn[ti]; } else { gSynPatchHead = gSynPatchHeadBatch; } if(needPostIndexLayer) { if(thread_gateIdxBuffer) { int ti = omp_get_thread_num(); gatePatchHead = thread_gateIdxBuffer[ti]; } else { gatePatchHead = gatePatchHeadBatch; } } #else // PV_USE_OPENMP_THREADS gSynPatchHead = gSynPatchHeadBatch; if(needPostIndexLayer) { gatePatchHead = gatePatchHeadBatch; } #endif // PV_USE_OPENMP_THREADS //deliverOnePreNeuronActivity(kPreExt, arborID, a, gSynPatchHead, gatePatchHead); PVPatch * weights = getWeights(kPreExt, arborID); const int nk = weights->nx * fPatchSize(); const int ny = weights->ny; const int sy = getPostNonextStrides()->sy; // stride in layer pvwdata_t * weightDataStart = NULL; pvgsyndata_t * postPatchStart = gSynPatchHead + getGSynPatchStart(kPreExt, arborID); int* postGatePatchStart = gatePatchHead + getGSynPatchStart(kPreExt, arborID); //float* postGatePatchStart = gatePatchHead + getGSynPatchStart(kPreExt, arborID); const int kxPreExt = kxPos(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn 
+ preLoc->halo.up, preLoc->nf); const int kyPreExt = kyPos(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn + preLoc->halo.up, preLoc->nf); const int kfPre = featureIndex(kPreExt, preLoc->nx + preLoc->halo.lt + preLoc->halo.rt, preLoc->ny + preLoc->halo.dn + preLoc->halo.up, preLoc->nf); const int kxPreGlobalExt = kxPreExt + preLoc->kx0; const int kyPreGlobalExt = kyPreExt + preLoc->ky0; const int kPreGlobalExt = kIndex(kxPreGlobalExt, kyPreGlobalExt, kfPre, preLoc->nxGlobal + preLoc->halo.lt + preLoc->halo.rt, preLoc->nyGlobal + preLoc->halo.up + preLoc->halo.dn, preLoc->nf); int offset = kfPre; int sf = fPatchSize(); pvwdata_t w = 1.0; if(getPvpatchAccumulateType() == ACCUMULATE_SUMPOOLING) { float relative_XScale = pow(2, (post->getXScale() - pre->getXScale())); float relative_YScale = pow(2, (post->getYScale() - pre->getYScale())); w = 1.0/(nxp*nyp*relative_XScale*relative_YScale); } void* auxPtr = NULL; for (int y = 0; y < ny; y++) { if(needPostIndexLayer) { auxPtr = (postGatePatchStart+ y*sy + offset); } (accumulateFunctionPointer)(kPreGlobalExt, nk, postPatchStart + y*sy + offset, a, &w, auxPtr, sf); } } #ifdef PV_USE_OPENMP_THREADS //Accumulate back into gSyn // Should this be done in HyPerLayer where it can be done once, as opposed to once per connection? 
if(thread_gSyn) { pvdata_t * gSynPatchHead = gSynPatchHeadBatch; //float* gateIdxBuffer = postIndexLayer->getChannel(CHANNEL_EXC); int * gateIdxBuffer = NULL; if(needPostIndexLayer && thread_gateIdxBuffer) { gateIdxBuffer = gatePatchHeadBatch; } int numNeurons = post->getNumNeurons(); //Looping over neurons first to be thread safe #pragma omp parallel for for(int ni = 0; ni < numNeurons; ni++) { //Different for maxpooling if(getPvpatchAccumulateType() == ACCUMULATE_MAXPOOLING) { for(int ti = 0; ti < parent->getNumThreads(); ti++) { if(gSynPatchHead[ni] < thread_gSyn[ti][ni]) { gSynPatchHead[ni] = thread_gSyn[ti][ni]; if(needPostIndexLayer && thread_gateIdxBuffer) { gateIdxBuffer[ni] = thread_gateIdxBuffer[ti][ni]; assert(gateIdxBuffer >= 0); } } } } else { for(int ti = 0; ti < parent->getNumThreads(); ti++) { gSynPatchHead[ni] += thread_gSyn[ti][ni]; } } } } #endif } if(activity->isSparse) { pvdata_t * gSyn = post->getChannel(getChannel()); for (int k=0; k<post->getNumNeuronsAllBatches(); k++) { if (gSyn[k]==-INFINITY) { gSyn[k] = 0.0f; } } } return PV_SUCCESS; }