int MaskFromMemoryBuffer::updateState(double time, double dt) { if (imageLayer->getDataLeft() == dataLeft && imageLayer->getDataTop() == dataTop && imageLayer->getDataWidth() == dataRight-dataLeft && imageLayer->getDataHeight() && dataBottom-dataTop) { return PV_SUCCESS; // mask only needs to change if the imageLayer changes its active region } dataLeft = imageLayer->getDataLeft(); dataRight = dataLeft+imageLayer->getDataWidth(); dataTop = imageLayer->getDataTop(); dataBottom = dataTop + imageLayer->getDataHeight(); PVLayerLoc const * loc = getLayerLoc(); for(int b = 0; b < loc->nbatch; b++) { pvdata_t * ABatch = getActivity() + b * getNumExtended(); int const num_neurons = getNumNeurons(); #ifdef PV_USE_OPENMP_THREADS #pragma omp parallel for #endif for(int ni = 0; ni < num_neurons; ni++) { PVHalo const * halo = &loc->halo; int const nx = loc->nx; int const ny = loc->ny; int const nf = loc->nf; int x = kxPos(ni, nx, ny, nf); int y = kyPos(ni, nx, ny, nf); pvadata_t a = (pvadata_t) (x>=dataLeft && x < dataRight && y >= dataTop && y < dataBottom); int nExt = kIndexExtended(ni, nx, ny, nf, halo->lt, halo->rt, halo->dn, halo->up); ABatch[nExt] = a; } } return PV_SUCCESS; }
int ImageTestLayer::updateStateWrapper(double time, double dt) { Image::updateStateWrapper(time, dt); const PVLayerLoc * loc = getLayerLoc(); int nx = loc->nx; int ny = loc->ny; int nf = loc->nf; int nbatch = loc->nbatch; for(int b = 0; b < nbatch; b++){ pvdata_t * dataBatch = data + b * getNumExtended(); for(int nkRes = 0; nkRes < getNumNeurons(); nkRes++){ //Calculate extended index int nkExt = kIndexExtended(nkRes, nx, ny, nf, loc->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up); //checkVal is the value from batch index 0 pvdata_t checkVal = dataBatch[nkExt] * 255; int kxGlobal = kxPos(nkRes, nx, ny, nf) + loc->kx0; int kyGlobal = kyPos(nkRes, nx, ny, nf) + loc->ky0; int kf = featureIndex(nkRes, nx, ny, nf); pvdata_t expectedVal = kIndex(kxGlobal, kyGlobal, kf, loc->nxGlobal, loc->nyGlobal, nf); if(fabs(checkVal - expectedVal) >= 1e-5){ std::cout << "ImageFileIO test Expected: " << expectedVal << " Actual: " << checkVal << "\n"; exit(-1); } } } return PV_SUCCESS; }
int SegmentifyTest::updateState(double timef, double dt){ //Do update state first Segmentify::updateState(timef, dt); const PVLayerLoc * loc = getLayerLoc(); pvdata_t * A = getActivity(); assert(A); for(int bi = 0; bi < loc->nbatch; bi++){ pvdata_t * batchA = A + bi * getNumExtended(); for(int yi = 0; yi < loc->ny; yi++){ for(int xi = 0; xi < loc->nx; xi++){ for(int fi = 0; fi < loc->nf; fi++){ int extIdx = (yi + loc->halo.up) * (loc->nx + loc->halo.lt + loc->halo.rt) * loc->nf + (xi + loc->halo.lt) * loc->nf + fi; float actualVal = batchA[extIdx]; float targetVal = getTargetVal(yi+loc->ky0, xi+loc->kx0, fi); checkOutputVals(yi+loc->ky0, xi+loc->kx0, fi, targetVal, actualVal); //std::cout << "Idx: (" << bi << "," << yi << "," << xi << "," << fi << ") Val: " << actualVal << " Target: " << targetVal << "\n"; } } } } return PV_SUCCESS; }
// set activity to global x/y/f position, using position in border/margin as required int PlasticConnTestLayer::setActivitytoGlobalPos(){ for (int kLocalExt = 0; kLocalExt < getNumExtended(); kLocalExt++){ int kxLocalExt = kxPos(kLocalExt, clayer->loc.nx + clayer->loc.halo.lt + clayer->loc.halo.rt, clayer->loc.ny + clayer->loc.halo.dn + clayer->loc.halo.up, clayer->loc.nf) - clayer->loc.halo.lt; int kxGlobalExt = kxLocalExt + clayer->loc.kx0; float xScaleLog2 = clayer->xScale; float x0 = xOriginGlobal(xScaleLog2); float dx = deltaX(xScaleLog2); float x_global_pos = (x0 + dx * kxGlobalExt); clayer->activity->data[kLocalExt] = x_global_pos; } return PV_SUCCESS; }
int AccumulateLayer::setActivity() { const PVLayerLoc * loc = getLayerLoc(); int nx = loc->nx; int ny = loc->ny; int nf = loc->nf; int num_neurons = nx*ny*nf; int status = PV_SUCCESS; memset(clayer->activity->data, 0, sizeof(pvdata_t)*getNumExtended()); if( status == PV_SUCCESS ) status = applyVThresh_ANNLayer(num_neurons, getV(), AMin, VThresh, AShift, VWidth, getCLayer()->activity->data, nx, ny, nf, loc->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up); if( status == PV_SUCCESS ) status = applyVMax_ANNLayer(num_neurons, getV(), AMax, getCLayer()->activity->data, nx, ny, nf, loc->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up); return status; }
int RunningAverageLayer::updateState(double timef, double dt) { numUpdateTimes++; int status = PV_SUCCESS; double deltaT = parent->getDeltaTime(); //Check if an update is needed //Done in cloneVLayer //if(checkIfUpdateNeeded()){ int numNeurons = originalLayer->getNumNeurons(); pvdata_t * A = clayer->activity->data; const pvdata_t * originalA = originalLayer->getCLayer()->activity->data; const PVLayerLoc * loc = getLayerLoc(); const PVLayerLoc * locOriginal = originalLayer->getLayerLoc(); int nbatch = loc->nbatch; //Make sure all sizes match //assert(locOriginal->nb == loc->nb); assert(locOriginal->nx == loc->nx); assert(locOriginal->ny == loc->ny); assert(locOriginal->nf == loc->nf); for(int b = 0; b < nbatch; b++){ const pvdata_t * originalABatch = originalA + b * originalLayer->getNumExtended(); pvdata_t * ABatch = A + b * getNumExtended(); if (numUpdateTimes < numImagesToAverage*deltaT){ #ifdef PV_USE_OPENMP_THREADS #pragma omp parallel for #endif // PV_USE_OPENMP_THREADS for(int k=0; k<numNeurons; k++) { int kExt = kIndexExtended(k, loc->nx, loc->ny, loc->nf, loc->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up); int kExtOriginal = kIndexExtended(k, locOriginal->nx, locOriginal->ny, locOriginal->nf, locOriginal->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up); ABatch[kExt] = ((numUpdateTimes/deltaT-1) * ABatch[kExt] + originalABatch[kExtOriginal]) * deltaT / numUpdateTimes; } } else{ #ifdef PV_USE_OPENMP_THREADS #pragma omp parallel for #endif // PV_USE_OPENMP_THREADS for(int k=0; k<numNeurons; k++) { int kExt = kIndexExtended(k, loc->nx, loc->ny, loc->nf, loc->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up); int kExtOriginal = kIndexExtended(k, locOriginal->nx, locOriginal->ny, locOriginal->nf, locOriginal->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up); ABatch[kExt] = ((numImagesToAverage-1) * ABatch[kExt] + originalABatch[kExtOriginal]) / numImagesToAverage; } } } //Update lastUpdateTime lastUpdateTime = parent->simulationTime(); //} return status; }
//Makes a layer such that the restricted space is the index, but with spinning order be [x, y, f] as opposed to [f, x, y] int InputLayer::updateState(double timef, double dt){ //Grab layer size const PVLayerLoc* loc = getLayerLoc(); int nx = loc->nx; int ny = loc->ny; int nf = loc->nf; int nxGlobal = loc->nxGlobal; int nyGlobal = loc->nyGlobal; int kx0 = loc->kx0; int ky0 = loc->ky0; for(int b = 0; b < parent->getNBatch(); b++){ pvdata_t * A = getActivity() + b * getNumExtended(); //looping over ext for(int iY = 0; iY < ny+loc->halo.up+loc->halo.dn; iY++){ for(int iX = 0; iX < nx+loc->halo.lt+loc->halo.rt; iX++){ //Calculate x and y global extended int xGlobalExt = iX + loc->kx0; int yGlobalExt = iY + loc->ky0; //Calculate x and y in restricted space int xGlobalRes = xGlobalExt - loc->halo.lt; int yGlobalRes = yGlobalExt - loc->halo.up; //Calculate base value //xGlobal and yGlobalRes can be negative int baseActivityVal = yGlobalRes * nxGlobal + xGlobalRes; for(int iFeature = 0; iFeature < nf; iFeature++){ int ext_idx = kIndex(iX, iY, iFeature, nx+loc->halo.lt+loc->halo.rt, ny+loc->halo.dn+loc->halo.up, nf); //Feature gives an offset, since it spins slowest int activityVal = baseActivityVal + iFeature * nxGlobal * nyGlobal; A[ext_idx] = activityVal; } } } } ////Printing for double checking //printf("\nOutMat\n"); ////looping over ext //for(int iFeature = 0; iFeature < nf; iFeature++){ // for(int iY = 0; iY < ny+loc->halo.up+loc->halo.dn; iY++){ // for(int iX = 0; iX < nx+loc->halo.lt+loc->halo.rt; iX++){ // int ext_idx = kIndex(iX, iY, iFeature, nx+loc->halo.lt+loc->halo.rt, ny+loc->halo.dn+loc->halo.up, nf); // printf("%03d ", (int)A[ext_idx]); // } // printf("\n"); // } // printf("\n\n"); //} return PV_SUCCESS; }
int FilenameParsingGroundTruthLayer::updateState(double time, double dt) { update_timer->start(); pvdata_t * A = getCLayer()->activity->data; const PVLayerLoc * loc = getLayerLoc(); int num_neurons = getNumNeurons(); if (num_neurons != numClasses) { pvError() << "The number of neurons in " << getName() << " is not equal to the number of classes specified in " << parent->getOutputPath() << "/classes.txt\n"; } for(int b = 0; b < loc->nbatch; b++){ char * currentFilename = NULL; int filenameLen = 0; //TODO depending on speed of this layer, more efficient way would be to preallocate currentFilename buffer if(parent->icCommunicator()->commRank()==0){ currentFilename = strdup(movieLayer->getFilename(b)); //Get length of currentFilename and broadcast int filenameLen = (int) strlen(currentFilename) + 1; //+1 for the null terminator //Using local communicator, as each batch MPI will handle it's own run MPI_Bcast(&filenameLen, 1, MPI_INT, 0, parent->icCommunicator()->communicator()); //Braodcast filename to all other local processes MPI_Bcast(currentFilename, filenameLen, MPI_CHAR, 0, parent->icCommunicator()->communicator()); } else{ //Receive broadcast about length of filename MPI_Bcast(&filenameLen, 1, MPI_INT, 0, parent->icCommunicator()->communicator()); currentFilename = (char*)calloc(sizeof(char), filenameLen); //Receive filename MPI_Bcast(currentFilename, filenameLen, MPI_CHAR, 0, parent->icCommunicator()->communicator()); } std::string fil = currentFilename; pvdata_t * ABatch = A + b * getNumExtended(); for(int i = 0; i < num_neurons; i++){ int nExt = kIndexExtended(i, loc->nx, loc->ny, loc->nf, loc->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up); int fi = featureIndex(nExt, loc->nx+loc->halo.rt+loc->halo.lt, loc->ny+loc->halo.dn+loc->halo.up, loc->nf); int match = fil.find(classes[i]); if(0 <= match){ ABatch[nExt] = gtClassTrueValue; } else{ ABatch[nExt] = gtClassFalseValue; } } //Free buffer, TODO, preallocate buffer to avoid this free(currentFilename); } update_timer->stop(); return PV_SUCCESS; }
int GatePoolTestLayer::updateState(double timef, double dt) { //Do update state of ANN Layer first ANNLayer::updateState(timef, dt); //Grab layer size const PVLayerLoc* loc = getLayerLoc(); int nx = loc->nx; int ny = loc->ny; int nxGlobal = loc->nxGlobal; int nyGlobal = loc->nyGlobal; int nf = loc->nf; int kx0 = loc->kx0; int ky0 = loc->ky0; bool isCorrect = true; //Grab the activity layer of current layer for(int b = 0; b < loc->nbatch; b++) { const pvdata_t * A = getActivity() + b * getNumExtended(); //We only care about restricted space, but iY and iX are extended for(int iY = loc->halo.up; iY < ny + loc->halo.up; iY++) { for(int iX = loc->halo.lt; iX < nx + loc->halo.lt; iX++) { for(int iFeature = 0; iFeature < nf; iFeature++) { int ext_idx = kIndex(iX, iY, iFeature, nx+loc->halo.lt+loc->halo.rt, ny+loc->halo.dn+loc->halo.up, nf); float actualvalue = A[ext_idx]; int xval = (iX + kx0 - loc->halo.lt)/2; int yval = (iY + ky0 - loc->halo.up)/2; assert(xval >= 0 && xval < loc->nxGlobal); assert(yval >= 0 && yval < loc->nxGlobal); float expectedvalue; expectedvalue = iFeature * 64 + yval * 16 + xval * 2 + 4.5; expectedvalue*=4; if(fabs(actualvalue - expectedvalue) >= 1e-4) { pvErrorNoExit() << "Connection " << name << " Mismatch at (" << iX << "," << iY << ") : actual value: " << actualvalue << " Expected value: " << expectedvalue << ". Discrepancy is a whopping " << actualvalue - expectedvalue << "! Horrors!" << "\n"; isCorrect = false; } } } } } if(!isCorrect) { InterColComm * icComm = parent->icCommunicator(); MPI_Barrier(icComm->communicator()); // If there is an error, make sure that MPI doesn't kill the run before process 0 reports the error. exit(-1); } return PV_SUCCESS; }
int BIDSCloneLayer::allocateDataStructures() { int status = CloneVLayer::allocateDataStructures(); assert(GSyn==NULL); BIDSMovieCloneMap *blayer = dynamic_cast<BIDSMovieCloneMap*> (originalLayer->getParent()->getLayerFromName(jitterSourceName)); if (blayer==NULL) { fprintf(stderr, "BIDSCloneLayer \"%s\": jitterSource \"%s\" must be a BIDSMovieCloneMap.\n", name, jitterSourceName); abort(); } coords = blayer->getCoords(); numNodes = blayer->getNumNodes(); for(int i = 0; i < getNumExtended(); i++){ this->clayer->activity->data[i] = 0; } return status; }
int AccumulateLayer::doUpdateState(double time, double dt, const PVLayerLoc * loc, pvdata_t * A, pvdata_t * V, int num_channels, pvdata_t * gSynHead) { bool needsUpdate = false; if (syncedInputLayer != NULL) { if (getPhase() > syncedInputLayer->getPhase()) { needsUpdate = syncedInputLayer->getLastUpdateTime() >= lastUpdateTime; } else { needsUpdate = syncedInputLayer->getLastUpdateTime() > lastUpdateTime; } } if (needsUpdate) { memset(clayer->activity->data, 0, sizeof(pvdata_t)*getNumExtended()); } //update_timer->start(); //#ifdef PV_USE_OPENCL // if(gpuAccelerateFlag) { // updateStateOpenCL(time, dt); // //HyPerLayer::updateState(time, dt); // } // else { //#endif int nx = loc->nx; int ny = loc->ny; int nf = loc->nf; int num_neurons = nx*ny*nf; updateV_AccumulateLayer(num_neurons, V, num_channels, gSynHead, A, AMax, AMin, VThresh, AShift, VWidth, nx, ny, nf, loc->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up); //Moved to publish //if (this->writeSparseActivity){ // updateActiveIndices(); // added by GTK to allow for sparse output, can this be made an inline function??? //} //#ifdef PV_USE_OPENCL // } //#endif //update_timer->stop(); return PV_SUCCESS; }
int PursuitLayer::updateState(double time, double dt) { if (!updateReady) return PV_SUCCESS; int nx = getLayerLoc()->nx; int ny = getLayerLoc()->ny; int nf = getLayerLoc()->nf; PVHalo const * halo = &getLayerLoc()->halo; pvdata_t * activity = getActivity(); memset(activity, 0, getNumExtended()*sizeof(*activity)); int nxy = nx*ny; for (int kxy=0; kxy<nxy; kxy++) { int kf = foundFeatures[kxy]; if (kf>=0) { int kx = kxPos(kxy,nx,ny,1); int ky = kyPos(kxy,nx,ny,1); int kex = kIndex(kx+halo->lt, ky+halo->up, kf, nx+halo->lt+halo->rt, ny+halo->dn+halo->up, nf); /* Is this correct? Before splitting x- and y- margin widths, the ny argument was ny*nb, which seems weird. */ activity[kex] = gSynSparse[kxy]; } } //resetGSynBuffers_HyPerLayer(getNumNeurons(), getNumChannels(), GSyn[0]); updateReady = false; return PV_SUCCESS; }
int MoviePvpTestLayer::updateStateWrapper(double time, double dt) { MoviePvp::updateStateWrapper(time, dt); const PVLayerLoc * loc = getLayerLoc(); int nx = loc->nx; int ny = loc->ny; int nf = loc->nf; int nbatch = loc->nbatch; for(int b = 0; b < nbatch; b++){ pvdata_t * dataBatch = data + b * getNumExtended(); int frameIdx; if(strcmp(getBatchMethod(), "byImage") == 0){ frameIdx = (time-1) * nbatch + b; } else if(strcmp(getBatchMethod(), "byMovie") == 0){ frameIdx = b * 2 + (time-1); } for(int nkRes = 0; nkRes < getNumNeurons(); nkRes++){ //Calculate extended index int nkExt = kIndexExtended(nkRes, nx, ny, nf, loc->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up); //checkVal is the value from batch index 0 pvdata_t checkVal = dataBatch[nkExt]; int kxGlobal = kxPos(nkRes, nx, ny, nf) + loc->kx0; int kyGlobal = kyPos(nkRes, nx, ny, nf) + loc->ky0; int kf = featureIndex(nkRes, nx, ny, nf); pvdata_t expectedVal = kIndex(kxGlobal, kyGlobal, kf, loc->nxGlobal, loc->nyGlobal, nf) + frameIdx*192; if(fabs(checkVal - expectedVal) >= 1e-5){ std::cout << "ImageFileIO " << name << " test Expected: " << expectedVal << " Actual: " << checkVal << "\n"; //exit(-1); } } } return PV_SUCCESS; }
int SegmentLayer::updateState(double timef, double dt) { pvdata_t* srcA = originalLayer->getActivity(); pvdata_t* thisA = getActivity(); assert(srcA); assert(thisA); const PVLayerLoc* loc = getLayerLoc(); //Segment input layer based on segmentMethod if(strcmp(segmentMethod, "none") == 0){ int numBatchExtended = getNumExtendedAllBatches(); //Copy activity over //Since both buffers should be identical size, we can do a memcpy here memcpy(thisA, srcA, numBatchExtended * sizeof(pvdata_t)); } else{ //This case should never happen assert(0); } assert(loc->nf == 1); //Clear centerIdxs for(int bi = 0; bi < loc->nbatch; bi++){ centerIdx[bi].clear(); } for(int bi = 0; bi < loc->nbatch; bi++){ pvdata_t* batchA = thisA + bi * getNumExtended(); //Reset max/min buffers maxX.clear(); maxY.clear(); minX.clear(); minY.clear(); //Loop through this buffer to fill labelVec and idxVec //Looping through restricted, but indices are extended for(int yi = loc->halo.up; yi < loc->ny+loc->halo.up; yi++){ for(int xi = loc->halo.lt; xi < loc->nx+loc->halo.lt; xi++){ //Convert to local extended linear index int niLocalExt = yi * (loc->nx+loc->halo.lt+loc->halo.rt) + xi; //Convert yi and xi to global res index int globalResYi = yi - loc->halo.up + loc->ky0; int globalResXi = xi - loc->halo.lt + loc->kx0; //Get label value //Note that we're assuming that the activity here are integers, //even though the buffer is floats int labelVal = round(batchA[niLocalExt]); //Calculate max/min x and y for a single batch //If labelVal exists in map if(maxX.count(labelVal)){ //Here, we're assuming the 4 maps are in sync, so we use the //.at method, as it will throw an exception as opposed to the //[] operator, which will simply add the key into the map if(globalResXi > maxX.at(labelVal)){ maxX[labelVal] = globalResXi; } if(globalResXi < minX.at(labelVal)){ minX[labelVal] = globalResXi; } if(globalResYi > maxY.at(labelVal)){ maxY[labelVal] = globalResYi; } if(globalResYi < minY.at(labelVal)){ minY[labelVal] = globalResYi; } } //If doesn't exist, add into map with current vals else{ maxX[labelVal] = globalResXi; minX[labelVal] = globalResXi; maxY[labelVal] = globalResYi; minY[labelVal] = globalResYi; } } } //We need to mpi across processors in case a segment crosses an mpi boundary InterColComm * icComm = parent->icCommunicator(); int numMpi = icComm->commSize(); int rank = icComm->commRank(); //Local comm rank //Non root processes simply send buffer size and then buffers int numLabels = maxX.size(); if(rank != 0){ //Load buffers loadLabelBuf(); //Send number of labels first MPI_Send(&numLabels, 1, MPI_INT, 0, rank, icComm->communicator()); //Send labels, then max/min buffers MPI_Send(labelBuf, numLabels, MPI_INT, 0, rank, icComm->communicator()); MPI_Send(maxXBuf, numLabels, MPI_INT, 0, rank, icComm->communicator()); MPI_Send(maxYBuf, numLabels, MPI_INT, 0, rank, icComm->communicator()); MPI_Send(minXBuf, numLabels, MPI_INT, 0, rank, icComm->communicator()); MPI_Send(minYBuf, numLabels, MPI_INT, 0, rank, icComm->communicator()); //Receive the full centerIdxBuf from root process int numCenterIdx = 0; MPI_Bcast(&numCenterIdx, 1, MPI_INT, 0, icComm->communicator()); checkIdxBufSize(numCenterIdx); MPI_Bcast(allLabelsBuf, numCenterIdx, MPI_INT, 0, icComm->communicator()); MPI_Bcast(centerIdxBuf, numCenterIdx, MPI_INT, 0, icComm->communicator()); //Load buffer into centerIdx map loadCenterIdxMap(bi, numCenterIdx); } //Root process stores everything else{ //One recv per buffer for(int recvRank = 1; recvRank < numMpi; recvRank++){ int numRecvLabels = 0; MPI_Recv(&numRecvLabels, 1, MPI_INT, recvRank, recvRank, icComm->communicator(), NULL); checkLabelBufSize(numRecvLabels); MPI_Recv(labelBuf, numRecvLabels, MPI_INT, recvRank, recvRank, icComm->communicator(), NULL); MPI_Recv(maxXBuf, numRecvLabels, MPI_INT, recvRank, recvRank, icComm->communicator(), NULL); MPI_Recv(maxYBuf, numRecvLabels, MPI_INT, recvRank, recvRank, icComm->communicator(), NULL); MPI_Recv(minXBuf, numRecvLabels, MPI_INT, recvRank, recvRank, icComm->communicator(), NULL); MPI_Recv(minYBuf, numRecvLabels, MPI_INT, recvRank, recvRank, icComm->communicator(), NULL); for(int i = 0; i < numRecvLabels; i++){ int label = labelBuf[i]; //Add on to maps //If the label already exists, fill with proper max/min if(maxX.count(label)){ if(maxXBuf[i] > maxX.at(label)){ maxX[label] = maxXBuf[i]; } if(maxYBuf[i] > maxY.at(label)){ maxY[label] = maxYBuf[i]; } if(minXBuf[i] < minX.at(label)){ minX[label] = minXBuf[i]; } if(minYBuf[i] < minY.at(label)){ minY[label] = minYBuf[i]; } } else{ maxX[label] = maxXBuf[i]; maxY[label] = maxYBuf[i]; minX[label] = minXBuf[i]; minY[label] = minYBuf[i]; } } } //Maps are now filled with all segments from the image //Fill centerIdx based on max/min for(std::map<int, int>::iterator it = maxX.begin(); it != maxX.end(); ++it){ int label = it->first; int centerX = minX.at(label) + (maxX.at(label) - minX.at(label))/2; int centerY = minY.at(label) + (maxY.at(label) - minY.at(label))/2; //Convert centerpoints (in global res idx) to linear idx (in global res space) int centerIdxVal = centerY * (loc->nxGlobal) + centerX; //Add to centerIdxMap centerIdx[bi][label] = centerIdxVal; } //Fill centerpoint buffer int numCenterIdx = centerIdx[bi].size(); checkIdxBufSize(numCenterIdx); int idx = 0; for(std::map<int, int>::iterator it = centerIdx[bi].begin(); it != centerIdx[bi].end(); ++it){ allLabelsBuf[idx] = it->first; centerIdxBuf[idx] = it->second; idx++; } //Broadcast buffers MPI_Bcast(&numCenterIdx, 1, MPI_INT, 0, icComm->communicator()); MPI_Bcast(allLabelsBuf, numCenterIdx, MPI_INT, 0, icComm->communicator()); MPI_Bcast(centerIdxBuf, numCenterIdx, MPI_INT, 0, icComm->communicator()); } } //End batch loop //centerIdx now stores each center coordinate of each segment return PV_SUCCESS; }
int BackwardsBatchNorm::updateState(double timef, double dt) { int status = PV_SUCCESS; //We are filling this activity buffer pvdata_t * thisA = clayer->activity->data; assert(thisA); //We need the normalized input vals, orig input vals, and the input gradients const pvdata_t * inputGradA = originalLayer->getCLayer()->activity->data; const pvdata_t * forwardA = forwardLayer->getCLayer()->activity->data; const pvdata_t * origInputA = forwardLayer->getOriginalLayer()->getCLayer()->activity->data; assert(inputGradA && forwardA && origInputA); //Get locs for all buffers const PVLayerLoc * thisLoc = getLayerLoc(); const PVLayerLoc * inputGradLoc = originalLayer->getLayerLoc(); const PVLayerLoc * forwardLoc = forwardLayer->getLayerLoc(); const PVLayerLoc * origInputLoc = forwardLayer->getOriginalLayer()->getLayerLoc(); int nbatch = thisLoc->nbatch; //All nx, ny, and nf should be the same int nx = thisLoc->nx; int ny = thisLoc->ny; int nf = thisLoc->nf; //Get buffer margins here int xThisMargin = thisLoc->halo.lt + thisLoc->halo.rt; int yThisMargin = thisLoc->halo.up + thisLoc->halo.dn; int xInputGradMargin = inputGradLoc->halo.lt + inputGradLoc->halo.rt; int yInputGradMargin = inputGradLoc->halo.up + inputGradLoc->halo.dn; int xForwardMargin = forwardLoc->halo.lt + forwardLoc->halo.rt; int yForwardMargin = forwardLoc->halo.up + forwardLoc->halo.dn; int xOrigInputMargin = origInputLoc->halo.lt + origInputLoc->halo.rt; int yOrigInputMargin = origInputLoc->halo.up + origInputLoc->halo.dn; //We also need various mean and var buffers from the forward layer const float* batchMean = forwardLayer->getBatchMean(); const float* batchVar = forwardLayer->getBatchVar(); float* batchMeanShift = forwardLayer->getBatchMeanShift(); float* batchVarShift = forwardLayer->getBatchVarShift(); float epsilon = forwardLayer->getEpsilon(); //Total number of neurons to divide by for each feature float normVal = parent->getNBatchGlobal() * thisLoc->nyGlobal * thisLoc->nxGlobal; //We're accumulating into delta buffers, so clear clearDelta(); //Ioffe et. al. Batch Normalization //Calculate deltaVar //TODO parallize over threads for(int iF = 0; iF < nf; iF++) { float secondTerm = -.5*(powf(batchVar[iF] + epsilon, -1.5)); for(int b = 0; b < nbatch; b++) { const pvdata_t* batchOrigInputA = origInputA + b * forwardLayer->getOriginalLayer()->getNumExtended(); const pvdata_t* batchInputGradA = inputGradA + b * originalLayer->getNumExtended(); for(int iY = 0; iY < ny; iY++) { for(int iX = 0; iX < nx; iX++) { int kExtOrigInput = kIndex(iX, iY, iF, nx+xOrigInputMargin, ny+yOrigInputMargin, nf); int kExtInputGrad = kIndex(iX, iY, iF, nx+xInputGradMargin, ny+yInputGradMargin, nf); float deltaNorm = batchInputGradA[kExtInputGrad] * batchVarShift[iF]; deltaVar[iF] += deltaNorm * (batchOrigInputA[kExtOrigInput] - batchMean[iF]); } } } //Multiply deltaVar by secondTerm deltaVar[iF] = deltaVar[iF] * secondTerm; } //Reduce deltaVar #ifdef PV_USE_MPI MPI_Allreduce(MPI_IN_PLACE, deltaVar, nf, MPI_FLOAT, MPI_SUM, parent->icCommunicator()->globalCommunicator()); #endif // PV_USE_MPI //Calculate deltaMean //Calculate first term first //TODO parallize over threads for(int iF = 0; iF < nf; iF++) { float multiplier = -1.0/(sqrtf(batchVar[iF]+epsilon)); for(int b = 0; b < nbatch; b++) { const pvdata_t* batchInputGradA = inputGradA + b * originalLayer->getNumExtended(); for(int iY = 0; iY < ny; iY++) { for(int iX = 0; iX < nx; iX++) { int kExtInputGrad = kIndex(iX, iY, iF, nx+xInputGradMargin, ny+yInputGradMargin, nf); float deltaNorm = batchInputGradA[kExtInputGrad] * batchVarShift[iF]; deltaMean[iF] += deltaNorm * multiplier; } } } } //Reduce deltaMean across mpi #ifdef PV_USE_MPI MPI_Allreduce(MPI_IN_PLACE, deltaMean, nf, MPI_FLOAT, MPI_SUM, parent->icCommunicator()->globalCommunicator()); #endif // PV_USE_MPI //Calculate second term //TODO parallize over threads for(int iF = 0; iF < nf; iF++) { float tmpMean = 0; for(int b = 0; b < nbatch; b++) { const pvdata_t* batchOrigInputA = origInputA + b * forwardLayer->getOriginalLayer()->getNumExtended(); for(int iY = 0; iY < ny; iY++) { for(int iX = 0; iX < nx; iX++) { int kExtOrigInput = kIndex(iX, iY, iF, nx+xOrigInputMargin, ny+yOrigInputMargin, nf); tmpMean += -2 * (batchOrigInputA[kExtOrigInput] - batchMean[iF]); } } } //Reduce tmpMean #ifdef PV_USE_MPI MPI_Allreduce(MPI_IN_PLACE, &tmpMean, 1, MPI_FLOAT, MPI_SUM, parent->icCommunicator()->globalCommunicator()); #endif // PV_USE_MPI tmpMean = tmpMean / normVal; //Add second term to first term deltaMean[iF] += deltaVar[iF] * tmpMean; } //No more sums, go with efficient loop //TODO Is the efficient loop better for optimization or do we put //features on the outer most loop for precalculation of constants over features? for(int b = 0; b < nbatch; b++) { const pvdata_t* batchOrigInputA = origInputA + b * forwardLayer->getOriginalLayer()->getNumExtended(); const pvdata_t* batchInputGradA = inputGradA + b * originalLayer->getNumExtended(); pvdata_t* batchThisA = thisA + b * getNumExtended(); #ifdef PV_USE_OPENMP_THREADS #pragma omp parallel for collapse(3) #endif for(int iY = 0; iY < ny; iY++) { for(int iX = 0; iX < nx; iX++) { for(int iF = 0; iF < nf; iF++) { int kExtOrigInput = kIndex(iX, iY, iF, nx+xOrigInputMargin, ny+yOrigInputMargin, nf); int kExtInputGrad = kIndex(iX, iY, iF, nx+xInputGradMargin, ny+yInputGradMargin, nf); int kExtThis = kIndex(iX, iY, iF, nx+xThisMargin, ny+yThisMargin, nf); float deltaNorm = batchInputGradA[kExtInputGrad] * batchVarShift[iF]; float firstTerm = deltaNorm/sqrtf(batchVar[iF] + epsilon); float secondTerm = deltaVar[iF] * (2*(batchOrigInputA[kExtOrigInput] - batchMean[iF])/normVal); float thirdTerm = deltaMean[iF]/normVal; batchThisA[kExtThis] = firstTerm + secondTerm + thirdTerm; } } } } //We calculate delta varShift and deltaMeanShift here //TODO parallize over threads //Since we're summing into delta*shift buffers, we have to sequentialize over features for(int iF = 0; iF < nf; iF++) { for(int b = 0; b < nbatch; b++) { const pvdata_t* batchForwardA = forwardA + b * forwardLayer->getNumExtended(); const pvdata_t* batchInputGradA = inputGradA + b * originalLayer->getNumExtended(); for(int iY = 0; iY < ny; iY++) { for(int iX = 0; iX < nx; iX++) { int kExtInputGrad = kIndex(iX, iY, iF, nx+xInputGradMargin, ny+yInputGradMargin, nf); int kExtForwardA = kIndex(iX, iY, iF, nx+xForwardMargin, ny + yForwardMargin, nf); deltaVarShift[iF] += batchInputGradA[kExtInputGrad] * batchForwardA[kExtForwardA]; deltaMeanShift[iF] += batchInputGradA[kExtInputGrad]; } } } } //Reduce delta*Shift across all mpi #ifdef PV_USE_MPI MPI_Allreduce(MPI_IN_PLACE, deltaVarShift, nf, MPI_FLOAT, MPI_SUM, parent->icCommunicator()->globalCommunicator()); MPI_Allreduce(MPI_IN_PLACE, deltaMeanShift, nf, MPI_FLOAT, MPI_SUM, parent->icCommunicator()->globalCommunicator()); #endif // PV_USE_MPI //TODO implement learning rule for meanShift and varShift return status; }