int customexit(HyPerCol * hc, int argc, char ** argv) {
   pvadata_t correctvalue = 0.5f;
   pvadata_t tolerance = 1.0e-7f;
   if (hc->columnId()==0) {
      pvInfo().printf("Checking whether input layer has all values equal to %f ...\n", correctvalue);
   }
   HyPerLayer * inputlayer = hc->getLayerFromName("input");
   assert(inputlayer);
   PVLayerLoc const * loc = inputlayer->getLayerLoc();
   assert(loc->nf==1);
   const int numNeurons = inputlayer->getNumNeurons();
   assert(numNeurons>0);
   int status = PV_SUCCESS;
   int numExtended = inputlayer->getNumExtended();
   InterColComm * icComm = hc->icCommunicator();
   pvadata_t * layerData = (pvadata_t *) icComm->publisherStore(inputlayer->getLayerId())->buffer(LOCAL);
   int rootproc = 0;
   if (icComm->commRank()==rootproc) {
      pvadata_t * databuffer = (pvadata_t *) malloc(numExtended*sizeof(pvadata_t));
      assert(databuffer);
      for (int proc=0; proc<icComm->commSize(); proc++) {
         if (proc==rootproc) {
            memcpy(databuffer, layerData, numExtended*sizeof(pvadata_t));
         }
         else {
            MPI_Recv(databuffer, numExtended*sizeof(pvadata_t), MPI_BYTE, proc, 15, icComm->communicator(), MPI_STATUS_IGNORE);
         }
         // At this point, databuffer on rank 0 should contain the extended input layer on rank proc
         for (int k=0; k<numNeurons; k++) {
            int kExt = kIndexExtended(k, loc->nx, loc->ny, loc->nf, loc->halo.lt, loc->halo.rt, loc->halo.dn, loc->halo.up);
            pvadata_t value = databuffer[kExt];
            if (fabs(value-correctvalue) >= tolerance) {
               pvErrorNoExit().printf("Rank %d, restricted index %d, extended index %d, value is %f instead of %f\n",
                     proc, k, kExt, value, correctvalue);
               status = PV_FAILURE;
            }
         }
      }
      free(databuffer);
      if (status == PV_SUCCESS) {
         pvInfo().printf("%s succeeded.\n", argv[0]);
      }
      else {
         pvError().printf("%s failed.\n", argv[0]);
      }
   }
   else {
      MPI_Send(layerData, numExtended*sizeof(pvadata_t), MPI_BYTE, rootproc, 15, icComm->communicator());
   }
   MPI_Barrier(icComm->communicator());
   return status;
}
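#include <cassert>

// Illustrative sketch (not from the original sources): the restricted-to-extended index
// mapping that kIndexExtended() above is relied on to perform, assuming the usual PetaVision
// layout in which the feature index varies fastest, then x, then y, and the restricted
// nx-by-ny-by-nf frame sits inside a halo of lt/rt/dn/up cells. The helper name is hypothetical.
static int restrictedToExtendedSketch(int k, int nx, int ny, int nf,
                                      int lt, int rt, int dn, int up) {
   assert(k >= 0 && k < nx * ny * nf); // k must be a valid restricted index
   (void) dn;                          // the bottom halo only pads the buffer; it does not shift the index
   int kf = k % nf;                    // feature index varies fastest
   int kx = (k / nf) % nx;             // x within the restricted frame
   int ky = k / (nf * nx);             // y within the restricted frame
   int nxExt = nx + lt + rt;           // extended line length includes the left/right halo
   return kf + ((kx + lt) + (ky + up) * nxExt) * nf;
}
// For example, with nx=ny=4, nf=1 and a halo of 1 on every side, restricted index 0 maps to
// extended index 7 (one extended row of 6 plus the left halo of 1), which is how customexit()
// above picks out databuffer[kExt] for each restricted neuron k.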
int ANNWeightedErrorLayer::allocateDataStructures() {
   int status = HyPerLayer::allocateDataStructures();
   int nf = getLayerLoc()->nf;
   errWeights = (float *) calloc(nf, sizeof(float));
   for (int i_weight = 0; i_weight < nf; i_weight++) {
      errWeights[i_weight] = 1.0f;
   }
   InterColComm * icComm = getParent()->icCommunicator();
   char errWeight_string[PV_PATH_MAX];
   if (icComm->commRank() == 0) {
      PV_Stream * errWeights_stream = pvp_open_read_file(errWeightsFileName, icComm);
      for (int i_weight = 0; i_weight < nf; i_weight++) {
         char * fgetsstatus = fgets(errWeight_string, PV_PATH_MAX, errWeights_stream->fp);
         if (fgetsstatus == NULL) {
            bool endoffile = feof(errWeights_stream->fp) != 0;
            if (endoffile) {
               fprintf(stderr, "File of errWeights \"%s\" reached end of file before all %d errWeights were read. Exiting.\n",
                     errWeightsFileName, nf);
               exit(EXIT_FAILURE);
            }
            else {
               int error = ferror(errWeights_stream->fp);
               assert(error);
               fprintf(stderr, "File of errWeights: error %d while reading. Exiting.\n", error);
               exit(error);
            }
         }
         else {
            // Remove linefeed from end of string
            errWeight_string[PV_PATH_MAX-1] = '\0';
            int len = strlen(errWeight_string);
            if (len > 1) {
               if (errWeight_string[len-1] == '\n') {
                  errWeight_string[len-1] = '\0';
               }
            }
         } // fgetsstatus
         // set errWeight = chance / relative fraction
         float errWeight_tmp = atof(errWeight_string);
         fprintf(stderr, "errWeight %i = %f\n", i_weight, errWeight_tmp);
         errWeights[i_weight] = (1.0/nf) / errWeight_tmp;
      } // i_weight
   } // commRank() == rootproc
#ifdef PV_USE_MPI
   // broadcast errWeights
   MPI_Bcast(errWeights, nf, MPI_FLOAT, 0, icComm->communicator());
#endif // PV_USE_MPI
   return status;
}
int GatePoolTestLayer::updateState(double timef, double dt) {
   // Do update state of ANN Layer first
   ANNLayer::updateState(timef, dt);

   // Grab layer size
   const PVLayerLoc * loc = getLayerLoc();
   int nx = loc->nx;
   int ny = loc->ny;
   int nxGlobal = loc->nxGlobal;
   int nyGlobal = loc->nyGlobal;
   int nf = loc->nf;
   int kx0 = loc->kx0;
   int ky0 = loc->ky0;

   bool isCorrect = true;
   // Grab the activity layer of current layer
   for (int b = 0; b < loc->nbatch; b++) {
      const pvdata_t * A = getActivity() + b * getNumExtended();
      // We only care about restricted space, but iY and iX are extended
      for (int iY = loc->halo.up; iY < ny + loc->halo.up; iY++) {
         for (int iX = loc->halo.lt; iX < nx + loc->halo.lt; iX++) {
            for (int iFeature = 0; iFeature < nf; iFeature++) {
               int ext_idx = kIndex(iX, iY, iFeature, nx+loc->halo.lt+loc->halo.rt, ny+loc->halo.dn+loc->halo.up, nf);
               float actualvalue = A[ext_idx];
               int xval = (iX + kx0 - loc->halo.lt)/2;
               int yval = (iY + ky0 - loc->halo.up)/2;
               assert(xval >= 0 && xval < nxGlobal);
               assert(yval >= 0 && yval < nyGlobal);
               float expectedvalue = iFeature * 64 + yval * 16 + xval * 2 + 4.5;
               expectedvalue *= 4;
               if (fabs(actualvalue - expectedvalue) >= 1e-4) {
                  pvErrorNoExit() << "Connection " << name << " Mismatch at (" << iX << "," << iY << ") : actual value: "
                        << actualvalue << " Expected value: " << expectedvalue
                        << ". Discrepancy is a whopping " << actualvalue - expectedvalue << "! Horrors!" << "\n";
                  isCorrect = false;
               }
            }
         }
      }
   }
   if (!isCorrect) {
      InterColComm * icComm = parent->icCommunicator();
      MPI_Barrier(icComm->communicator()); // If there is an error, make sure that MPI doesn't kill the run before process 0 reports the error.
      exit(-1);
   }
   return PV_SUCCESS;
}
int SegmentLayer::updateState(double timef, double dt) {
   pvdata_t * srcA = originalLayer->getActivity();
   pvdata_t * thisA = getActivity();
   assert(srcA);
   assert(thisA);

   const PVLayerLoc * loc = getLayerLoc();

   // Segment input layer based on segmentMethod
   if (strcmp(segmentMethod, "none") == 0) {
      int numBatchExtended = getNumExtendedAllBatches();
      // Copy activity over
      // Since both buffers should be identical size, we can do a memcpy here
      memcpy(thisA, srcA, numBatchExtended * sizeof(pvdata_t));
   }
   else {
      // This case should never happen
      assert(0);
   }

   assert(loc->nf == 1);

   // Clear centerIdxs
   for (int bi = 0; bi < loc->nbatch; bi++) {
      centerIdx[bi].clear();
   }

   for (int bi = 0; bi < loc->nbatch; bi++) {
      pvdata_t * batchA = thisA + bi * getNumExtended();
      // Reset max/min buffers
      maxX.clear();
      maxY.clear();
      minX.clear();
      minY.clear();

      // Loop through this buffer to fill labelVec and idxVec
      // Looping through restricted, but indices are extended
      for (int yi = loc->halo.up; yi < loc->ny + loc->halo.up; yi++) {
         for (int xi = loc->halo.lt; xi < loc->nx + loc->halo.lt; xi++) {
            // Convert to local extended linear index (nf == 1)
            int niLocalExt = yi * (loc->nx + loc->halo.lt + loc->halo.rt) + xi;
            // Convert yi and xi to global res index
            int globalResYi = yi - loc->halo.up + loc->ky0;
            int globalResXi = xi - loc->halo.lt + loc->kx0;

            // Get label value
            // Note that we're assuming that the activity here are integers,
            // even though the buffer is floats
            int labelVal = round(batchA[niLocalExt]);

            // Calculate max/min x and y for a single batch
            // If labelVal exists in map
            if (maxX.count(labelVal)) {
               // Here, we're assuming the 4 maps are in sync, so we use the
               // .at method, as it will throw an exception as opposed to the
               // [] operator, which will simply add the key into the map
               if (globalResXi > maxX.at(labelVal)) { maxX[labelVal] = globalResXi; }
               if (globalResXi < minX.at(labelVal)) { minX[labelVal] = globalResXi; }
               if (globalResYi > maxY.at(labelVal)) { maxY[labelVal] = globalResYi; }
               if (globalResYi < minY.at(labelVal)) { minY[labelVal] = globalResYi; }
            }
            // If doesn't exist, add into map with current vals
            else {
               maxX[labelVal] = globalResXi;
               minX[labelVal] = globalResXi;
               maxY[labelVal] = globalResYi;
               minY[labelVal] = globalResYi;
            }
         }
      }

      // We need to mpi across processors in case a segment crosses an mpi boundary
      InterColComm * icComm = parent->icCommunicator();
      int numMpi = icComm->commSize();
      int rank = icComm->commRank(); // Local comm rank

      // Non root processes simply send buffer size and then buffers
      int numLabels = maxX.size();
      if (rank != 0) {
         // Load buffers
         loadLabelBuf();
         // Send number of labels first
         MPI_Send(&numLabels, 1, MPI_INT, 0, rank, icComm->communicator());
         // Send labels, then max/min buffers
         MPI_Send(labelBuf, numLabels, MPI_INT, 0, rank, icComm->communicator());
         MPI_Send(maxXBuf, numLabels, MPI_INT, 0, rank, icComm->communicator());
         MPI_Send(maxYBuf, numLabels, MPI_INT, 0, rank, icComm->communicator());
         MPI_Send(minXBuf, numLabels, MPI_INT, 0, rank, icComm->communicator());
         MPI_Send(minYBuf, numLabels, MPI_INT, 0, rank, icComm->communicator());

         // Receive the full centerIdxBuf from root process
         int numCenterIdx = 0;
         MPI_Bcast(&numCenterIdx, 1, MPI_INT, 0, icComm->communicator());
         checkIdxBufSize(numCenterIdx);
         MPI_Bcast(allLabelsBuf, numCenterIdx, MPI_INT, 0, icComm->communicator());
         MPI_Bcast(centerIdxBuf, numCenterIdx, MPI_INT, 0, icComm->communicator());

         // Load buffer into centerIdx map
         loadCenterIdxMap(bi, numCenterIdx);
      }
      // Root process stores everything
      else {
         // One recv per buffer
         for (int recvRank = 1; recvRank < numMpi; recvRank++) {
            int numRecvLabels = 0;
            MPI_Recv(&numRecvLabels, 1, MPI_INT, recvRank, recvRank, icComm->communicator(), MPI_STATUS_IGNORE);
            checkLabelBufSize(numRecvLabels);
            MPI_Recv(labelBuf, numRecvLabels, MPI_INT, recvRank, recvRank, icComm->communicator(), MPI_STATUS_IGNORE);
            MPI_Recv(maxXBuf, numRecvLabels, MPI_INT, recvRank, recvRank, icComm->communicator(), MPI_STATUS_IGNORE);
            MPI_Recv(maxYBuf, numRecvLabels, MPI_INT, recvRank, recvRank, icComm->communicator(), MPI_STATUS_IGNORE);
            MPI_Recv(minXBuf, numRecvLabels, MPI_INT, recvRank, recvRank, icComm->communicator(), MPI_STATUS_IGNORE);
            MPI_Recv(minYBuf, numRecvLabels, MPI_INT, recvRank, recvRank, icComm->communicator(), MPI_STATUS_IGNORE);

            for (int i = 0; i < numRecvLabels; i++) {
               int label = labelBuf[i];
               // Add on to maps
               // If the label already exists, fill with proper max/min
               if (maxX.count(label)) {
                  if (maxXBuf[i] > maxX.at(label)) { maxX[label] = maxXBuf[i]; }
                  if (maxYBuf[i] > maxY.at(label)) { maxY[label] = maxYBuf[i]; }
                  if (minXBuf[i] < minX.at(label)) { minX[label] = minXBuf[i]; }
                  if (minYBuf[i] < minY.at(label)) { minY[label] = minYBuf[i]; }
               }
               else {
                  maxX[label] = maxXBuf[i];
                  maxY[label] = maxYBuf[i];
                  minX[label] = minXBuf[i];
                  minY[label] = minYBuf[i];
               }
            }
         }

         // Maps are now filled with all segments from the image
         // Fill centerIdx based on max/min
         for (std::map<int, int>::iterator it = maxX.begin(); it != maxX.end(); ++it) {
            int label = it->first;
            int centerX = minX.at(label) + (maxX.at(label) - minX.at(label))/2;
            int centerY = minY.at(label) + (maxY.at(label) - minY.at(label))/2;
            // Convert centerpoints (in global res idx) to linear idx (in global res space)
            int centerIdxVal = centerY * (loc->nxGlobal) + centerX;
            // Add to centerIdxMap
            centerIdx[bi][label] = centerIdxVal;
         }

         // Fill centerpoint buffer
         int numCenterIdx = centerIdx[bi].size();
         checkIdxBufSize(numCenterIdx);

         int idx = 0;
         for (std::map<int, int>::iterator it = centerIdx[bi].begin(); it != centerIdx[bi].end(); ++it) {
            allLabelsBuf[idx] = it->first;
            centerIdxBuf[idx] = it->second;
            idx++;
         }

         // Broadcast buffers
         MPI_Bcast(&numCenterIdx, 1, MPI_INT, 0, icComm->communicator());
         MPI_Bcast(allLabelsBuf, numCenterIdx, MPI_INT, 0, icComm->communicator());
         MPI_Bcast(centerIdxBuf, numCenterIdx, MPI_INT, 0, icComm->communicator());
      }
   } // End batch loop

   // centerIdx now stores each center coordinate of each segment
   return PV_SUCCESS;
}
int MapReduceKernelConn::reduceKernels(const int arborID) {
   int status = HyPerConn::reduceKernels(arborID);
   int rootproc = 0;
   InterColComm * icComm = parent->icCommunicator();
   const int numPatches = getNumDataPatches();
   const size_t patchSize = (size_t) nxp * nyp * nfp;  // weights per patch
   const size_t localSize = numPatches * patchSize;    // weights per arbor
   const size_t arborSize = localSize * this->numberOfAxonalArborLists();
   if (icComm->commRank() == rootproc) {
      // write dW for this instantiation of PetaVision to disk
      status = HyPerConn::writeWeights(NULL, this->get_dwDataStart(), getNumDataPatches(),
            dWeightsList[dWeightFileIndex], parent->simulationTime(),
            /*writeCompressedWeights*/ false, /*last*/ false);
      if (status != PV_SUCCESS) {
         fprintf(stderr,
               "MapReduceKernelConn::reduceKernels::HyPerConn::writeWeights: problem writing to file %s, SHUTTING DOWN\n",
               dWeightsList[dWeightFileIndex]);
         exit(EXIT_FAILURE);
      } // status

      // use dWeightsList to read in the weights written by other PetaVision instantiations
      double dW_time;
      double simulation_time = parent->simulationTime();
      int filetype, datatype;
      int numParams = NUM_BIN_PARAMS + NUM_WGT_EXTRA_PARAMS;
      int params[NUM_BIN_PARAMS + NUM_WGT_EXTRA_PARAMS];
      const PVLayerLoc * preLoc = this->preSynapticLayer()->getLayerLoc();
      for (int file_count = 0; file_count < num_dWeightFiles; file_count++) {
         if (file_count == dWeightFileIndex) { continue; }
         int num_attempts = 0;
         const int MAX_ATTEMPTS = 5;
         dW_time = 0;
         // wait until the other instantiation's dW file has caught up to the current simulation time
         while (dW_time < simulation_time && num_attempts <= MAX_ATTEMPTS) {
            pvp_read_header(dWeightsList[file_count], icComm, &dW_time, &filetype, &datatype, params, &numParams);
            num_attempts++;
         } // while
         if (num_attempts > MAX_ATTEMPTS) {
            fprintf(stderr,
                  "PV::MapReduceKernelConn::reduceKernels: problem reading arbor file %s, SHUTTING DOWN\n",
                  dWeightsList[file_count]);
            status = EXIT_FAILURE;
            exit(EXIT_FAILURE);
         } // num_attempts > MAX_ATTEMPTS
         status = PV::readWeights(NULL, get_dwDataStart(), this->numberOfAxonalArborLists(),
               this->getNumDataPatches(), nxp, nyp, nfp, dWeightsList[file_count], icComm, &dW_time, preLoc);
         if (status != PV_SUCCESS) {
            fprintf(stderr,
                  "MapReduceKernelConn::reduceKernels::PV::readWeights: problem reading file %s, SHUTTING DOWN\n",
                  dWeightsList[file_count]);
            exit(EXIT_FAILURE);
         } // status
      } // file_count < num_dWeightFiles

      // average dW from map-reduce
      pvwdata_t * dW_data = this->get_dwDataStart(0);
      for (size_t i_dW = 0; i_dW < arborSize; i_dW++) {
         dW_data[i_dW] /= num_dWeightFiles;
      }
   } // rootproc

   // broadcast map-reduced dWeights to all non-root processes
#ifdef PV_USE_MPI
   MPI_Comm mpi_comm = icComm->communicator();
   MPI_Bcast(this->get_dwDataStart(0), (int) arborSize, MPI_FLOAT, rootproc, mpi_comm);
#endif
   return PV_BREAK;
}