//==============================================================================
double* PoissonData::getElemLoad(GlobalID elemID)
{
  elem_->setElemID(elemID);
  elem_->setElemLength(1.0/L_);
  elem_->setTotalLength(1.0);

  //now get a pointer to this element's connectivity array and
  //calculate that connectivity (in place).
  int size = 0;
  GlobalID* elemConn = elem_->getElemConnPtr(size);
  if (size == 0) messageAbort("getElemLoad: bad conn ptr.");

  calculateConnectivity(elemConn, size, elemID);

  elem_->calculateCoords();

  if (outputLevel_ > 1) {
    double* x = elem_->getNodalX(size);
    double* y = elem_->getNodalY(size);
    FEI_COUT << localProc_ << ", elemID " << elemID << ", nodes: ";
    for(int j=0; j<size; j++) {
      FEI_COUT << elemConn[j] << " ";
      FEI_COUT << "("<<x[j]<<","<<y[j]<<") ";
    }
    FEI_COUT << FEI_ENDL;
  }

  elem_->calculateLoad();

  return( elem_->getElemLoad(size) );
}
//==============================================================================
void PoissonData::calculateBCs()
{
  //
  //This function figures out which nodes lie on the boundary. The ones that
  //do are added to the BC set, along with appropriate alpha/beta/gamma values.
  //
  for(int i=0; i<numLocalElements_; i++) {
    elem_->setElemID(elemIDs_[i]);
    elem_->setElemLength(1.0/L_);
    elem_->setTotalLength(1.0);

    //now get a pointer to this element's connectivity array and
    //calculate that connectivity (in place).
    int size = 0;
    GlobalID* nodeIDs = elem_->getElemConnPtr(size);
    if (size == 0) messageAbort("calculateBCs: bad conn ptr.");

    calculateConnectivity(nodeIDs, size, elemIDs_[i]);

    elem_->calculateCoords();

    double* xcoord = elem_->getNodalX(size);
    double* ycoord = elem_->getNodalY(size);

    //now loop over the nodes and see if any are on a boundary.
    for(int j=0; j<size; j++) {
      if ((std::abs(xcoord[j]) < 1.e-49) ||
          (std::abs(xcoord[j] - 1.0) < 1.e-49) ||
          (std::abs(ycoord[j]) < 1.e-49) ||
          (std::abs(ycoord[j] - 1.0) < 1.e-49)) {
        addBCNode(nodeIDs[j], xcoord[j], ycoord[j]);
      }
    }
  }
}
//==============================================================================
void PoissonData::calculateDistribution()
{
  //
  //Calculate which elements this processor owns. The element domain is a
  //square, and we can assume that sqrt(numProcs_) divides evenly into
  //L_. We're working with a (logically) 2D processor arrangement.
  //Furthermore, the logical processor layout is such that processor 0 is at
  //the bottom left corner of a 2D grid, and a side of the grid is of length
  //sqrt(numProcs_). The element domain is numbered such that element 1 is at
  //the bottom left corner of the square, and element numbers increase from
  //left to right. i.e., element 1 is in position (1,1), element L is in
  //position (1,L), element L+1 is in position (2,1).
  //
  //Use 1-based numbering for the elements and the x- and y- coordinates in
  //the element grid, but use 0-based numbering for processor IDs and the
  //coordinates in the processor grid.
  //
  numLocalElements_ = (L_*L_)/numProcs_;

  elemIDs_ = new GlobalID[numLocalElements_];
  if (!elemIDs_) messageAbort("ERROR allocating elemIDs_.");
  elemIDsAllocated_ = true;

  //0-based x-coordinate of this processor in the 2D processor grid.
  procX_ = localProc_%int_sqrt(numProcs_);

  //0-based maximum processor x-coordinate.
  maxProcX_ = int_sqrt(numProcs_) - 1;

  //0-based y-coordinate of this processor in the 2D processor grid.
  procY_ = localProc_/int_sqrt(numProcs_);

  //0-based maximum processor y-coordinate.
  maxProcY_ = int_sqrt(numProcs_) - 1;

  int sqrtElems = int_sqrt(numLocalElements_);
  int sqrtProcs = int_sqrt(numProcs_);

  //1-based first-element-on-this-processor
  startElement_ = 1 + procY_*sqrtProcs*numLocalElements_ + procX_*sqrtElems;

  if (outputLevel_ > 1) {
    FEI_COUT << localProc_ << ", calcDist.: numLocalElements: "
         << numLocalElements_ << ", startElement: " << startElement_
         << FEI_ENDL;
    FEI_COUT << localProc_ << ", procX: " << procX_ << ", procY_: " << procY_
         << ", maxProcX: " << maxProcX_ << ", maxProcY: " << maxProcY_
         << FEI_ENDL;
  }

  int offset = 0;
  for(int i=0; i<sqrtElems; i++) {
    for(int j=0; j<sqrtElems; j++) {
      elemIDs_[offset] = (GlobalID)(startElement_ + i*L_ + j);
      offset++;
    }
  }
}
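//Worked example (a sketch, not part of the original code): with L_=4 and
//numProcs_=4, each processor owns a 2x2 patch of the 4x4 element grid.
//sqrtElems=2, sqrtProcs=2, and startElement_ = 1 + 8*procY_ + 2*procX_, so
//  proc 0 (procX_=0, procY_=0): elemIDs_ = { 1,  2,  5,  6}
//  proc 1 (procX_=1, procY_=0): elemIDs_ = { 3,  4,  7,  8}
//  proc 2 (procX_=0, procY_=1): elemIDs_ = { 9, 10, 13, 14}
//  proc 3 (procX_=1, procY_=1): elemIDs_ = {11, 12, 15, 16}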
//==============================================================================
void Poisson_Elem::calculateCoords()
{
  //
  //This function calculates nodal x- and y-coordinates for this element.
  //NOTE: element IDs are assumed to be 1-based.
  //
  if (!internalsAllocated_)
    messageAbort("calculateCoords: internals not allocated.");
  if (!elemLengthIsSet_)
    messageAbort("calculateCoords: elemLength not set.");
  if (!totalLengthIsSet_)
    messageAbort("calculateCoords: totalLength not set.");
  if (!ID_IsSet_)
    messageAbort("calculateCoords: elemID not set.");
  if (std::abs(elemLength_) < 1.e-49)
    messageAbort("calculateCoords: elemLength == 0.");

  int lowLeft = 0;
  int lowRight = 1;
  int upperRight = 2;
  int upperLeft = 3;

  int elemsPerSide = (int)std::ceil(totalLength_/elemLength_);

  int elemX = (int)globalElemID_%elemsPerSide;
  if (elemX==0) elemX = elemsPerSide;

  int elemY = ((int)globalElemID_ - elemX)/elemsPerSide + 1;

  //elemX and elemY are 1-based coordinates of this element in
  //the global square of elements. The origin is position (1,1),
  //which is at the bottom left of the square.

  nodalX_[lowLeft] = (elemX-1)*elemLength_;
  nodalX_[upperLeft] = nodalX_[lowLeft];
  nodalX_[lowRight] = elemX*elemLength_;
  nodalX_[upperRight] = nodalX_[lowRight];

  nodalY_[lowLeft] = (elemY-1)*elemLength_;
  nodalY_[lowRight] = nodalY_[lowLeft];
  nodalY_[upperLeft] = elemY*elemLength_;
  nodalY_[upperRight] = nodalY_[upperLeft];
}
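//Worked example (a sketch, not part of the original code): with
//totalLength_=1.0 and elemLength_=0.25, elemsPerSide is 4. For
//globalElemID_=6: elemX = 6%4 = 2 and elemY = (6-2)/4 + 1 = 2, so the lower
//left node lands at (0.25, 0.25) and the upper right node at (0.5, 0.5).
//For globalElemID_=4: elemX = 4%4 = 0, which is bumped up to 4, and
//elemY = (4-4)/4 + 1 = 1, i.e. the right-most element of the bottom row.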
//==============================================================================
void AztecDVBR_Matrix::setBindx(int nnzBlks, int* blkColInds)
{
  //
  //This function simply allocates and fills the Amat_->bindx array.
  //
  Amat_->bindx = new int[nnzBlks];

  for(int i=0; i<nnzBlks; i++) {
    Amat_->bindx[i] = blkColInds[i];
    if (blkColInds[i] < 0)
      messageAbort("setBindx: negative block col index.");
  }
}
//==============================================================================
PoissonData::PoissonData(int L, int numProcs, int localProc, int outputLevel)
{
  //
  //PoissonData constructor.
  //
  //Arguments:
  //
  // L: global square size (number-of-elements along side)
  // numProcs: number of processors participating in this FEI test.
  // localProc: local processor number.
  // outputLevel: affects the amount of screen output.
  //
  L_ = L;

  startElement_ = 0;
  numLocalElements_ = 0;

  numProcs_ = numProcs;
  localProc_ = localProc;
  outputLevel_ = outputLevel;

  check1();

  elem_ = new Poisson_Elem();
  int err = elem_->allocateInternals(1);
  err += elem_->allocateLoad(1);
  err += elem_->allocateStiffness(1);
  if (err) messageAbort("Allocation error in element.");

  fieldArraysAllocated_ = false;
  elemIDsAllocated_ = false;

  numFields_ = NULL;
  fieldIDs_ = NULL;

  elemIDs_ = NULL;

  calculateDistribution();

  numElemBlocks_ = 1;
  elemBlockID_ = (GlobalID)0;
  elemSetID_ = 0;
  elemFormat_ = 0;

  nodesPerElement_ = 4;
  fieldsPerNode_ = 1;

  initializeFieldStuff();
}
//==============================================================================
GlobalID* PoissonData::getElementConnectivity(GlobalID elemID)
{
  //set the elemID on the internal Poisson_Elem instance.
  elem_->setElemID(elemID);
  elem_->setElemLength(1.0/L_);
  elem_->setTotalLength(1.0);

  //now get a pointer to the element's connectivity array and
  //calculate that connectivity (in place).
  int size = 0;
  GlobalID* elemConn = elem_->getElemConnPtr(size);
  if (size == 0) messageAbort("getElementConnectivity: bad conn ptr.");

  calculateConnectivity(elemConn, size, elemID);

  return(elemConn);
}
//==============================================================================
void PoissonData::check1()
{
  //
  //Private function to be called from the constructor, simply makes sure that
  //the constructor's input arguments were reasonable.
  //
  //If they aren't, a message is printed on standard err, and abort() is called.
  //
  if (L_ <= 0)                 messageAbort("square size L <= 0.");
  if (numProcs_ <= 0)          messageAbort("numProcs <= 0.");
  if (L_%int_sqrt(numProcs_))
    messageAbort("L must be an integer multiple of sqrt(numProcs).");
  if (localProc_ < 0)          messageAbort("localProc < 0.");
  if (localProc_ >= numProcs_) messageAbort("localProc >= numProcs.");
  if (outputLevel_ < 0)        messageAbort("outputLevel < 0.");
}
//==============================================================================
bool AztecDVBR_Matrix::readFromFile(const char *filename)
{
  //
  //readFromFile should be able to be called after the matrix is constructed,
  //and before allocate has been called. i.e., example usage should include:
  //
  //  AztecDVBR_Matrix A(map, update);
  //  A.readFromFile(fileName);
  //  A.matvec(b, c);
  //
  //i.e., readFromFile can take the place of the allocate and loadComplete
  //calls.
  //
  FILE *infile = NULL;

  MPI_Comm thisComm = amap_->getCommunicator();

  MPI_Barrier(thisComm);

  infile = fopen(filename, "r");
  if (!infile) messageAbort("readFromFile: couldn't open file.");

  int* num_nz_blocks = NULL;
  int* blk_col_inds = NULL;

  readAllocateInfo(infile, num_nz_blocks, blk_col_inds);

  allocate(num_nz_blocks, blk_col_inds);

  delete [] num_nz_blocks;
  delete [] blk_col_inds;

  fclose(infile);
  infile = fopen(filename, "r");

  readMatrixData(infile);

  fclose(infile);

  loadComplete();

  return(true);
}
//==============================================================================
int AztecDVBR_Matrix::putBlockRow(int blkRow, double* val,
                                  int* blkColInds, int numNzBlks) const
{
  if (!isAllocated()) return(1);

  int index;
  if (!inUpdate(blkRow, index)) {
    fei::console_out() << "AztecDVBR_Matrix::putBlockRow: ERROR: blkRow " << blkRow
         << " not in local update list." << FEI_ENDL;
    return(1);
  }

  //for each incoming block, we need to find its block column index
  //in the bindx array, then go to that same position in the indx
  //array to find out how many (point) entries are in that block.
  //We can then use the indx entry to go to the val array and store
  //the data.

  int offset = 0;

  for(int blk = 0; blk<numNzBlks; blk++) {
    int indb = getBindxOffset(blkColInds[blk],
                              Amat_->bpntr[index], Amat_->bpntr[index+1]-1);

    if (indb < 0) messageAbort("putBlockRow: blk col not found in row.");

    int numEntries = Amat_->indx[indb+1] - Amat_->indx[indb];
    int valOffset = Amat_->indx[indb];

    //ok, now we're ready to store the stuff.
    for(int i=0; i<numEntries; i++) {
      Amat_->val[valOffset + i] = val[offset + i];
    }

    offset += numEntries;
  }

  return(0);
}
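//Worked example (a sketch, not part of the original code): suppose the local
//block row 'index' spans bindx positions bpntr[index]=3 .. bpntr[index+1]-1=5,
//and the requested block column is found at indb=4. If indx[4]=20 and
//indx[5]=26, that block holds 6 point entries, so the next 6 values from the
//caller's buffer are copied into val[20..25].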
//==============================================================================
void AztecDVBR_Matrix::calcRpntr()
{
  //
  //This function will use information from the Aztec_BlockMap 'amap_'
  //to set the Amat_->rpntr array.
  //
  //rpntr[0..M] (where M = number-of-blocks)
  //rpntr[0] = 0
  //rpntr[k+1] - rpntr[k] = size of block k
  //
  const int* blkSizes = amap_->getBlockSizes();

  Amat_->rpntr = new int[N_update_+1];

  Amat_->rpntr[0] = 0;

  for(int i=0; i<N_update_; i++) {
    Amat_->rpntr[i+1] = Amat_->rpntr[i] + blkSizes[i];
    if (blkSizes[i] < 0)
      messageAbort("allocate: negative block size.");
  }
}
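//Worked example (a sketch, not part of the original code): for three local
//block rows with point sizes {2, 3, 1}, rpntr becomes {0, 2, 5, 6}; block k
//occupies point rows rpntr[k] .. rpntr[k+1]-1.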
//==============================================================================
void PoissonData::getBottomSharedNodes(int& numShared, GlobalID* sharedNodeIDs,
                                       int* numProcsPerSharedNode,
                                       int** sharingProcs)
{
  //
  //This function decides whether any of the nodes along the bottom edge,
  //including the left node but not the right node, are shared. It also
  //decides which processors the nodes are shared with.
  //
  if (numProcs_ == 1) {
    numShared = 0;
    return;
  }

  if (procY_ == 0) {
    //if this proc is on the bottom edge of the square...

    if (procX_ > 0) {
      //if this proc is not the bottom left proc...

      numShared = 1;

      int elemIndex = 0;

      elem_->setElemID(elemIDs_[elemIndex]);

      //now get a pointer to this element's connectivity array and
      //calculate that connectivity (in place).
      int size;
      GlobalID* nodes = elem_->getElemConnPtr(size);
      if (size == 0) messageAbort(": bad conn ptr.");

      calculateConnectivity(nodes, size, elemIDs_[elemIndex]);

      sharedNodeIDs[0] = nodes[0]; //elem's bottom left node is node 0
      numProcsPerSharedNode[0] = 2;
      sharingProcs[0][0] = localProc_;
      sharingProcs[0][1] = localProc_ - 1;

      return;
    }
    else {
      //else this proc is the bottom left proc, so none of its bottom
      //nodes are shared...
      numShared = 0;
    }
  }
  else {
    //else this proc is not on the bottom edge of the square...

    numShared = int_sqrt(numLocalElements_);
    int lowerRightElemIndex = int_sqrt(numLocalElements_) - 1;

    int sqrtElems = int_sqrt(numLocalElements_);

    int shOffset = 0;

    for(int i=0; i<sqrtElems; i++){
      //stride across the bottom edge of the local elements, from
      //right to left...
      int size=0;
      int elemIndex = lowerRightElemIndex-i;

      elem_->setElemID(elemIDs_[elemIndex]);

      //now get a pointer to this element's connectivity array and
      //calculate that connectivity (in place).
      GlobalID* nodes = elem_->getElemConnPtr(size);
      if (size == 0) messageAbort(": bad conn ptr.");

      calculateConnectivity(nodes, size, elemIDs_[elemIndex]);

      //now put in the lower left node
      sharedNodeIDs[shOffset] = nodes[0];
      sharingProcs[shOffset][0] = localProc_ - int_sqrt(numProcs_);
      sharingProcs[shOffset][1] = localProc_;
      numProcsPerSharedNode[shOffset++] = 2;
    }

    if (procX_ > 0) {
      //if this proc isn't on the left edge, the lower left node (the
      //last one we put into the shared node list) is shared by 4 procs.

      shOffset--;
      numProcsPerSharedNode[shOffset] = 4;
      sharingProcs[shOffset][2] = localProc_ - 1;
      sharingProcs[shOffset][3] = sharingProcs[shOffset][0] - 1;
    }
  }
}
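//Worked example (a sketch, not part of the original code): on a 2x2 processor
//grid, proc 3 (procX_=1, procY_=1) is not on the bottom edge, so it reports
//its sqrt(numLocalElements_) bottom-edge nodes as shared with proc 1, the
//proc below it. The last node stored, which is proc 3's lower left corner
//node, is additionally shared with procs 2 and 0, so its sharing list becomes
//{1, 3, 2, 0} with numProcsPerSharedNode = 4.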
//==============================================================================
void AztecDVBR_Matrix::calcIndx(int nnzBlks)
{
  //
  //This function allocates and fills the Amat_->indx array, which holds info
  //on the number of entries in each nonzero block.
  //
  //indx[0..bpntr[M]], (where M = number of local block rows)
  //indx[0] = 0
  //indx[k+1]-indx[k] = number of entries in nonzero block k
  //
  Amat_->indx = new int[nnzBlks+1];

  //we need to obtain block sizes for all local nonzero blocks. rpntr
  //gives us the sizes for the blocks with column indices in the local
  //update set, but we'll have to do some message passing to obtain the
  //sizes of blocks with column indices in other procs' update sets.

  int numProcs = amap_->getProcConfig()[AZ_N_procs];

  if (numProcs > 1) {
    //form a list of the column indices that are not local.
    calcRemoteInds(remoteInds_, numRemoteBlocks_);

    //now get sizes of blocks that correspond to remote rows.
    remoteBlockSizes_ = new int[numRemoteBlocks_];
    getRemoteBlkSizes(remoteBlockSizes_, remoteInds_, numRemoteBlocks_);
  }

  //now we're ready to set the block sizes in Amat_->indx.
  int index;

  Amat_->indx[0] = 0;

  for(int i=0; i<amap_->getNumLocalBlocks(); i++) {
    int rowBlkSize = Amat_->rpntr[i+1] - Amat_->rpntr[i];

    int colStart = Amat_->bpntr[i];
    int colEnd = Amat_->bpntr[i+1] - 1;

    for(int j=colStart; j<=colEnd; j++) {
      if (inUpdate(Amat_->bindx[j], index)) {
        int colBlkSize = Amat_->rpntr[index+1] - Amat_->rpntr[index];
        Amat_->indx[j+1] = Amat_->indx[j] + rowBlkSize*colBlkSize;
      }
      else { //it's a remoteIndex
        if (numProcs == 1) {
          char mesg[80];
          sprintf(mesg,"calcIndx: blk col index %d not in update set.",
                  Amat_->bindx[j]);
          messageAbort(mesg);
        }

        index = AZ_find_index(Amat_->bindx[j], remoteInds_, numRemoteBlocks_);
        if (index >= 0) {
          Amat_->indx[j+1] = Amat_->indx[j] + rowBlkSize*remoteBlockSizes_[index];
        }
        else { //if it wasn't in update or remoteInds, then panic!
          messageAbort("calcIndx: block column index not found.");
        }
      }
    } // end for j loop

    nnzPerRow_[i] = Amat_->indx[colEnd+1] - Amat_->indx[colStart];
  } // end for i loop

  localNNZ_ = Amat_->indx[nnzBlks];
}
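//Worked example (a sketch, not part of the original code): if local block row
//i has point size 2 and holds two nonzero blocks whose column blocks have
//point sizes 2 and 3, then starting from indx[colStart]=10 the entries become
//indx[colStart+1]=14 and indx[colStart+2]=20, so nnzPerRow_[i] = 10.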
//==============================================================================
void AztecDVBR_Matrix::readAllocateInfo(FILE* infile,
                                        int*& num_nz_blocks,
                                        int*& blk_col_inds)
{
  //
  //This function will read through infile and construct the lists
  //num_nz_blocks (which is the number of nonzero blocks per row) and
  //blk_col_inds (which is the block-column indices of those blocks).
  //
  //It is assumed that these two lists are empty when this function is
  //called.

  int i;

  if (num_nz_blocks) delete [] num_nz_blocks;
  if (blk_col_inds) delete [] blk_col_inds;

  num_nz_blocks = new int[N_update_];

  //we'll use a 2-D array for constructing the set of block column indices,
  //because we need to keep them grouped by rows, and we aren't guaranteed
  //that they'll be grouped by rows in the file.
  int totalNumBlks = 0;
  int** blkColInds = new int*[N_update_];

  for(i=0; i<N_update_; i++) {
    num_nz_blocks[i] = 0;
    blkColInds[i] = NULL;
  }

  int blkRows, blkCols, rows, cols;
  char line[256];

  do {
    fgets(line,256,infile);
  } while(strchr(line,'%'));

  sscanf(line,"%d %d %d %d",&blkRows, &blkCols, &rows, &cols);

  if ((blkRows != blkCols) || (rows != cols))
    messageAbort("readAllocateInfo: non-square matrix not allowed.");

  int br, bc, pr, pc, index;

  while (!feof(infile)) {
    do {
      fgets(line,256,infile);
    } while(strchr(line,'%'));
    if (feof(infile)) break;

    sscanf(line, "%d %d %d %d", &br, &bc, &pr, &pc);

    if (inUpdate(br, index)) {
      if ((bc < 0) || bc >= blkCols) {
        char mesg[80];
        sprintf(mesg,"readAllocateInfo: blkCols %d, 0-based col ind %d",
                blkCols, bc);
        fclose(infile);
        messageAbort(mesg);
      }
      insertList(bc, blkColInds[index], num_nz_blocks[index]);
      totalNumBlks++;
    }
  }

  //so we've read the whole file, now flatten the 2-D list blkColInds
  //into the required 1-D list blk_col_inds.

  blk_col_inds = new int[totalNumBlks];

  int offset = 0;
  for(i=0; i<N_update_; i++) {
    for(int j=0; j<num_nz_blocks[i]; j++) {
      blk_col_inds[offset++] = blkColInds[i][j];
    }

    delete [] blkColInds[i];
  }

  delete [] blkColInds;
}
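//The file layout assumed by the parsing above (a sketch, inferred from this
//function rather than stated in the original comments): lines containing '%'
//are comments, the first data line holds "blkRows blkCols rows cols", and
//each subsequent data line starts with a 0-based block-row and block-column
//index followed by two more integers that this routine reads but does not
//use. For example:
//
//  % example header comment
//  4 4 12 12
//  0 0 3 3
//  0 2 3 3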
//==============================================================================
void AztecDVBR_Matrix::getRemoteBlkSizes(int* remoteBlkSizes,
                                         int* remoteInds,
                                         int len)
{
  //
  //remoteInds is a sorted list of indices that correspond to rows
  //in remote processors' update lists. This function will spread the
  //indices to all processors so that they can provide the blk sizes,
  //then spread that information back to all processors.
  //
#ifdef FEI_SER
  return;
#else
  int numProcs = amap_->getProcConfig()[AZ_N_procs];
  int thisProc = amap_->getProcConfig()[AZ_node];
  MPI_Comm comm = amap_->getCommunicator();

  int* lengths = new int[numProcs];
  lengths[0] = 0;

  //gather up the lengths of the lists that each proc will be sending.
  MPI_Allgather(&len, 1, MPI_INT, lengths, 1, MPI_INT, comm);

  //now form a list of the offset at which each proc's contribution will
  //be placed in the all-gathered list.
  int* offsets = new int[numProcs];

  offsets[0] = 0;
  int totalLength = lengths[0];
  for(int i=1; i<numProcs; i++) {
    offsets[i] = offsets[i-1] + lengths[i-1];
    totalLength += lengths[i];
  }

  //now we can allocate the list to recv into.
  int* recvBuf = new int[totalLength];

  //now we're ready to do the gather.
  MPI_Allgatherv(remoteInds, len, MPI_INT, recvBuf, lengths, offsets,
                 MPI_INT, comm);

  //now we'll run through the list and put block sizes into a list of
  //the same length as the total recvBuf list.
  int* blkSizes = new int[totalLength];
  int index;

  for(int j=0; j<totalLength; j++) {
    if (inUpdate(recvBuf[j], index)) {
      blkSizes[j] = Amat_->rpntr[index+1]-Amat_->rpntr[index];
    }
    else blkSizes[j] = 0;
  }

  //now we'll reduce this info back onto all processors. We'll use MPI_SUM.
  //Since the sizes we did NOT supply hold a 0, and each spot in the list
  //should only have a nonzero size from 1 processor, the result will be
  //that each spot in the result list has the correct value.
  int* recvSizes = new int[totalLength];

  MPI_Allreduce(blkSizes, recvSizes, totalLength, MPI_INT, MPI_SUM, comm);

  //and finally, we just need to run our section of the list of recv'd sizes,
  //and transfer them into the remoteBlkSizes list.
  int offset = offsets[thisProc];

  for(int k=0; k<len; k++) {
    remoteBlkSizes[k] = recvSizes[offset + k];
    if (recvSizes[offset+k] <= 0)
      messageAbort("getRemoteBlkSizes: recvd a size <= 0.");
  }

  delete [] lengths;
  delete [] offsets;
  delete [] recvBuf;
  delete [] blkSizes;
  delete [] recvSizes;
#endif
}
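//Worked example (a sketch, not part of the original code): with two procs,
//suppose the gathered index list is {5, 9}, where proc 0 owns block row 5
//(point size 3) and proc 1 owns block row 9 (point size 2). Proc 0
//contributes {3, 0} and proc 1 contributes {0, 2}, so the element-wise
//MPI_SUM reduction yields {3, 2}; each proc then copies the entries at its
//own offset back into remoteBlkSizes.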