void random_distribution_1D(
  itype nrows,            // Number of global matrix rows
  Epetra_Comm &comm,      // Epetra communicator to be used in maps
  Epetra_Map **rowMap,    // OUTPUT: pointer to row map to be created
  long long offsetEpetra64
)
{
  // Randomly assign matrix rows to processor's row Map.
  int me = comm.MyPID();
  int np = comm.NumProc();

  vector<itype> myGlobalElements(1.2 * (nrows / np) + 1);
  int nMyRows = 0;

  srandom(1);
  double denom = (double) RAND_MAX + 1.;

  for (itype i = 0; i < nrows; i++) {
    int p = (int) ((double) np * (double) random() / denom);
    if (p == me) {
      if (nMyRows >= myGlobalElements.size())
        myGlobalElements.resize(1.5*myGlobalElements.size());
      myGlobalElements[nMyRows] = i + offsetEpetra64;
      nMyRows++;
    }
  }

  *rowMap = new Epetra_Map(nrows, nMyRows, &myGlobalElements[0], 0, comm);
}
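// Hypothetical usage sketch, not part of the original source: it assumes "itype"
// is the global-index typedef used above, that an Epetra_Comm has already been
// constructed (e.g. an Epetra_MpiComm wrapping MPI_COMM_WORLD), and that the
// usual Epetra headers (Epetra_Comm.h, Epetra_Map.h) are included.
void example_random_distribution_1D(Epetra_Comm &comm)
{
  Epetra_Map *rowMap = NULL;
  itype nrows = 1000;              // global number of matrix rows
  long long offsetEpetra64 = 0;    // no Epetra64 index offset

  random_distribution_1D(nrows, comm, &rowMap, offsetEpetra64);

  if (comm.MyPID() == 0)
    std::cout << "rows distributed randomly over " << comm.NumProc()
              << " processes" << std::endl;

  delete rowMap;                   // the caller owns the map created above
}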
bool global_check_for_flag_on_proc_0(const char* flag, int numargs, char** strargs, const Epetra_Comm& comm) { int mypid = comm.MyPID(); int numprocs = comm.NumProc(); int flag_found = 0; if (mypid==0) { for(int i=0; i<numargs; ++i) { if (strargs[i]==0) continue; if (strcmp(flag, strargs[i]) == 0) { flag_found = 1; break; } } } if (numprocs > 1) { comm.Broadcast(&flag_found, 1, 0); } bool return_value = flag_found==1 ? true : false; return( return_value ); }
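// Hypothetical usage sketch, not part of the original source: shows the intended
// calling pattern, where only rank 0 scans argv and the result is broadcast so
// that every process takes the same branch.
void example_check_flag(int argc, char** argv, const Epetra_Comm& comm)
{
  bool verbose = global_check_for_flag_on_proc_0("-v", argc, argv, comm);

  if (verbose && comm.MyPID() == 0)
    std::cout << "verbose output enabled on all " << comm.NumProc()
              << " processes" << std::endl;
}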
Teuchos::RCP<const EpetraExt::MultiComm>
Stokhos::buildMultiComm(const Epetra_Comm& globalComm,
                        int num_global_stochastic_blocks,
                        int num_spatial_procs)
{
  Teuchos::RCP<const EpetraExt::MultiComm> globalMultiComm;

#ifdef HAVE_MPI
  if (num_spatial_procs == -1) {
    // By default, use all procs for spatial parallelism
    //MPI_Comm_size(MPI_COMM_WORLD, &num_spatial_procs);
    num_spatial_procs = globalComm.NumProc();
  }
  const Epetra_MpiComm& globalMpiComm =
    dynamic_cast<const Epetra_MpiComm&>(globalComm);
  globalMultiComm =
    Teuchos::rcp(new EpetraExt::MultiMpiComm(globalMpiComm.Comm(),
                                             num_spatial_procs,
                                             num_global_stochastic_blocks,
                                             Teuchos::VERB_NONE));
#else
  globalMultiComm =
    Teuchos::rcp(new EpetraExt::MultiSerialComm(num_global_stochastic_blocks));
#endif

  return globalMultiComm;
}
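// Hypothetical usage sketch, not part of the original source: assumes MPI has
// been initialized and that the Stokhos and EpetraExt headers are available.
// Passing -1 requests the default of using all processes for spatial parallelism.
void example_buildMultiComm(const Epetra_Comm& globalComm)
{
  int num_stochastic_blocks = 4;
  Teuchos::RCP<const EpetraExt::MultiComm> multiComm =
    Stokhos::buildMultiComm(globalComm, num_stochastic_blocks, -1);

  if (globalComm.MyPID() == 0 && !multiComm.is_null())
    std::cout << "MultiComm built over " << globalComm.NumProc()
              << " processes with " << num_stochastic_blocks
              << " stochastic blocks" << std::endl;
}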
Teuchos::RCP<Epetra_CrsMatrix> buildMatrix(int nx, Epetra_Comm & comm)
{
   Epetra_Map map(nx*comm.NumProc(), 0, comm);
   Teuchos::RCP<Epetra_CrsMatrix> mat = Teuchos::rcp(new Epetra_CrsMatrix(Copy, map, 3));

   int offsets[3] = { -1, 0, 1 };
   double values[3] = { -1, 2, -1 };
   int maxGid = map.MaxAllGID();
   for(int lid=0; lid<nx; lid++) {
      int gid = mat->GRID(lid);
      int numEntries = 3, offset = 0;
      int indices[3] = { gid+offsets[0], gid+offsets[1], gid+offsets[2] };
      if(gid==0) {              // left end point
         numEntries = 2;
         offset = 1;
      }
      else if(gid==maxGid)      // right end point
         numEntries = 2;

      // insert rows
      mat->InsertGlobalValues(gid, numEntries, values+offset, indices+offset);
   }
   mat->FillComplete();

   return mat;
}
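// Hypothetical usage sketch, not part of the original source: builds the 1D
// finite-difference matrix above with 5 rows per process and applies it to a
// vector of ones; interior rows of the product are exactly zero, so only the two
// end points contribute to the norm.
void example_buildMatrix(Epetra_Comm& comm)
{
  Teuchos::RCP<Epetra_CrsMatrix> A = buildMatrix(5, comm);

  Epetra_Vector x(A->RowMap()), y(A->RowMap());
  x.PutScalar(1.0);
  A->Multiply(false, x, y);   // y = A * x

  double norm = 0.0;
  y.Norm2(&norm);
  if (comm.MyPID() == 0)
    std::cout << "|| A * ones ||_2 = " << norm << std::endl;
}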
void show_matrix(const char *txt, const Epetra_RowMatrix &matrix, const Epetra_Comm &comm) { int me = comm.MyPID(); if (comm.NumProc() > 10){ if (me == 0){ std::cout << txt << std::endl; std::cout << "Printed matrix format only works for 10 or fewer processes" << std::endl; } return; } int numRows = matrix.NumGlobalRows(); int numCols = matrix.NumGlobalCols(); if ((numRows > 200) || (numCols > 500)){ if (me == 0){ std::cerr << txt << std::endl; std::cerr << "show_matrix: problem is too large to display" << std::endl; } return; } int *myA = new int [numRows * numCols]; make_my_A(matrix, myA, comm); printMatrix(txt, myA, NULL, NULL, numRows, numCols, comm); delete [] myA; }
int rebalanceEpetraProblem( RCP<Epetra_Map> &Map,
                            RCP<Epetra_CrsMatrix> &A,
                            RCP<Epetra_MultiVector> &B,
                            RCP<Epetra_MultiVector> &X,
                            Epetra_Comm &Comm )
{
  // Rebalance linear system across multiple processors.
  if ( Comm.NumProc() > 1 ) {
    RCP<Epetra_Map> newMap =
      rcp( new Epetra_Map( Map->NumGlobalElements(), Map->IndexBase(), Comm ) );
    RCP<Epetra_Import> newImport = rcp( new Epetra_Import( *newMap, *Map ) );

    // Create rebalanced versions of the linear system.
    RCP<Epetra_CrsMatrix> newA = rcp( new Epetra_CrsMatrix( BELOSEPETRACOPY, *newMap, 0 ) );
    newA->Import( *A, *newImport, Insert );
    newA->FillComplete();

    RCP<Epetra_MultiVector> newB = rcp( new Epetra_MultiVector( *newMap, B->NumVectors() ) );
    newB->Import( *B, *newImport, Insert );

    RCP<Epetra_MultiVector> newX = rcp( new Epetra_MultiVector( *newMap, X->NumVectors() ) );
    newX->Import( *X, *newImport, Insert );

    // Set the pointers to the new, rebalanced linear system.
    A = newA;
    B = newB;
    X = newX;
    Map = newMap;
  }
  return (0);
}
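// Hypothetical usage sketch, not part of the original source: assumes the same
// using-declarations for Teuchos::RCP/rcp as the function above, and that a
// linear system (map, A, b, x) was already built on a possibly unbalanced map,
// e.g. by a matrix reader that places everything on process 0.
void example_rebalance(RCP<Epetra_Map>& map, RCP<Epetra_CrsMatrix>& A,
                       RCP<Epetra_MultiVector>& b, RCP<Epetra_MultiVector>& x,
                       Epetra_Comm& comm)
{
  // After this call the four RCPs point to uniformly distributed objects;
  // the original (unbalanced) objects are released when their RCPs go out of scope.
  rebalanceEpetraProblem(map, A, b, x, comm);

  std::cout << "rank " << comm.MyPID() << " now owns "
            << A->NumMyRows() << " rows" << std::endl;
}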
int generateHyprePrintOut(const char *filename, const Epetra_Comm &comm){
  int MyPID = comm.MyPID();
  int NumProc = comm.NumProc();

  int N = 100;
  int ilower = MyPID * N;
  int iupper = (MyPID+1)*N-1;

  double filePID = (double)MyPID/(double)100000;
  std::ostringstream stream;
  // Using setprecision() puts it in the std::string
  stream << std::setiosflags(std::ios::fixed) << std::setprecision(5) << filePID;
  // Then just ignore the first character
  std::string fileName(filename);
  fileName += stream.str().substr(1,7);

  std::ofstream myfile(fileName.c_str());
  if(myfile.is_open()){
    myfile << ilower << " " << iupper << " " << ilower << " " << iupper << std::endl;
    for(int i = ilower; i <= iupper; i++){
      for(int j=i-5; j <= i+5; j++){
        if(j >= 0 && j < N*NumProc)
          myfile << i << " " << j << " " << (double)rand()/(double)RAND_MAX << std::endl;
      }
    }
    myfile.close();
    return 0;
  } else {
    std::cout << "\nERROR:\nCouldn't open file.\n";
    return -1;
  }
}
void show_matrix(const char *txt, const Epetra_LinearProblem &problem, const Epetra_Comm &comm) { int me = comm.MyPID(); if (comm.NumProc() > 10){ if (me == 0){ std::cout << txt << std::endl; std::cout << "Printed matrix format only works for 10 or fewer processes" << std::endl; } return; } Epetra_RowMatrix *matrix = problem.GetMatrix(); Epetra_MultiVector *lhs = problem.GetLHS(); Epetra_MultiVector *rhs = problem.GetRHS(); int numRows = matrix->NumGlobalRows(); int numCols = matrix->NumGlobalCols(); if ((numRows > 200) || (numCols > 500)){ if (me == 0){ std::cerr << txt << std::endl; std::cerr << "show_matrix: problem is too large to display" << std::endl; } return; } int *myA = new int [numRows * numCols]; make_my_A(*matrix, myA, comm); int *myX = new int [numCols]; int *myB = new int [numRows]; memset(myX, 0, sizeof(int) * numCols); memset(myB, 0, sizeof(int) * numRows); const Epetra_BlockMap &lhsMap = lhs->Map(); const Epetra_BlockMap &rhsMap = rhs->Map(); int base = lhsMap.IndexBase(); for (int j=0; j < lhsMap.NumMyElements(); j++){ int colGID = lhsMap.GID(j); myX[colGID - base] = me + 1; } for (int i=0; i < rhsMap.NumMyElements(); i++){ int rowGID = rhsMap.GID(i); myB[rowGID - base] = me + 1; } printMatrix(txt, myA, myX, myB, numRows, numCols, comm); delete [] myA; delete [] myX; delete [] myB; }
//==============================================================================
Poisson2dOperator::Poisson2dOperator(int nx, int ny, const Epetra_Comm & comm)
  : nx_(nx),
    ny_(ny),
    useTranspose_(false),
    comm_(comm),
    map_(0),
    numImports_(0),
    importIDs_(0),
    importMap_(0),
    importer_(0),
    importX_(0),
    Label_(0) {

  Label_ = "2D Poisson Operator";
  int numProc = comm.NumProc(); // Get number of processors
  int myPID = comm.MyPID();     // My rank
  if (2*numProc > ny) { // ny must be >= 2*numProc (to avoid degenerate cases)
    ny = 2*numProc;
    ny_ = ny;
    std::cout << " Increasing ny to " << ny
              << " to avoid degenerate distribution on " << numProc
              << " processors." << std::endl;
  }

  int chunkSize = ny/numProc;
  int remainder = ny%numProc;
  if (myPID+1 <= remainder) chunkSize++; // add on remainder

  myny_ = chunkSize;

  map_ = new Epetra_Map(-1LL, ((long long)nx)*chunkSize, 0, comm_);

  if (numProc>1) {
    // Build import GID list to build import map and importer
    if (myPID>0) numImports_ += nx;
    if (myPID+1<numProc) numImports_ += nx;

    if (numImports_>0) importIDs_ = new long long[numImports_];
    long long * ptr = importIDs_;
    long long minGID = map_->MinMyGID64();
    long long maxGID = map_->MaxMyGID64();

    if (myPID>0) for (int i=0; i< nx; i++) *ptr++ = minGID - nx + i;
    if (myPID+1<numProc) for (int i=0; i< nx; i++) *ptr++ = maxGID + i +1;

    // At the end of the above step importIDs_ will have a list of global IDs that are needed
    // to compute the matrix multiplication operation on this processor.  Now build import map
    // and importer
    importMap_ = new Epetra_Map(-1LL, numImports_, importIDs_, 0LL, comm_);
    importer_ = new Epetra_Import(*importMap_, *map_);
  }
}
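// Hypothetical usage sketch, not part of the original source: it assumes
// Poisson2dOperator implements the full Epetra_Operator interface
// (OperatorDomainMap(), OperatorRangeMap(), Apply(), ...), which the
// constructor above prepares for.
void example_poisson2d(const Epetra_Comm& comm)
{
  Poisson2dOperator op(100, 100, comm);     // 100 x 100 grid

  Epetra_Vector x(op.OperatorDomainMap()), y(op.OperatorRangeMap());
  x.Random();
  op.Apply(x, y);                           // y = A * x, matrix-free

  double norm = 0.0;
  y.Norm2(&norm);
  if (comm.MyPID() == 0)
    std::cout << "||A * x||_2 = " << norm << std::endl;
}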
int alternate_import_constructor_test(Epetra_Comm& Comm) {
  int rv=0;
  int nodes_per_proc=10;
  int numprocs = Comm.NumProc();
  int mypid = Comm.MyPID();

  // Only run if we have multiple procs & MPI
  if(numprocs==1) return 0;
#ifndef HAVE_MPI
  return 0;
#endif

  // Build Map 1 - linear
  Epetra_Map Map1((long long)-1,nodes_per_proc,(long long)0,Comm);

  // Build Map 2 - mod striped
  std::vector<long long> MyGIDs(nodes_per_proc);
  for(int i=0; i<nodes_per_proc; i++) MyGIDs[i] = (mypid*nodes_per_proc + i) % numprocs;
  Epetra_Map Map2((long long)-1,nodes_per_proc,&MyGIDs[0],(long long)0,Comm);

  // For testing
  Epetra_LongLongVector Source(Map1), Target(Map2);

  // Build Import 1 - normal
  Epetra_Import Import1(Map2,Map1);
  rv = rv|| test_import_gid("Alt test: 2 map constructor",Source,Target, Import1);

  // Build Import 2 - no-comm constructor
  int Nremote=Import1.NumRemoteIDs();
  const int * RemoteLIDs = Import1.RemoteLIDs();
  std::vector<int> RemotePIDs(Nremote+1); // I hate you, stl vector....
  std::vector<int> AllPIDs;
  Epetra_Util::GetPids(Import1,AllPIDs,true);

  for(int i=0; i<Nremote; i++) {
    RemotePIDs[i]=AllPIDs[RemoteLIDs[i]];
  }
  Epetra_Import Import2(Import1.TargetMap(),Import1.SourceMap(),Nremote,&RemotePIDs[0],Import1.NumExportIDs(),Import1.ExportLIDs(),Import1.ExportPIDs());

  rv = rv || test_import_gid("Alt test: no comm constructor",Source,Target,Import2);

  // Build Import 3 - Remotes only
  Epetra_Import Import3(Import1.TargetMap(),Import1.SourceMap(),Nremote,&RemotePIDs[0]);
  rv = rv || test_import_gid("Alt test: remote only constructor",Source,Target, Import3);

  return rv;
}
//============================================================================== // Epetra_BlockMap constructor function for a Epetra-defined uniform linear distribution of constant size elements. void Epetra_BlockMap::ConstructAutoUniform(long long NumGlobal_Elements, int Element_Size, int Index_Base, const Epetra_Comm& comm, bool IsLongLong) { // Each processor gets roughly numGlobalPoints/p points // This routine automatically defines a linear partitioning of a // map with numGlobalPoints across the processors // specified in the given Epetra_Comm if (NumGlobal_Elements < 0) throw ReportError("NumGlobal_Elements = " + toString(NumGlobal_Elements) + ". Should be >= 0.", -1); if (Element_Size <= 0) throw ReportError("ElementSize = " + toString(Element_Size) + ". Should be > 0.", -2); BlockMapData_ = new Epetra_BlockMapData(NumGlobal_Elements, Element_Size, Index_Base, comm, IsLongLong); int NumProc = comm.NumProc(); BlockMapData_->ConstantElementSize_ = true; BlockMapData_->LinearMap_ = true; int MyPID = comm.MyPID(); if(BlockMapData_->NumGlobalElements_ / NumProc > (long long) std::numeric_limits<int>::max()) throw ReportError("Epetra_BlockMap::ConstructAutoUniform: Error. Not enough space for elements on each processor", -99); BlockMapData_->NumMyElements_ = (int) (BlockMapData_->NumGlobalElements_ / NumProc); int remainder = (int) (BlockMapData_->NumGlobalElements_ % NumProc); // remainder will fit int int start_index = MyPID * (BlockMapData_->NumMyElements_ + 1); if (MyPID < remainder) BlockMapData_->NumMyElements_++; else start_index -= (MyPID - remainder); BlockMapData_->NumGlobalPoints_ = BlockMapData_->NumGlobalElements_ * BlockMapData_->ElementSize_; BlockMapData_->NumMyPoints_ = BlockMapData_->NumMyElements_ * BlockMapData_->ElementSize_; BlockMapData_->MinMyElementSize_ = BlockMapData_->ElementSize_; BlockMapData_->MaxMyElementSize_ = BlockMapData_->ElementSize_; BlockMapData_->MinElementSize_ = BlockMapData_->ElementSize_; BlockMapData_->MaxElementSize_ = BlockMapData_->ElementSize_; BlockMapData_->MinAllGID_ = BlockMapData_->IndexBase_; BlockMapData_->MaxAllGID_ = BlockMapData_->MinAllGID_ + BlockMapData_->NumGlobalElements_ - 1; BlockMapData_->MinMyGID_ = start_index + BlockMapData_->IndexBase_; BlockMapData_->MaxMyGID_ = BlockMapData_->MinMyGID_ + BlockMapData_->NumMyElements_ - 1; BlockMapData_->DistributedGlobal_ = IsDistributedGlobal(BlockMapData_->NumGlobalElements_, BlockMapData_->NumMyElements_); EndOfConstructorOps(); }
void build_maps(
  itype nrows,                // Number of global matrix rows
  bool testEpetra64,          // Flag indicating whether to adjust global row/column
                              // indices to exercise Epetra64 capability.
  Epetra_Comm &comm,          // Epetra communicator to be used in maps
  Epetra_Map **vectorMap,     // OUTPUT: Map to be used for the vector
  Epetra_Map **rowMap,        // OUTPUT: Map to be used for the matrix rows
  Epetra_Map **colMap,        // OUTPUT: Map to be used for the matrix cols
  long long &offsetEpetra64,  // OUTPUT for testing Epetra64: add offsetEpetra64
                              // to all row/column indices.
  bool verbose                // print out generated maps
)
{
  // Function to build the maps for 1D or 2D matrix distribution.
  // Output for 1D includes rowMap and NULL colMap and vectorMap.
  // Output for 2D includes rowMap, colMap and vectorMap.

  int me = comm.MyPID();
  int np = comm.NumProc();

  *rowMap = NULL;
  *colMap = NULL;
  *vectorMap = NULL;

  // offsetEpetra64 = (testEpetra64 ? (long long) INT_MAX - (long long) 5 : 0);
  offsetEpetra64 = (testEpetra64 ? (long long) 2 * INT_MAX : 0);

  // Generate 1D row-based decomposition.
  if ((me == 0) && verbose)
    cout << endl
         << "1D Distribution: " << endl
         << " np = " << np << endl;

  // Linear map similar to Trilinos default.
  itype nMyRows = nrows / np + (nrows % np > me);
  itype myFirstRow = me * (nrows / np) + MIN(nrows % np, me);
  itype *myGlobalRows = new itype[nMyRows];
  for (itype i = 0; i < nMyRows; i++)
    myGlobalRows[i] = i + myFirstRow + offsetEpetra64;
  *rowMap = new Epetra_Map(nrows, nMyRows, &myGlobalRows[0], 0, comm);
  delete [] myGlobalRows;
}
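// Hypothetical usage sketch, not part of the original source: assumes "itype" is
// the global-index typedef used above. Only the row map is produced by the 1D
// branch shown here; colMap and vectorMap come back NULL.
void example_build_maps(Epetra_Comm& comm, bool verbose)
{
  Epetra_Map *vectorMap = NULL, *rowMap = NULL, *colMap = NULL;
  long long offsetEpetra64 = 0;
  itype nrows = 1000;

  build_maps(nrows, false /* testEpetra64 */, comm,
             &vectorMap, &rowMap, &colMap, offsetEpetra64, verbose);

  if (rowMap != NULL)
    std::cout << "rank " << comm.MyPID() << " owns "
              << rowMap->NumMyElements() << " rows" << std::endl;

  delete rowMap;    // caller owns whatever maps were created
  delete colMap;
  delete vectorMap;
}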
int rectangular(const Epetra_Comm& Comm, bool verbose)
{
  int mypid = Comm.MyPID();
  int numlocalrows = 3;
  Epetra_Map rowmap((long long) -1, numlocalrows, 0, Comm);

  long long numglobalrows = numlocalrows*Comm.NumProc();
  long long numcols = 2*numglobalrows;

  Epetra_FECrsGraph fegraph(Copy, rowmap, numcols);

  long long* cols = new long long[numcols];
  for(int j=0; j<numcols; ++j) cols[j] = j;

  Epetra_Map domainmap((long long) -1, numcols, 0, Comm);

  long long firstlocalrow = numlocalrows*mypid;
  long long lastlocalrow = numlocalrows*(mypid+1)-1;

  for(long long i=0; i<numglobalrows; ++i) {
    //if i is a local row, then skip it. We want each processor to only
    //load rows that belong on other processors.
    if (i >= firstlocalrow && i <= lastlocalrow) continue;

    EPETRA_CHK_ERR( fegraph.InsertGlobalIndices(1, &i, numcols, &(cols[0])) );
  }

  EPETRA_CHK_ERR( fegraph.GlobalAssemble(domainmap, rowmap) );

  if (verbose) {
    std::cout << "********************** fegraph **********************" << std::endl;
    std::cout << fegraph << std::endl;
  }

  delete [] cols;

  return(0);
}
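// Hypothetical driver sketch, not part of the original source: runs the
// rectangular-graph test above under MPI (assumes mpi.h and Epetra_MpiComm.h are
// included) and reports the error code from rank 0.
int example_run_rectangular(int argc, char** argv)
{
  MPI_Init(&argc, &argv);
  Epetra_MpiComm Comm(MPI_COMM_WORLD);

  int err = rectangular(Comm, Comm.MyPID() == 0 /* verbose on rank 0 only */);
  if (Comm.MyPID() == 0)
    std::cout << "rectangular() returned " << err << std::endl;

  MPI_Finalize();
  return err;
}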
void MPIWrapper::allGatherCompact(const Epetra_Comm &Comm, FieldContainer<Scalar> &gatheredValues, FieldContainer<Scalar> &myValues, FieldContainer<int> &offsets) { int mySize = myValues.size(); int totalSize; Comm.SumAll(&mySize, &totalSize, 1); int myOffset = 0; Comm.ScanSum(&mySize,&myOffset,1); myOffset -= mySize; gatheredValues.resize(totalSize); for (int i=0; i<mySize; i++) { gatheredValues[myOffset+i] = myValues[i]; } MPIWrapper::entryWiseSum(Comm, gatheredValues); offsets.resize(Comm.NumProc()); offsets[Comm.MyPID()] = myOffset; MPIWrapper::entryWiseSum(Comm, offsets); }
//============================================================================
void Ifpack_BreakForDebugger(Epetra_Comm& Comm)
{
  char hostname[80];
  char buf[80];
  if (Comm.MyPID() == 0) cout << "Host and Process Ids for tasks" << endl;
  for (int i = 0; i <Comm.NumProc() ; i++) {
    if (i == Comm.MyPID() ) {
#if defined(TFLOP) || defined(JANUS_STLPORT)
      sprintf(buf, "Host: %s PID: %d", "janus", getpid());
#elif defined(_WIN32)
      sprintf(buf,"Windows compiler, unknown hostname and PID!");
#else
      gethostname(hostname, sizeof(hostname));
      sprintf(buf, "Host: %s\tComm.MyPID(): %d\tPID: %d",
              hostname, Comm.MyPID(), getpid());
#endif
      printf("%s\n",buf);
      fflush(stdout);
#if !( defined(_WIN32) )
      sleep(1);
#endif
    }
  }
  if(Comm.MyPID() == 0) {
    printf("\n");
    printf("** Pausing to attach debugger...\n");
    printf("** You may now attach debugger to the processes listed above.\n");
    printf( "**\n");
    printf( "** Enter a character to continue > ");
    fflush(stdout);
    char go;
    scanf("%c",&go);
  }
  Comm.Barrier();
}
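// Hypothetical usage sketch, not part of the original source: the call is
// typically placed right after the communicator is constructed, and is often
// guarded by a command-line flag so that normal runs are not interrupted.
void example_break_for_debugger(Epetra_Comm& Comm, bool wait_for_debugger)
{
  if (wait_for_debugger)
    Ifpack_BreakForDebugger(Comm);   // prints host/PID per rank, then pauses on rank 0

  // ... continue with the normal setup once the debugger is attached ...
}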
void Trilinos_Util_ReadHpc2Epetra(char *data_file, const Epetra_Comm &comm, Epetra_Map *& map, Epetra_CrsMatrix *& A, Epetra_Vector *& x, Epetra_Vector *& b, Epetra_Vector *&xexact) { FILE *in_file ; int l; int * lp = &l; double v; double * vp = &v; #ifdef DEBUG bool debug = true; #else bool debug = false; #endif int size = comm.NumProc(); int rank = comm.MyPID(); printf("Reading matrix info from %s...\n",data_file); in_file = fopen( data_file, "r"); if (in_file == NULL) { printf("Error: Cannot open file: %s\n",data_file); exit(1); } int numGlobalEquations, total_nnz; fscanf(in_file,"%d",&numGlobalEquations); fscanf(in_file,"%d",&total_nnz); map = new Epetra_Map(numGlobalEquations, 0, comm); // Create map with uniform distribution A = new Epetra_CrsMatrix(Copy, *map, 0); // Construct matrix x = new Epetra_Vector(*map); b = new Epetra_Vector(*map); xexact = new Epetra_Vector(*map); int numMyEquations = map->NumMyPoints(); // Allocate arrays that are of length numMyEquations // Find max nnz per row for this processor int max_nnz = 0; for (int i=0; i<numGlobalEquations; i++) { fscanf(in_file, "%d",lp); /* row #, nnz in row */ if (map->MyGID(i)) max_nnz = EPETRA_MAX(max_nnz,l); } // Allocate arrays that are of length local_nnz double * list_of_vals = new double[max_nnz]; int *list_of_inds = new int [max_nnz]; {for (int i=0; i<numGlobalEquations; i++) { int cur_nnz; fscanf(in_file, "%d",&cur_nnz); if (map->MyGID(i)) // See if nnz for row should be added { if (debug) cout << "Process "<<rank <<" of "<<size<<" getting row "<<i<<endl; int nnz_kept = 0; for (int j=0; j<cur_nnz; j++) { fscanf(in_file, "%lf %d",vp,lp); if (v!=0.0) { list_of_vals[nnz_kept] = v; list_of_inds[nnz_kept] = l; nnz_kept++; } } A->InsertGlobalValues(i, nnz_kept, list_of_vals, list_of_inds); } else for (int j=0; j<cur_nnz; j++) fscanf(in_file, "%lf %d",vp,lp); // otherwise read and discard }} double xt, bt, xxt; {for (int i=0; i<numGlobalEquations; i++) { if (map->MyGID(i)) // See if entry should be added { if (debug) cout << "Process "<<rank<<" of " <<size<<" getting RHS "<<i<<endl; fscanf(in_file, "%lf %lf %lf",&xt, &bt, &xxt); int cur_local_row = map->LID(i); (*x)[cur_local_row] = xt; (*b)[cur_local_row] = bt; (*xexact)[cur_local_row] = xxt; } else fscanf(in_file, "%lf %lf %lf",vp, vp, vp); // or thrown away }} fclose(in_file); if (debug) cout << "Process "<<rank<<" of "<<size<<" has "<<numMyEquations << " rows. Min global row "<< map->MinMyGID() <<" Max global row "<< map->MaxMyGID() <<endl <<" and "<<A->NumMyNonzeros()<<" nonzeros."<<endl; A->FillComplete(); Epetra_Vector bcomp(*map); A->Multiply(false, *xexact, bcomp); double residual; bcomp.Norm2(&residual); if (comm.MyPID()==0) cout << "Norm of computed b = " << residual << endl; b->Norm2(&residual); if (comm.MyPID()==0) cout << "Norm of given b = " << residual << endl; bcomp.Update(-1.0, *b, 1.0); bcomp.Norm2(&residual); if (comm.MyPID()==0) cout << "Norm of difference between computed b and given b for xexact = " << residual << endl; delete [] list_of_vals; delete []list_of_inds; return; }
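// Hypothetical usage sketch, not part of the original source: reads an HPC-format
// file into a distributed linear system and reports its global sizes. All five
// returned objects are allocated inside the reader, so the caller owns them.
void example_read_hpc(const Epetra_Comm& comm, char* filename)
{
  Epetra_Map* map = NULL;
  Epetra_CrsMatrix* A = NULL;
  Epetra_Vector *x = NULL, *b = NULL, *xexact = NULL;

  Trilinos_Util_ReadHpc2Epetra(filename, comm, map, A, x, b, xexact);

  if (comm.MyPID() == 0)
    std::cout << "read " << A->NumGlobalNonzeros() << " nonzeros into a "
              << A->NumGlobalRows() << " x " << A->NumGlobalCols()
              << " matrix" << std::endl;

  delete xexact; delete b; delete x; delete A; delete map;
}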
int checkmap(Epetra_Map & Map, int NumGlobalElements, int NumMyElements, int *MyGlobalElements, int IndexBase, Epetra_Comm& Comm, bool DistributedGlobal) { int i, ierr=0, forierr = 0; EPETRA_TEST_ERR(!Map.ConstantElementSize(),ierr); EPETRA_TEST_ERR(DistributedGlobal!=Map.DistributedGlobal(),ierr); EPETRA_TEST_ERR(Map.ElementSize()!=1,ierr); int *MyElementSizeList = new int[NumMyElements]; EPETRA_TEST_ERR(Map.ElementSizeList(MyElementSizeList)!=0,ierr); forierr = 0; for (i=0; i<NumMyElements; i++) forierr += MyElementSizeList[i]!=1; EPETRA_TEST_ERR(forierr,ierr); delete [] MyElementSizeList; const Epetra_Comm & Comm1 = Map.Comm(); EPETRA_TEST_ERR(Comm1.NumProc()!=Comm.NumProc(),ierr); EPETRA_TEST_ERR(Comm1.MyPID()!=Comm.MyPID(),ierr); EPETRA_TEST_ERR(Map.IndexBase()!=IndexBase,ierr); EPETRA_TEST_ERR(!Map.LinearMap() && MyGlobalElements==0,ierr); EPETRA_TEST_ERR(Map.LinearMap() && MyGlobalElements!=0,ierr); EPETRA_TEST_ERR(Map.MaxAllGID()!=NumGlobalElements-1+IndexBase,ierr); EPETRA_TEST_ERR(Map.MaxElementSize()!=1,ierr); int MaxLID = Map.MaxLID(); EPETRA_TEST_ERR(MaxLID!=NumMyElements-1,ierr); int MaxMyGID = (Comm.MyPID()+1)*NumMyElements-1+IndexBase; if (Comm.MyPID()>2) MaxMyGID+=3; if (!DistributedGlobal) MaxMyGID = NumMyElements-1+IndexBase; EPETRA_TEST_ERR(Map.MaxMyGID()!=MaxMyGID,ierr); EPETRA_TEST_ERR(Map.MinAllGID()!=IndexBase,ierr); EPETRA_TEST_ERR(Map.MinElementSize()!=1,ierr); EPETRA_TEST_ERR(Map.MinLID()!=0,ierr); int MinMyGID = Comm.MyPID()*NumMyElements+IndexBase; if (Comm.MyPID()>2) MinMyGID+=3; if (!DistributedGlobal) MinMyGID = 0; EPETRA_TEST_ERR(Map.MinMyGID()!=MinMyGID,ierr); int * MyGlobalElements1 = new int[NumMyElements]; EPETRA_TEST_ERR(Map.MyGlobalElements(MyGlobalElements1)!=0,ierr); forierr = 0; if (MyGlobalElements==0) { for (i=0; i<NumMyElements; i++) forierr += MyGlobalElements1[i]!=MinMyGID+i; EPETRA_TEST_ERR(forierr,ierr); } else { for (i=0; i<NumMyElements; i++) forierr += MyGlobalElements[i]!=MyGlobalElements1[i]; EPETRA_TEST_ERR(forierr,ierr); } EPETRA_TEST_ERR(Map.NumGlobalElements()!=NumGlobalElements,ierr); EPETRA_TEST_ERR(Map.NumGlobalPoints()!=NumGlobalElements,ierr); EPETRA_TEST_ERR(Map.NumMyElements()!=NumMyElements,ierr); EPETRA_TEST_ERR(Map.NumMyPoints()!=NumMyElements,ierr); int MaxMyGID2 = Map.GID(Map.LID(MaxMyGID)); EPETRA_TEST_ERR(MaxMyGID2 != MaxMyGID,ierr); int MaxLID2 = Map.LID(Map.GID(MaxLID)); EPETRA_TEST_ERR(MaxLID2 != MaxLID,ierr); EPETRA_TEST_ERR(Map.GID(MaxLID+1) != IndexBase-1,ierr);// MaxLID+1 doesn't exist EPETRA_TEST_ERR(Map.LID(MaxMyGID+1) != -1,ierr);// MaxMyGID+1 doesn't exist or is on a different processor EPETRA_TEST_ERR(!Map.MyGID(MaxMyGID),ierr); EPETRA_TEST_ERR(Map.MyGID(MaxMyGID+1),ierr); EPETRA_TEST_ERR(!Map.MyLID(MaxLID),ierr); EPETRA_TEST_ERR(Map.MyLID(MaxLID+1),ierr); EPETRA_TEST_ERR(!Map.MyGID(Map.GID(MaxLID)),ierr); EPETRA_TEST_ERR(Map.MyGID(Map.GID(MaxLID+1)),ierr); EPETRA_TEST_ERR(!Map.MyLID(Map.LID(MaxMyGID)),ierr); EPETRA_TEST_ERR(Map.MyLID(Map.LID(MaxMyGID+1)),ierr); // Check RemoteIDList function // Get some GIDs off of each processor to test int TotalNumEle, NumElePerProc, NumProc = Comm.NumProc(); int MinNumEleOnProc; int NumMyEle=Map.NumMyElements(); Comm.MinAll(&NumMyEle,&MinNumEleOnProc,1); if (MinNumEleOnProc > 5) NumElePerProc = 6; else NumElePerProc = MinNumEleOnProc; if (NumElePerProc > 0) { TotalNumEle = NumElePerProc*NumProc; int * MyGIDlist = new int[NumElePerProc]; int * GIDlist = new int[TotalNumEle]; int * PIDlist = new int[TotalNumEle]; int * LIDlist = new int[TotalNumEle]; for (i=0; 
i<NumElePerProc; i++) MyGIDlist[i] = MyGlobalElements1[i]; Comm.GatherAll(MyGIDlist,GIDlist,NumElePerProc);// Get a few values from each proc Map.RemoteIDList(TotalNumEle, GIDlist, PIDlist, LIDlist); int MyPID= Comm.MyPID(); forierr = 0; for (i=0; i<TotalNumEle; i++) { if (Map.MyGID(GIDlist[i])) { forierr += PIDlist[i] != MyPID; forierr += !Map.MyLID(Map.LID(GIDlist[i])) || Map.LID(GIDlist[i]) != LIDlist[i] || Map.GID(LIDlist[i]) != GIDlist[i]; } else { forierr += PIDlist[i] == MyPID; // If MyGID comes back false, the PID listed should be that of another proc } } EPETRA_TEST_ERR(forierr,ierr); delete [] MyGIDlist; delete [] GIDlist; delete [] PIDlist; delete [] LIDlist; } delete [] MyGlobalElements1; // Check RemoteIDList function (assumes all maps are linear, even if not stored that way) if (Map.LinearMap()) { int * GIDList = new int[3]; int * PIDList = new int[3]; int * LIDList = new int[3]; int MyPID = Map.Comm().MyPID(); int NumIDs = 0; //GIDList[NumIDs++] = Map.MaxAllGID()+1; // Should return -1 for both PID and LID if (Map.MinMyGID()-1>=Map.MinAllGID()) GIDList[NumIDs++] = Map.MinMyGID()-1; if (Map.MaxMyGID()+1<=Map.MaxAllGID()) GIDList[NumIDs++] = Map.MaxMyGID()+1; Map.RemoteIDList(NumIDs, GIDList, PIDList, LIDList); NumIDs = 0; //EPETRA_TEST_ERR(!(PIDList[NumIDs]==-1),ierr); //EPETRA_TEST_ERR(!(LIDList[NumIDs++]==-1),ierr); if (Map.MinMyGID()-1>=Map.MinAllGID()) EPETRA_TEST_ERR(!(PIDList[NumIDs++]==MyPID-1),ierr); if (Map.MaxMyGID()+1<=Map.MaxAllGID()) EPETRA_TEST_ERR(!(PIDList[NumIDs]==MyPID+1),ierr); if (Map.MaxMyGID()+1<=Map.MaxAllGID()) EPETRA_TEST_ERR(!(LIDList[NumIDs++]==0),ierr); delete [] GIDList; delete [] PIDList; delete [] LIDList; } return (ierr); }
// =========================================================================== void Galeri::grid::Generator:: getSquare(Epetra_Comm& comm, const int numGlobalElementsX, const int numGlobalElementsY, const int numDomainsX, const int numDomainsY, Galeri::grid::Loadable& domain, Galeri::grid::Loadable& boundary, const string what) { TEUCHOS_TEST_FOR_EXCEPTION(numDomainsX * numDomainsY != comm.NumProc(), std::logic_error, "the number of processor should equal numDomainsX * numDomainsY" << ", now numProcs = " << comm.NumProc() << " and numDomainsX * numDomainsY = " << numDomainsX * numDomainsY); TEUCHOS_TEST_FOR_EXCEPTION(numGlobalElementsX % numDomainsX != 0, std::logic_error, "numGlobalElementsX must be a multiple of numDomainsX"); TEUCHOS_TEST_FOR_EXCEPTION(numGlobalElementsY % numDomainsY != 0, std::logic_error, "numGlobalElementsY must be a multiple of numDomainsY"); double lx = 1.0; double ly = 1.0; // these are the global number of elements and vertices int numGlobalElements = numGlobalElementsX * numGlobalElementsY; if (what == "Triangle") numGlobalElements *= 2; int numGlobalVertices = (numGlobalElementsX + 1) * (numGlobalElementsY + 1); int numGlobalVerticesX = numGlobalElementsX + 1; int numGlobalVerticesY = numGlobalElementsY + 1; // these are the mesh sizes, hx and hy double deltax = lx / numGlobalElementsX; double deltay = ly / numGlobalElementsY; // (px, py) are the coordinates of this processor. int px = comm.MyPID() % numDomainsX; int py = comm.MyPID() / numDomainsX; // (numMyElementsX, numMyElementsY) are the number of elements // in the square assigned to this processor, and // (numMyVerticesX, numMyVerticesY) the number of vertices. int numMyElementsX = numGlobalElementsX / numDomainsX; int numMyElementsY = numGlobalElementsY / numDomainsY; int numMyVerticesX = numMyElementsX + 1; int numMyVerticesY = numMyElementsY + 1; // (sx, sy) are the coordinates of the first element of this processor. int sx = px * numMyElementsX; int sy = py * numMyElementsY; // and these are the number of vertices and elements assigned // to this processor. 
int numMyElements = numMyElementsX * numMyElementsY; if (what == "Triangle") numMyElements *= 2; int numMyVertices = (numMyElementsX + 1) * (numMyElementsY + 1); Triangle triangle; domain.initialize(comm, numGlobalElements, numMyElements, triangle); int elementOffset = numMyElements * comm.MyPID(); int vertexOffset = px * numMyElementsX + py * numMyElementsY * numGlobalVerticesX; int count = 0; if (what == "Triangle") { for (int iy = 0; iy < numMyElementsY; ++iy) { for (int ix = 0; ix < numMyElementsX; ++ix) { int GEID = elementOffset + count++; int GVID = vertexOffset + ix + iy * numGlobalVerticesX; domain.setGlobalConnectivity(GEID, 0, GVID); domain.setGlobalConnectivity(GEID, 1, GVID + 1); domain.setGlobalConnectivity(GEID, 2, GVID + 2 + numGlobalElementsX); GEID = elementOffset + count++; domain.setGlobalConnectivity(GEID, 0, GVID + 2 + numGlobalElementsX); domain.setGlobalConnectivity(GEID, 1, GVID + 1 + numGlobalElementsX); domain.setGlobalConnectivity(GEID, 2, GVID); } } } else { for (int iy = 0; iy < numMyElementsY; ++iy) { for (int ix = 0; ix < numMyElementsX; ++ix) { int GEID = elementOffset + count++; int GVID = vertexOffset + ix + iy * numGlobalVerticesX; domain.setGlobalConnectivity(GEID, 0, GVID); domain.setGlobalConnectivity(GEID, 1, GVID + 1); domain.setGlobalConnectivity(GEID, 2, GVID + 2 + numGlobalElementsX); domain.setGlobalConnectivity(GEID, 3, GVID + 1 + numGlobalElementsX); } } } domain.freezeConnectivity(); for (int iy = 0; iy < numMyVerticesY; ++iy) { for (int ix = 0; ix < numMyVerticesX; ++ix) { int GVID = vertexOffset + ix + iy * numGlobalVerticesX; domain.setGlobalCoordinates(GVID, 0, (sx + ix) * deltax); domain.setGlobalCoordinates(GVID, 1, (sy + iy) * deltay); } } domain.freezeCoordinates(); // now build boundary faces int numMyBoundaries = 0; if (py == 0) numMyBoundaries += numMyElementsX; if (py == numDomainsY - 1) numMyBoundaries += numMyElementsX; if (px == 0) numMyBoundaries += numMyElementsY; if (px == numDomainsX - 1) numMyBoundaries += numMyElementsY; int pos = 0; vector<int> list(numMyBoundaries); if (py == 0) { int offset = px * numMyElementsX; for (int i = 0; i < numMyElementsX; ++i) list[pos++] = offset + i; } if (px == numDomainsX - 1) { int offset = numGlobalElementsX + py * numMyElementsY; for (int i = 0; i < numMyElementsY; ++i) list[pos++] = offset + i; } if (py == numDomainsY - 1) { int offset = numGlobalElementsX + numGlobalElementsY + px * numMyElementsX; for (int i = 0; i < numMyElementsX; ++i) list[pos++] = offset + i; } if (px == 0) { int offset = 2 * numGlobalElementsX + numGlobalElementsY + py * numMyElementsY; for (int i = 0; i < numMyElementsY; ++i) list[pos++] = offset + i; } TEUCHOS_TEST_FOR_EXCEPTION(pos != numMyBoundaries, std::logic_error, "internal error in boundary list definition, " << pos << " vs. 
" << numMyBoundaries); Segment segment; boundary.initialize(comm, -1, numMyBoundaries, segment, &list[0]); // now insert the actual vertices in the grid if (py == 0) { int offset = px * numMyElementsX; for (int i = 0; i < numMyElementsX; ++i) { boundary.setGlobalConnectivity(offset + i, 0, offset + i); boundary.setGlobalConnectivity(offset + i, 1, offset + i + 1); } } if (px == numDomainsX - 1) { int offset = numGlobalVerticesX * py * numMyElementsY + numGlobalElementsX; int offset2 = numGlobalElementsX + py * numMyElementsY; for (int i = 0; i < numMyElementsY; ++i) { boundary.setGlobalConnectivity(offset2 + i, 0, offset + i * numGlobalVerticesX); boundary.setGlobalConnectivity(offset2 + i, 1, offset + (i + 1) * numGlobalVerticesX); } } if (py == numDomainsY - 1) { int offset = numGlobalVerticesX * numGlobalElementsY + px * numMyElementsX; int offset2 = numGlobalElementsX + numGlobalElementsY + px * numMyElementsX; for (int i = 0; i < numMyElementsX; ++i) { boundary.setGlobalConnectivity(offset2 + i, 0, offset + i); boundary.setGlobalConnectivity(offset2 + i, 1, offset + i + 1); } } if (px == 0) { int offset = numGlobalVerticesX * py * numMyElementsY; int offset2 = 2 * numGlobalElementsX + numGlobalElementsY + py * numMyElementsY; for (int i = 0; i < numMyElementsY; ++i) { boundary.setGlobalConnectivity(offset2 + i, 0, offset + i * numGlobalVerticesX); boundary.setGlobalConnectivity(offset2 + i, 1, offset + (i + 1) * numGlobalVerticesX); } } boundary.freezeConnectivity(); if (py == 0) { int offset = px * numMyElementsX + 1; for (int i = 0; i < numMyElementsX + 1; ++i) { boundary.setGlobalCoordinates(offset + i, 0, deltax * (offset + i)); boundary.setGlobalCoordinates(offset + i, 1, 0.0); } } if (px == numDomainsX - 1) { int offset = numGlobalVerticesX + py * numMyElementsY - 1; int offset2 = px * numMyElementsX; for (int i = 0; i < numMyElementsY + 1; ++i) { boundary.setGlobalCoordinates(offset + i * numGlobalVerticesX, 0, lx); boundary.setGlobalCoordinates(offset + i * numGlobalVerticesX, 1, deltay * (offset2 + i)); } } if (py == numDomainsY - 1) { int offset = px * numMyElementsX; int offset2 = numGlobalVerticesX * numGlobalElementsY + px * numMyElementsX; for (int i = 0; i < numMyElementsX + 1; ++i) { boundary.setGlobalCoordinates(offset2 + i, 0, deltax * (offset + i)); boundary.setGlobalCoordinates(offset2 + i, 1, ly); } } if (px == 0) { int offset = numGlobalVerticesX * py * numMyElementsY; int offset2 = py * numMyElementsX; for (int i = 0; i < numMyElementsY + 1; ++i) { boundary.setGlobalCoordinates(offset + i * numGlobalVerticesX, 0, 0.0); boundary.setGlobalCoordinates(offset + i * numGlobalVerticesX, 1, deltay * (offset2 + i)); } } boundary.freezeCoordinates(); }
void random_distribution_2D(
  int mypRow,             // processor's row (in 2D)
  int mypCol,             // processor's col (in 2D)
  int npRows,             // number of processor rows
  int npCols,             // number of processor cols
  itype nrows,            // Number of global matrix rows
  Epetra_Comm &comm,      // Epetra communicator to be used in maps
  Epetra_Map **vectorMap, // OUTPUT: Map to be used for the vector
  Epetra_Map **rowMap,    // OUTPUT: Map to be used for the matrix rows
  Epetra_Map **colMap,    // OUTPUT: Map to be used for the matrix cols
  long long offsetEpetra64
)
{
  // Randomly assign matrix rows to processor's vector Map.
  // Build appropriate GlobalElements lists for row and column maps at the
  // same time.
  int me = comm.MyPID();
  int np = comm.NumProc();

  int nMyEntries = 0;
  int nMyRows = 0;
  int nMyCols = 0;
  vector<itype> myGlobalElements(1.2 * nrows / np + 1);
  vector<itype> myGlobalRowElements(1.2 * nrows / npRows + 1);
  vector<itype> myGlobalColElements(1.2 * nrows / npCols + 1);

  srandom(1);
  double denom = (double) RAND_MAX + 1.;

  for (itype i = 0; i < nrows; i++) {
    // Compute rank to receive the vector entry i
    int p = (int) ((double) np * (double) random() / denom);

    if (p == me) {
      // Add entry i to my vector map
      if (nMyEntries >= myGlobalElements.size())
        myGlobalElements.resize(1.5*myGlobalElements.size());
      myGlobalElements[nMyEntries] = i + offsetEpetra64;
      nMyEntries++;
    }

    if (mypRow == TWODPROW(p, npRows, npCols)) {
      // Add entry i to my row map
      if (nMyRows >= myGlobalRowElements.size())
        myGlobalRowElements.resize(1.5*myGlobalRowElements.size());
      myGlobalRowElements[nMyRows] = i + offsetEpetra64;
      nMyRows++;
    }

    if (mypCol == TWODPCOL(p, npRows, npCols)) {
      // Add entry i to my col map
      if (nMyCols >= myGlobalColElements.size())
        myGlobalColElements.resize(1.5*myGlobalColElements.size());
      myGlobalColElements[nMyCols] = i + offsetEpetra64;
      nMyCols++;
    }
  }

  *vectorMap = new Epetra_Map(-1, nMyEntries, &myGlobalElements[0], 0, comm);
  *rowMap = new Epetra_Map(-1, nMyRows, &myGlobalRowElements[0], 0, comm);
  *colMap = new Epetra_Map(-1, nMyCols, &myGlobalColElements[0], 0, comm);
}
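// Hypothetical usage sketch, not part of the original source: assumes a process
// grid with npRows * npCols == comm.NumProc(), the "itype" typedef used above,
// and the same TWODPROW/TWODPCOL macros that map a rank to its (row, col)
// position in the grid.
void example_random_distribution_2D(Epetra_Comm& comm, int npRows, int npCols)
{
  int me = comm.MyPID();
  int mypRow = TWODPROW(me, npRows, npCols);
  int mypCol = TWODPCOL(me, npRows, npCols);

  Epetra_Map *vectorMap = NULL, *rowMap = NULL, *colMap = NULL;
  itype nrows = 1000;

  random_distribution_2D(mypRow, mypCol, npRows, npCols, nrows, comm,
                         &vectorMap, &rowMap, &colMap, 0 /* offsetEpetra64 */);

  delete vectorMap;   // caller owns all three maps
  delete rowMap;
  delete colMap;
}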
int four_quads(const Epetra_Comm& Comm, bool preconstruct_graph, bool verbose) { if (verbose) { cout << "******************* four_quads ***********************"<<endl; } //This function assembles a matrix representing a finite-element mesh //of four 2-D quad elements. There are 9 nodes in the problem. The //same problem is assembled no matter how many processors are being used //(within reason). It may not work if more than 9 processors are used. // // *------*------* // 6| 7| 8| // | E2 | E3 | // *------*------* // 3| 4| 5| // | E0 | E1 | // *------*------* // 0 1 2 // //Nodes are denoted by * with node-numbers below and left of each node. //E0, E1 and so on are element-numbers. // //Each processor will contribute a sub-matrix of size 4x4, filled with 1's, //for each element. Thus, the coefficient value at position 0,0 should end up //being 1.0*numProcs, the value at position 4,4 should be 1.0*4*numProcs, etc. // //Depending on the number of processors being used, the locations of the //specific matrix positions (in terms of which processor owns them) will vary. // int numProcs = Comm.NumProc(); int numNodes = 9; int numElems = 4; int numNodesPerElem = 4; int blockSize = 1; int indexBase = 0; //Create a map using epetra-defined linear distribution. Epetra_BlockMap map(numNodes, blockSize, indexBase, Comm); Epetra_CrsGraph* graph = NULL; int* nodes = new int[numNodesPerElem]; int i, j, k, err = 0; if (preconstruct_graph) { graph = new Epetra_CrsGraph(Copy, map, 1); //we're going to fill the graph with indices, but remember it will only //accept indices in rows for which map.MyGID(row) is true. for(i=0; i<numElems; ++i) { switch(i) { case 0: nodes[0] = 0; nodes[1] = 1; nodes[2] = 4; nodes[3] = 3; break; case 1: nodes[0] = 1; nodes[1] = 2; nodes[2] = 5; nodes[3] = 4; break; case 2: nodes[0] = 3; nodes[1] = 4; nodes[2] = 7; nodes[3] = 6; break; case 3: nodes[0] = 4; nodes[1] = 5; nodes[2] = 8; nodes[3] = 7; break; } for(j=0; j<numNodesPerElem; ++j) { if (map.MyGID(nodes[j])) { err = graph->InsertGlobalIndices(nodes[j], numNodesPerElem, nodes); if (err<0) return(err); } } } EPETRA_CHK_ERR( graph->FillComplete() ); } Epetra_FEVbrMatrix* A = NULL; if (preconstruct_graph) { A = new Epetra_FEVbrMatrix(Copy, *graph); } else { A = new Epetra_FEVbrMatrix(Copy, map, 1); } //EPETRA_CHK_ERR( A->PutScalar(0.0) ); double* values_1d = new double[numNodesPerElem*numNodesPerElem]; double** values_2d = new double*[numNodesPerElem]; for(i=0; i<numNodesPerElem*numNodesPerElem; ++i) values_1d[i] = 1.0; int offset = 0; for(i=0; i<numNodesPerElem; ++i) { values_2d[i] = &(values_1d[offset]); offset += numNodesPerElem; } for(i=0; i<numElems; ++i) { switch(i) { case 0: nodes[0] = 0; nodes[1] = 1; nodes[2] = 4; nodes[3] = 3; break; case 1: nodes[0] = 1; nodes[1] = 2; nodes[2] = 5; nodes[3] = 4; break; case 2: nodes[0] = 3; nodes[1] = 4; nodes[2] = 7; nodes[3] = 6; break; case 3: nodes[0] = 4; nodes[1] = 5; nodes[2] = 8; nodes[3] = 7; break; } for(j=0; j<numNodesPerElem; ++j) { if (preconstruct_graph) { err = A->BeginSumIntoGlobalValues(nodes[j], numNodesPerElem, nodes); if (err<0) return(err); } else { err = A->BeginInsertGlobalValues(nodes[j], numNodesPerElem, nodes); if (err<0) return(err); } for(k=0; k<numNodesPerElem; ++k) { err = A->SubmitBlockEntry(values_1d, blockSize, blockSize, blockSize); if (err<0) return(err); } err = A->EndSubmitEntries(); if (err<0) return(err); } } EPETRA_CHK_ERR( A->GlobalAssemble() ); Epetra_FEVbrMatrix* Acopy = new Epetra_FEVbrMatrix(*A); if (verbose) { cout << "A:"<<*A << endl; cout 
<< "Acopy:"<<*Acopy<<endl; } Epetra_Vector x(A->RowMap()), y(A->RowMap()); x.PutScalar(1.0); y.PutScalar(0.0); Epetra_Vector x2(Acopy->RowMap()), y2(Acopy->RowMap()); x2.PutScalar(1.0); y2.PutScalar(0.0); A->Multiply(false, x, y); Acopy->Multiply(false, x2, y2); double ynorm2, y2norm2; y.Norm2(&ynorm2); y2.Norm2(&y2norm2); if (ynorm2 != y2norm2) { cerr << "norm2(A*ones) != norm2(*Acopy*ones)"<<endl; return(-99); } Epetra_FEVbrMatrix* Acopy2 = new Epetra_FEVbrMatrix(Copy, A->RowMap(), A->ColMap(), 1); *Acopy2 = *Acopy; Epetra_Vector x3(Acopy->RowMap()), y3(Acopy->RowMap()); x3.PutScalar(1.0); y3.PutScalar(0.0); Acopy2->Multiply(false, x3, y3); double y3norm2; y3.Norm2(&y3norm2); if (y3norm2 != y2norm2) { cerr << "norm2(Acopy*ones) != norm2(Acopy2*ones)"<<endl; return(-999); } int len = 20; int* indices = new int[len]; double* values = new double[len]; int numIndices; if (map.MyGID(0)) { int lid = map.LID(0); EPETRA_CHK_ERR( A->ExtractMyRowCopy(lid, len, numIndices, values, indices) ); if (numIndices != 4) { return(-1); } if (indices[0] != lid) { return(-2); } if (values[0] != 1.0*numProcs) { cout << "ERROR: values[0] ("<<values[0]<<") should be "<<numProcs<<endl; return(-3); } } if (map.MyGID(4)) { int lid = map.LID(4); EPETRA_CHK_ERR( A->ExtractMyRowCopy(lid, len, numIndices, values, indices) ); if (numIndices != 9) { return(-4); } int lcid = A->LCID(4); // if (indices[lcid] != 4) { // cout << "ERROR: indices[4] ("<<indices[4]<<") should be " // <<A->LCID(4)<<endl; // return(-5); // } if (values[lcid] != 4.0*numProcs) { cout << "ERROR: values["<<lcid<<"] ("<<values[lcid]<<") should be " <<4*numProcs<<endl; return(-6); } } delete [] values_2d; delete [] values_1d; delete [] nodes; delete [] indices; delete [] values; delete A; delete Acopy2; delete Acopy; delete graph; return(0); }
//------------------------------------------------------------------------------ int check_colpermute_crsgraph(Epetra_Comm& Comm, bool verbose) { int MyPID = Comm.MyPID(); int NumProc = Comm.NumProc(); Comm.Barrier(); bool verbose1 = verbose; if (verbose) verbose = (MyPID==0); if (verbose) { cerr << "================check_colpermute_crsgraph==========" <<endl; } int NumMyElements = 5; int NumGlobalElements = NumMyElements*NumProc; Epetra_Map Map(NumGlobalElements, NumMyElements, 0, Comm); int* p = new int[NumMyElements]; int firstGlobalRow = MyPID*NumMyElements; if (verbose) { cout << "Permutation P:"<<endl; } int i; for(i=0; i<NumMyElements; ++i) { int row = firstGlobalRow+i; p[i] = NumGlobalElements - row - 1; if (verbose1) { cout << "p["<<firstGlobalRow+i<<"]: "<<p[i]<<endl; } } Epetra_CrsGraph Agrph(Copy, Map, 1); int col; //set up a tri-diagonal graph. for(i=0; i<NumMyElements; ++i) { int row = firstGlobalRow+i; col = NumGlobalElements - row - 1; Agrph.InsertGlobalIndices(row, 1, &col); if (col > 0) { int colm1 = col-1; Agrph.InsertGlobalIndices(row, 1, &colm1); } if (col < NumGlobalElements-1) { int colp1 = col+1; Agrph.InsertGlobalIndices(row, 1, &colp1); } } Agrph.FillComplete(); if (verbose1) { cout << "*************** graph Agrph: ********************"<<endl; cout << Agrph << endl; } EpetraExt::Permutation<Epetra_CrsGraph> P(Copy, Map, p); bool column_permutation = true; Epetra_CrsGraph& Bgrph = P(Agrph, column_permutation); if (verbose1) { cout <<"************* column-permuted graph Bgrph: ****************"<<endl; cout << Bgrph << endl; } delete [] p; return(0); }
int MatrixMarketFileToBlockMap( const char *filename, const Epetra_Comm & comm, Epetra_BlockMap * & map) { const int lineLength = 1025; char line[lineLength]; char token[lineLength]; int M, N, numProc, MaxElementSize, MinElementSize, NumMyElements, IndexBase, NumGlobalElements, firstGid; FILE * handle = 0; bool inHeader = true; handle = fopen(filename,"r"); if (handle == 0) EPETRA_CHK_ERR(-1); // file not found while (inHeader) { if(fgets(line, lineLength, handle)==0) return(-1); if(sscanf(line, "%s", token)==0) return(-1); if (!strcmp(token, "%NumProc:")) inHeader = false; } if(fgets(line, lineLength, handle)==0) return(-1); // numProc value if(sscanf(line, "%s %d", token, &numProc)==0) return(-1); if(fgets(line, lineLength, handle)==0) return(-1); // MaxElementSize header line if(fgets(line, lineLength, handle)==0) return(-1); // MaxElementSize value if(sscanf(line, "%s %d", token, &MaxElementSize)==0) return(-1); if(fgets(line, lineLength, handle)==0) return(-1); // MinElementSize header line if(fgets(line, lineLength, handle)==0) return(-1); // MinElementSize value if(sscanf(line, "%s %d", token, &MinElementSize)==0) return(-1); if(fgets(line, lineLength, handle)==0) return(-1); // IndexBase header line if(fgets(line, lineLength, handle)==0) return(-1); // IndexBase value if(sscanf(line, "%s %d", token, &IndexBase)==0) return(-1); if(fgets(line, lineLength, handle)==0) return(-1); // NumGlobalElements header line if(fgets(line, lineLength, handle)==0) return(-1); // NumGlobalElements value if(sscanf(line, "%s %d", token, &NumGlobalElements)==0) return(-1); int ierr = 0; if (comm.NumProc()==numProc) { if(fgets(line, lineLength, handle)==0) return(-1); // NumMyElements header line firstGid = 0; for (int i=0; i<comm.MyPID(); i++) { if(fgets(line, lineLength, handle)==0) return(-1); // ith NumMyElements value if(sscanf(line, "%s %d", token, &NumMyElements)==0) return(-1); firstGid += NumMyElements; } if(fgets(line, lineLength, handle)==0) return(-1); // This PE's NumMyElements value if(sscanf(line, "%s %d", token, &NumMyElements)==0) return(-1); for (int i=comm.MyPID()+1; i<numProc; i++) { if(fgets(line, lineLength, handle)==0) return(-1); // ith NumMyElements value (dump these) } } else { ierr = 1; // Warning error, different number of processors. 
if(fgets(line, lineLength, handle)==0) return(-1); // NumMyElements header line for (int i=0; i<numProc; i++) { if(fgets(line, lineLength, handle)==0) return(-1); // ith NumMyElements value (dump these) } NumMyElements = NumGlobalElements/comm.NumProc(); firstGid = comm.MyPID()*NumMyElements; int remainder = NumGlobalElements%comm.NumProc(); if (comm.MyPID()<remainder) NumMyElements++; int extra = remainder; if (comm.MyPID()<remainder) extra = comm.MyPID(); firstGid += extra; } if(fgets(line, lineLength, handle)==0) return(-1); // Number of rows, columns if(sscanf(line, "%d %d", &M, &N)==0) return(-1); bool doSizes = (N>1); Epetra_IntSerialDenseVector v1(NumMyElements); Epetra_IntSerialDenseVector v2(NumMyElements); for (int i=0; i<firstGid; i++) { if(fgets(line, lineLength, handle)==0) return(-1); // dump these } if (doSizes) { for (int i=0; i<NumMyElements; i++) { if(fgets(line, lineLength, handle)==0) return(-1); if(sscanf(line, "%d %d", &v1[i], &v2[i])==0) return(-1); // load v1, v2 } } else { for (int i=0; i<NumMyElements; i++) { if(fgets(line, lineLength, handle)==0) return(-1); if(sscanf(line, "%d", &v1[i])==0) return(-1); // load v1 v2[i] = MinElementSize; // Fill with constant size } } if (fclose(handle)) return(-1); comm.Barrier(); if (MinElementSize==1 && MaxElementSize==1) map = new Epetra_Map(-1, NumMyElements, v1.Values(), IndexBase, comm); else map = new Epetra_BlockMap(-1, NumMyElements, v1.Values(), v2.Values(), IndexBase, comm); return(0); }
void exampleRoutine (const Epetra_Comm& comm, std::ostream& out) { using std::endl; // Print out the Epetra software version. if (comm.MyPID () == 0) { out << Epetra_Version () << endl << endl; } // The type of global indices. You could just set this to int, // but we want the example to work for Epetra64 as well. #ifdef EPETRA_NO_32BIT_GLOBAL_INDICES // Epetra was compiled only with 64-bit global index support, so use // 64-bit global indices. typedef long long global_ordinal_type; #else // Epetra was compiled with 32-bit global index support. If // EPETRA_NO_64BIT_GLOBAL_INDICES is defined, it does not also // support 64-bit indices. typedef int global_ordinal_type; #endif // EPETRA_NO_32BIT_GLOBAL_INDICES ////////////////////////////////////////////////////////////////////// // Create some Epetra_Map objects ////////////////////////////////////////////////////////////////////// // // Epetra has local and global Maps. Local maps describe objects // that are replicated over all participating MPI processes. Global // maps describe distributed objects. You can do imports and // exports between local and global maps; this is how you would turn // locally replicated objects into distributed objects and vice // versa. // // The total (global, i.e., over all MPI processes) number of // entries in the Map. This has the same type as that of global // indices, so it can represent very large values if Epetra was // built with 64-bit global index support. // // For this example, we scale the global number of entries in the // Map with the number of MPI processes. That way, you can run this // example with any number of MPI processes and every process will // still have a positive number of entries. const global_ordinal_type numGlobalEntries = comm.NumProc () * 5; // Tpetra can index the entries of a Map starting with 0 (C style), // 1 (Fortran style), or any base you want. 1-based indexing is // handy when interfacing with Fortran. We choose 0-based indexing // here. This also has the same type as that of global indices. const global_ordinal_type indexBase = 0; // Construct a Map that puts the same number of equations on each // (MPI) process. The Epetra_Comm is passed in by value, but that's // OK, because Epetra_Comm has shallow copy semantics. (Its copy // constructor and assignment operator do not call MPI_Comm_dup; // they just pass along the MPI_Comm.) Epetra_Map contigMap (numGlobalEntries, indexBase, comm); // contigMap is contiguous by construction. if (! contigMap.LinearMap ()) { throw std::logic_error ("The supposedly contiguous Map isn't contiguous."); } // Let's create a second Map. It will have the same number of // global entries per process, but will distribute them differently, // in round-robin (1-D cyclic) fashion instead of contiguously. // We'll use the version of the Map constructor that takes, on each // MPI process, a list of the global indices in the Map belonging to // that process. You can use this constructor to construct an // overlapping (also called "not 1-to-1") Map, in which one or more // entries are owned by multiple processes. We don't do that here; // we make a nonoverlapping (also called "1-to-1") Map. 
const int numGblIndsPerProc = 5; global_ordinal_type* gblIndList = new global_ordinal_type [numGblIndsPerProc]; const int numProcs = comm.NumProc (); const int myRank = comm.MyPID (); for (int k = 0; k < numGblIndsPerProc; ++k) { gblIndList[k] = myRank + k*numProcs; } Epetra_Map cyclicMap (numGlobalEntries, numGblIndsPerProc, gblIndList, indexBase, comm); // The above constructor makes a deep copy of the input index list, // so it's safe to deallocate that list after this constructor // completes. if (gblIndList != NULL) { delete [] gblIndList; gblIndList = NULL; } // If there's more than one MPI process in the communicator, // then cyclicMap is definitely NOT contiguous. if (comm.NumProc () > 1 && cyclicMap.LinearMap ()) { throw std::logic_error ("The cyclic Map claims to be contiguous."); } // contigMap and cyclicMap should always be compatible. However, if // the communicator contains more than 1 process, then contigMap and // cyclicMap are NOT the same. // if (! contigMap.isCompatible (*cyclicMap)) { // throw std::logic_error ("contigMap should be compatible with cyclicMap, " // "but it's not."); // } if (comm.NumProc () > 1 && contigMap.SameAs (cyclicMap)) { throw std::logic_error ("contigMap should not be the same as cyclicMap."); } ////////////////////////////////////////////////////////////////////// // We have maps now, so we can create vectors. ////////////////////////////////////////////////////////////////////// // Create an Epetra_Vector with the contiguous Map we created above. // This version of the constructor will fill the vector with zeros. // The Vector constructor takes a Map by value, but that's OK, // because Epetra_Map has shallow copy semantics. It uses reference // counting internally to avoid copying data unnecessarily. Epetra_Vector x (contigMap); // The copy constructor performs a deep copy. // x and y have the same Map. Epetra_Vector y (x); // Create a Vector with the 1-D cyclic Map. Calling the constructor // with false for the second argument leaves the data uninitialized, // so that you can fill it later without paying the cost of // initially filling it with zeros. Epetra_Vector z (cyclicMap, false); // Set the entries of z to (pseudo)random numbers. Please don't // consider this a good parallel pseudorandom number generator. (void) z.Random (); // Set the entries of x to all ones. (void) x.PutScalar (1.0); // Define some constants for use below. const double alpha = 3.14159; const double beta = 2.71828; const double gamma = -10.0; // x = beta*x + alpha*z // // This is a legal operation! Even though the Maps of x and z are // not the same, their Maps are compatible. Whether it makes sense // or not depends on your application. (void) x.Update (alpha, z, beta); (void) y.PutScalar (42.0); // Set all entries of y to 42.0 // y = gamma*y + alpha*x + beta*z y.Update (alpha, x, beta, z, gamma); // Compute the 2-norm of y. // // The norm may have a different type than scalar_type. // For example, if scalar_type is complex, then the norm is real. // The ScalarTraits "traits class" gives us the type of the norm. double theNorm = 0.0; (void) y.Norm2 (&theNorm); // Print the norm of y on Proc 0. out << "Norm of y: " << theNorm << endl; }
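// Hypothetical driver sketch, not part of the original source: shows how
// exampleRoutine() is typically invoked, with an Epetra_MpiComm under MPI and an
// Epetra_SerialComm otherwise (assumes the corresponding Epetra headers and,
// when HAVE_MPI is defined, mpi.h are included).
int example_main(int argc, char** argv)
{
#ifdef HAVE_MPI
  MPI_Init(&argc, &argv);
  Epetra_MpiComm comm(MPI_COMM_WORLD);
#else
  Epetra_SerialComm comm;
#endif

  exampleRoutine(comm, std::cout);

  if (comm.MyPID() == 0)
    std::cout << "End Result: TEST PASSED" << std::endl;

#ifdef HAVE_MPI
  MPI_Finalize();
#endif
  return 0;
}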
//------------------------------------------------------------------------------- int check_colpermute_crsmatrix(Epetra_Comm& Comm, bool verbose) { int MyPID = Comm.MyPID(); int NumProc = Comm.NumProc(); Comm.Barrier(); bool verbose1 = verbose; if (verbose) verbose = (MyPID==0); if (verbose) { cerr << "================check_colpermute_crsmatrix==========" <<endl; } int NumMyElements = 5; int NumGlobalElements = NumMyElements*NumProc; Epetra_Map Map(NumGlobalElements, NumMyElements, 0, Comm); int* p = new int[NumMyElements]; int firstGlobalRow = MyPID*NumMyElements; if (verbose) { cout << "Permutation P:"<<endl; } int i; for(i=0; i<NumMyElements; ++i) { int row = firstGlobalRow+i; p[i] = NumGlobalElements - row - 1; if (verbose1) { cout << "p["<<firstGlobalRow+i<<"]: "<<p[i]<<endl; } } Epetra_CrsMatrix A(Copy, Map, 1); int col; double val; //set up a tri-diagonal graph. for(i=0; i<NumMyElements; ++i) { int row = firstGlobalRow+i; col = NumGlobalElements - row - 1; val = 1.0*col; A.InsertGlobalValues(row, 1, &val, &col); if (col > 0) { int colm1 = col-1; val = 1.0*colm1; A.InsertGlobalValues(row, 1, &val, &colm1); } if (col < NumGlobalElements-1) { int colp1 = col+1; val = 1.0*colp1; A.InsertGlobalValues(row, 1, &val, &colp1); } } A.FillComplete(); if (verbose1) { cout << "*************** matrix A: ********************"<<endl; cout << A << endl; } EpetraExt::Permutation<Epetra_CrsMatrix> P(Copy, Map, p); bool column_permutation = true; Epetra_CrsMatrix& B = P(A, column_permutation); if (verbose1) { cout <<"************* column-permuted matrix B: ****************"<<endl; cout << B << endl; } delete [] p; return(0); }
//------------------------------------------------------------------------------- int check_rowpermute_crsmatrix_global_diagonal(Epetra_Comm& Comm, bool verbose) { int MyPID = Comm.MyPID(); int NumProc = Comm.NumProc(); Comm.Barrier(); bool verbose1 = verbose; if (verbose) verbose = (MyPID==0); if (verbose) { cerr << "================check_rowpermute_crsmatrix_global_diagonal==========" <<endl; } int NumMyElements = 5; int NumGlobalElements = NumMyElements*NumProc; Epetra_Map Map(NumGlobalElements, NumMyElements, 0, Comm); int* p = new int[NumMyElements]; int firstGlobalRow = MyPID*NumMyElements; //Now set up a permutation that will GLOBALLY reverse the order of all rows. //(i.e., if there are multiple processors, there will be inter-processor //data movement as rows are migrated.) int i; Epetra_CrsMatrix A(Copy, Map, 1); int col; double val; //set up a diagonal matrix A. It's diagonal because that's the easiest //to fill and to examine output before and after permutation... for(i=0; i<NumMyElements; ++i) { int row = firstGlobalRow+i; val = 1.0*row; col = row; A.InsertGlobalValues(row, 1, &val, &col); } A.FillComplete(); if (verbose1) { cout << "******************* matrix A: ****************************"<<endl; cout << A << endl; } if (verbose) { cout << "Permutation P:"<<endl; } for(i=0; i<NumMyElements; ++i) { int globalrow = NumGlobalElements-(firstGlobalRow+i)-1; p[i] = globalrow; if (verbose1) { cout << "p["<<firstGlobalRow+i<<"]: "<<p[i]<<endl; } } EpetraExt::Permutation<Epetra_CrsMatrix> Pglobal(Copy, Map, p); Epetra_CrsMatrix& Bglobal = Pglobal(A); if (verbose1) { cout << "******************* permuted matrix Bglobal: *******************" <<endl; cout << Bglobal << endl; } return(0); }
int fevec3(Epetra_Comm& Comm, bool verbose)
{
  int ierr = 0;
  int NumGlobalElems = 4;
  int elemSize = 40;
  int indexBase = 0;
  Epetra_BlockMap Map(NumGlobalElems, elemSize, indexBase, Comm);

  int Numprocs = Comm.NumProc();
  int MyPID = Comm.MyPID();

  if (Numprocs != 2) return(0);

  int NumCols = 3;
  int* Indices = new int[NumCols];
  int* numValuesPerID = new int[NumCols];
  for(int i=0; i<NumCols; ++i) {
    numValuesPerID[i] = elemSize;
  }

  double* Values = new double[NumCols*elemSize];

  // Create vectors
  Epetra_FEVector b(Map, 1);
  Epetra_FEVector x0(Map, 1);

  // source terms
  NumCols = 2;

  if(MyPID==0)  // indices corresponding to element 0 on processor 0
  {
    Indices[0] = 0;
    Indices[1] = 3;
    for(int ii=0; ii<NumCols*elemSize; ++ii) {
      Values[ii] = 1./2.;
    }
  }
  else
  {
    Indices[0] = 1;
    Indices[1] = 2;
    for(int ii=0; ii<NumCols*elemSize; ++ii) {
      Values[ii] = 0.;
    }
  }

  EPETRA_TEST_ERR( b.SumIntoGlobalValues(NumCols, Indices, numValuesPerID, Values),
                   ierr);
  EPETRA_TEST_ERR( b.GlobalAssemble(), ierr);

  if (verbose&&MyPID==0) {
    cout << "b:"<<endl;
  }
  if (verbose) {
    b.Print(cout);
  }

  x0 = b;

  if (verbose&&MyPID==0) {
    cout << "x:"<<endl;
  }
  if (verbose) {
    x0.Print(cout);
  }

  delete [] Values;
  delete [] Indices;
  delete [] numValuesPerID;

  return(0);
}
void Trilinos_Util_distrib_vbr_matrix(const Epetra_Comm & Comm,
                                      int *N_global, int *N_blk_global,
                                      int *n_nonzeros, int *n_blk_nonzeros,
                                      int *N_update, int **update,
                                      double **val, int **indx,
                                      int **rpntr, int **cpntr,
                                      int **bpntr, int **bindx,
                                      double **x, double **b, double **xexact)
#undef DEBUG
{
  int i, n_global_nonzeros, n_global_blk_nonzeros;
  int N_local;
  int j, row, have_xexact = 0 ;
  int *rpntr1, *bindx1, *bpntr1, *indx1;
  double *val1, *b1, *x1, *xexact1=0;

  int MyPID = Comm.MyPID();
  int NumProc = Comm.NumProc();

  printf("Processor %d of %d entering distrib_matrix.\n",
         MyPID,NumProc) ;

  /*************** Distribute global matrix to all processors ************/

  if(MyPID == 0)
  {
    if ((*xexact) != NULL) have_xexact = 1;
    printf("%s", "Broadcasting exact solution\n");
  }

  if(NumProc > 1)
  {
    Comm.Broadcast( N_global,       1, 0);
    Comm.Broadcast( N_blk_global,   1, 0);
    Comm.Broadcast( n_nonzeros,     1, 0);
    Comm.Broadcast( n_blk_nonzeros, 1, 0);
    Comm.Broadcast( &have_xexact,   1, 0);

    printf("Processor %d of %d done with global parameter broadcast.\n",
           MyPID,NumProc) ;

    if(MyPID != 0)
    {
      *bpntr = (int    *) calloc(*N_blk_global+1,   sizeof(int)) ;
      *rpntr = (int    *) calloc(*N_blk_global+1,   sizeof(int)) ;
      *bindx = (int    *) calloc(*n_blk_nonzeros+1, sizeof(int)) ;
      *indx  = (int    *) calloc(*n_blk_nonzeros+1, sizeof(int)) ;
      *val   = (double *) calloc(*n_nonzeros+1,     sizeof(double)) ;
      printf("Processor %d of %d done with global calloc.\n",
             MyPID,NumProc) ;
    }

    Comm.Broadcast( (*bpntr), (*N_blk_global+1),   0);
    Comm.Broadcast( (*rpntr), (*N_blk_global+1),   0);
    Comm.Broadcast( (*bindx), (*n_blk_nonzeros+1), 0);
    Comm.Broadcast( (*indx),  (*n_blk_nonzeros+1), 0);
    Comm.Broadcast( (*val),   (*n_nonzeros+1),     0);

    printf("Processor %d of %d done with matrix broadcast.\n",
           MyPID,NumProc) ;

    /* Set rhs and initialize guess */
    if(MyPID != 0)
    {
      (*b) = (double *) calloc(*N_global, sizeof(double)) ;
      (*x) = (double *) calloc(*N_global, sizeof(double)) ;
      if (have_xexact)
        (*xexact) = (double *) calloc(*N_global, sizeof(double)) ;
    }

    Comm.Broadcast( (*x), (*N_global), 0);
    Comm.Broadcast( (*b), (*N_global), 0);
    if (have_xexact) Comm.Broadcast((*xexact), (*N_global), 0);
    printf("Processor %d of %d done with rhs/guess broadcast.\n",
           MyPID,NumProc) ;
  }

  /********************** Generate update map *************************/

  //read_update(N_update, update, proc_config, *N_blk_global, 1, linear) ;
  Epetra_Map map(*N_blk_global, 0, Comm);

  *N_update = map.NumMyElements();
  (*update) = (int *) calloc(*N_update, sizeof(int)) ;
  map.MyGlobalElements(*update);

  printf("Processor %d of %d has %d rows of %d total block rows.\n",
         MyPID,NumProc,*N_update,*N_blk_global) ;

  /*************** Construct local matrix from global matrix ************/

  /* The local matrix is a copy of the rows assigned to this processor.
     It is stored in VBR format and still has global indices. */

  if(NumProc > 1)
  {
    n_global_nonzeros = *n_nonzeros;
    n_global_blk_nonzeros = *n_blk_nonzeros;

    *n_nonzeros = 0;
    *n_blk_nonzeros = 0;
    N_local = 0;

    for (i=0; i<*N_update; i++)
    {
      row = (*update)[i];
      *n_nonzeros     += (*indx)[(*bpntr)[row+1]] - (*indx)[(*bpntr)[row]];
      *n_blk_nonzeros += (*bpntr)[row+1] - (*bpntr)[row];
      N_local         += (*rpntr)[row+1] - (*rpntr)[row];
    }

    printf("Processor %d of %d has %d nonzeros of %d total nonzeros.\n",
           MyPID,NumProc, *n_nonzeros,n_global_nonzeros) ;

    printf("Processor %d of %d has %d block nonzeros of %d total block nonzeros.\n",
           MyPID,NumProc, *n_blk_nonzeros,n_global_blk_nonzeros) ;

    printf("Processor %d of %d has %d equations of %d total equations.\n",
           MyPID,NumProc, N_local,*N_global) ;

#ifdef DEBUG
    {
      double sum1 = 0.0;
      for (i=0;i<*N_global; i++) sum1 += (*b)[i];
      printf("Processor %d of %d has sum of b = %12.4g.\n",
             MyPID,NumProc,sum1) ;
    }
#endif /* DEBUG */

    /* Allocate memory for local matrix */

    bpntr1 = (int    *) calloc(*N_update+1,       sizeof(int)) ;
    rpntr1 = (int    *) calloc(*N_update+1,       sizeof(int)) ;
    bindx1 = (int    *) calloc(*n_blk_nonzeros+1, sizeof(int)) ;
    indx1  = (int    *) calloc(*n_blk_nonzeros+1, sizeof(int)) ;
    val1   = (double *) calloc(*n_nonzeros+1,     sizeof(double)) ;
    b1     = (double *) calloc(N_local,           sizeof(double)) ;
    x1     = (double *) calloc(N_local,           sizeof(double)) ;
    if (have_xexact)
      xexact1 = (double *) calloc(N_local, sizeof(double)) ;

    {
      int cur_blk_size, indx_offset, len_val, row_offset, row_offset1;
      double *val_ptr, *val1_ptr;

      bpntr1[0] = 0;
      indx1[0] = 0;
      rpntr1[0] = 0;

      for (i=0; i<*N_update; i++)
      {
        row = (*update)[i];
        cur_blk_size = (*rpntr)[row+1] - (*rpntr)[row];
        rpntr1[i+1] = rpntr1[i] + cur_blk_size;
        row_offset  = (*rpntr)[row];
        row_offset1 = rpntr1[i];
        for (j = 0; j<cur_blk_size; j++)
        {
          b1[row_offset1+j] = (*b)[row_offset+j];
          x1[row_offset1+j] = (*x)[row_offset+j];
          if (have_xexact) xexact1[row_offset1+j] = (*xexact)[row_offset+j];
        }
        bpntr1[i+1] = bpntr1[i];

#ifdef DEBUG
        printf("Proc %d of %d: Global row = %d: Local row = %d: b = %12.4g: x = %12.4g: bindx = %d: val = %12.4g \n",
               MyPID,NumProc, row, i, b1[i], x1[i], bindx1[i], val1[i]) ;
#endif

        indx_offset = (*indx)[(*bpntr)[row]] - indx1[bpntr1[i]];
        for (j = (*bpntr)[row]; j < (*bpntr)[row+1]; j++)
        {
          indx1[bpntr1 [i+1] + 1] = (*indx)[j+1] - indx_offset;
          bindx1[bpntr1 [i+1] ]   = (*bindx)[j];
          bpntr1[i+1] ++;
        }
        len_val = indx1[bpntr1[i+1]] - indx1[bpntr1[i]];
        val_ptr  = (*val)+(*indx)[(*bpntr)[row]];
        val1_ptr = val1+indx1[bpntr1[i]];
        for (j = 0; j<len_val; j++)
        {
          *val1_ptr = *val_ptr;
          val_ptr++; val1_ptr++;
        }
      }
    }

    printf("Processor %d of %d done with extracting local operators.\n",
           MyPID,NumProc) ;

    if (have_xexact)
    {
      printf("The residual using VBR format and exact solution on processor %d is %12.4g\n",
             MyPID,
             Trilinos_Util_svbrres (N_local, *N_global, *N_update,
                                    val1, indx1, bindx1, rpntr1, (*rpntr),
                                    bpntr1, bpntr1+1,
                                    (*xexact), b1));
    }

    /* Release memory for global matrix, rhs and solution */

    free ((void *) (*val));
    free ((void *) (*indx));
    free ((void *) (*bindx));
    free ((void *) (*bpntr));
    free ((void *) (*rpntr));
    free ((void *) (*b));
    free ((void *) (*x));
    if (have_xexact) free((void *) *xexact);

    /* Return local matrix through same pointers. */

    *val = val1;
    *indx = indx1;
    *bindx = bindx1;
    *bpntr = bpntr1;
    *rpntr = rpntr1;
    *b = b1;
    *x = x1;
    if (have_xexact) *xexact = xexact1;
  }

  if (have_xexact && NumProc == 1)
  {
    printf("The residual using VBR format and exact solution on processor %d is %12.4g\n",
           MyPID,
           Trilinos_Util_svbrres (*N_global, *N_global, *N_update,
                                  (*val), (*indx), (*bindx), (*rpntr), (*rpntr),
                                  (*bpntr), (*bpntr)+1,
                                  (*xexact), (*b)));
  }

  printf("Processor %d of %d leaving distrib_matrix.\n",
         MyPID,NumProc) ;

} /* end distrib_matrix */
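// Hedged follow-on sketch (not from the original source): once
// Trilinos_Util_distrib_vbr_matrix has returned, each processor could wrap the
// block rows it owns in an Epetra_BlockMap.  The helper name
// build_local_vbr_rowmap is hypothetical; its arguments mirror the routine's
// output parameters (N_blk_global, N_update, update, rpntr).
#include <vector>
#include "Epetra_BlockMap.h"

static Epetra_BlockMap* build_local_vbr_rowmap(const Epetra_Comm& Comm,
                                               int N_blk_global, int N_update,
                                               int* update, const int* rpntr)
{
  // After distribution, rpntr is the local row-pointer array, so block row i
  // spans point rows rpntr[i] .. rpntr[i+1]-1.
  std::vector<int> elemSizes(N_update > 0 ? N_update : 1, 1);
  for (int i = 0; i < N_update; i++)
    elemSizes[i] = rpntr[i+1] - rpntr[i];

  return new Epetra_BlockMap(N_blk_global, N_update, update,
                             &elemSizes[0], 0, Comm);
}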
void linear_distribution_2D(
  int mypRow,               // processor's row (in 2D)
  int mypCol,               // processor's col (in 2D)
  int npRows,               // number of processor rows
  int npCols,               // number of processor cols
  itype nrows,              // Number of global matrix rows
  Epetra_Comm &comm,        // Epetra communicator to be used in maps
  Epetra_Map **vectorMap,   // OUTPUT: Map to be used for the vector
  Epetra_Map **rowMap,      // OUTPUT: Map to be used for the matrix rows
  Epetra_Map **colMap,      // OUTPUT: Map to be used for the matrix cols
  long long offsetEpetra64
)
{
  // The vector will be distributed linearly:
  //   [0 1 2 3 4 5 6 7 8]
  //
  // If nrows is not divisible by np, extra rows will be distributed among
  // the processors.  (See below.)

  int me = comm.MyPID();
  int np = comm.NumProc();

  // Create vector map first
  vector<itype> entries(np+1, nrows / np);  // Initial # entries per proc
  int nExtraEntries = nrows % np;

  // Distribute the extra entries evenly among processors.
  // To distribute the extra entries evenly among processor rows and
  // columns, we distribute them along diagonals of the matrix distribution.
  // For our example, assume we have seven extra values (the max possible
  // with np=8).  Then we give one extra entry each to ranks
  // [0, 3, 4, 7, 1, 2, 5].  For fewer extra entries, we follow the same
  // order of assignment, and just stop early.

  for (int cnt = 0, i = 0; (cnt < nExtraEntries) && (i < npRows); i++) {
    for (int j = 0; (cnt < nExtraEntries) && (j < npCols); cnt++, j++) {
      int rankForExtra = TWODPRANK(i, j, npRows, npCols);
      entries[rankForExtra+1]++;  // Store in rankForExtra+1 to simplify the
                                  // later prefix sum.
    }
  }

  // Perform prefix sum of entries.
  entries[0] = 0;
  for (int i = 1; i <= np; i++)
    entries[i] = entries[i-1] + entries[i];
  // Now entries contains the first vector entry for each rank.

  // Create the global elements for the vector.
  int nMyGlobalElements = entries[me+1]-entries[me];
  vector<itype> myGlobalElements(nMyGlobalElements+1);

  for (int i = 0; i < nMyGlobalElements; i++)
    myGlobalElements[i] = entries[me] + i + offsetEpetra64;

  *vectorMap = new Epetra_Map(-1, nMyGlobalElements, &myGlobalElements[0],
                              0, comm);

  // Column map:  Easy; consecutive entries for all ranks in column.
  int firstRank = mypCol * npRows;  // First rank in my column
  nMyGlobalElements = 0;
  for (int i = firstRank; i < firstRank + npRows; i++)
    nMyGlobalElements += entries[i+1] - entries[i];

  itype myFirstCol = entries[firstRank];
  myGlobalElements.resize(nMyGlobalElements+1);
  for (int i = 0; i < nMyGlobalElements; i++)
    myGlobalElements[i] = myFirstCol + i + offsetEpetra64;

  *colMap = new Epetra_Map(-1, nMyGlobalElements, &myGlobalElements[0],
                           0, comm);

  // Row map:  trickier since corresponding vector entries are not
  // consecutive
  firstRank = mypRow;  // First rank in my row
  nMyGlobalElements = 0;
  for (int i = 0; i < npCols; i++) {
    int rank = firstRank + i * npRows;
    nMyGlobalElements += entries[rank+1] - entries[rank];
  }

  myGlobalElements.resize(nMyGlobalElements+1);
  for (int cnt = 0, i = 0; i < npCols; i++) {
    int rank = firstRank + i * npRows;
    for (itype j = entries[rank]; j < entries[rank+1]; j++)
      myGlobalElements[cnt++] = j + offsetEpetra64;
  }

  *rowMap = new Epetra_Map(-1, nMyGlobalElements, &myGlobalElements[0],
                           0, comm);
}
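// Hedged usage sketch (not from the original source): how the three maps
// produced by linear_distribution_2D might be consumed.  The helper name
// make_2D_objects is hypothetical; itype is assumed to be defined elsewhere in
// this driver, as in linear_distribution_2D above.
#include "Epetra_Vector.h"
#include "Epetra_CrsMatrix.h"

static void make_2D_objects(int mypRow, int mypCol, int npRows, int npCols,
                            itype nrows, Epetra_Comm& comm,
                            long long offsetEpetra64)
{
  Epetra_Map *vectorMap = NULL, *rowMap = NULL, *colMap = NULL;
  linear_distribution_2D(mypRow, mypCol, npRows, npCols, nrows, comm,
                         &vectorMap, &rowMap, &colMap, offsetEpetra64);

  // A vector over the linear (1D) distribution, and a matrix whose row and
  // column ownership follows the 2D processor grid.
  Epetra_Vector x(*vectorMap);
  Epetra_CrsMatrix A(Copy, *rowMap, *colMap, 0);

  // ... insert entries into A, then A.FillComplete(*vectorMap, *vectorMap)
  //     so the domain and range maps match the vector distribution ...

  delete vectorMap;
  delete rowMap;
  delete colMap;
}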
//------------------------------------------------------------------------------
int check_rowpermute_multivector_local(Epetra_Comm& Comm,
                                       bool verbose)
{
  int MyPID = Comm.MyPID();
  int NumProc = Comm.NumProc();

  Comm.Barrier();
  bool verbose1 = verbose;

  if (verbose) verbose = (MyPID==0);

  if (verbose) {
    cerr << "================check_rowpermute_multivector_local=========="
         <<endl;
  }

  int NumMyElements = 5;
  int NumGlobalElements = NumMyElements*NumProc;

  Epetra_Map Map(NumGlobalElements, NumMyElements, 0, Comm);

  int* p = new int[NumMyElements];
  int firstGlobalRow = MyPID*NumMyElements;

  //Set up a permutation that will reverse the order of all LOCAL rows. (i.e.,
  //this test won't cause any inter-processor data movement.)

  if (verbose) {
    cout << "Permutation P:"<<endl;
  }

  int i;

  for(i=0; i<NumMyElements; ++i) {
    p[i] = firstGlobalRow+NumMyElements-1-i;
    if (verbose1) {
      cout << "p["<<firstGlobalRow+i<<"]: "<<p[i]<<endl;
    }
  }

  Epetra_MultiVector v(Map, 3);

  double* v0 = v[0];
  double* v1 = v[1];
  double* v2 = v[2];

  for(i=0; i<NumMyElements; ++i) {
    v0[i] = 1.0*(firstGlobalRow+i) + 0.1;
    v1[i] = 1.0*(firstGlobalRow+i) + 0.2;
    v2[i] = 1.0*(firstGlobalRow+i) + 0.3;
  }

  if (verbose1) {
    cout << "*************** MultiVector v: ********************"<<endl;
    cout << v << endl;
  }

  EpetraExt::Permutation<Epetra_MultiVector> P(Copy, Map, p);

  Epetra_MultiVector& Pv = P(v);

  if (verbose1) {
    cout <<"************* permuted MultiVector Pv: ****************"<<endl;
    cout << Pv << endl;
  }

  delete [] p;  // the Permutation object copied p (Copy mode), so free it here

  return(0);
}
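// Hedged sketch (not part of the original test): a helper that could be called
// just before the return in check_rowpermute_multivector_local to verify the
// local reversal numerically.  The helper name verify_local_reversal is
// hypothetical.
#include "Epetra_MultiVector.h"

static int verify_local_reversal(const Epetra_MultiVector& v,
                                 const Epetra_MultiVector& Pv,
                                 int NumMyElements)
{
  // Because the permutation reverses rows locally, row i of Pv should hold the
  // values that row (NumMyElements-1-i) of v held, in every column.
  int nbad = 0;
  for (int k = 0; k < v.NumVectors(); ++k) {
    for (int i = 0; i < NumMyElements; ++i) {
      if (Pv[k][i] != v[k][NumMyElements-1-i]) ++nbad;
    }
  }
  return nbad;  // 0 if the permuted multivector matches the expectation
}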
//------------------------------------------------------------------------------
int check_rowpermute_crsgraph_local_diagonal(Epetra_Comm& Comm,
                                             bool verbose)
{
  int MyPID = Comm.MyPID();
  int NumProc = Comm.NumProc();

  Comm.Barrier();
  bool verbose1 = verbose;

  if (verbose) verbose = (MyPID==0);

  if (verbose) {
    cerr << "================check_rowpermute_crsgraph_local_diagonal=========="
         <<endl;
  }

  int NumMyElements = 5;
  int NumGlobalElements = NumMyElements*NumProc;

  Epetra_Map Map(NumGlobalElements, NumMyElements, 0, Comm);

  int* p = new int[NumMyElements];
  int firstGlobalRow = MyPID*NumMyElements;

  //Set up a permutation that will reverse the order of all LOCAL rows. (i.e.,
  //this test won't cause any inter-processor data movement.)

  if (verbose) {
    cout << "Permutation P:"<<endl;
  }

  int i;

  for(i=0; i<NumMyElements; ++i) {
    p[i] = firstGlobalRow+NumMyElements-1-i;
    if (verbose1) {
      cout << "p["<<firstGlobalRow+i<<"]: "<<p[i]<<endl;
    }
  }

  Epetra_CrsGraph Agrph(Copy, Map, 1);

  int col;

  //set up a diagonal graph. It's diagonal because that's the easiest
  //to fill and to examine output before and after permutation...

  for(i=0; i<NumMyElements; ++i) {
    int row = firstGlobalRow+i;
    col = row;
    Agrph.InsertGlobalIndices(row, 1, &col);
  }

  Agrph.FillComplete();

  if (verbose1) {
    cout << "*************** graph Agrph: ********************"<<endl;
    cout << Agrph << endl;
  }

  EpetraExt::Permutation<Epetra_CrsGraph> P(Copy, Map, p);

  Epetra_CrsGraph& Bgrph = P(Agrph);

  if (verbose1) {
    cout <<"************* permuted graph Bgrph: ****************"<<endl;
    cout << Bgrph << endl;
  }

  delete [] p;  // the Permutation object copied p (Copy mode), so free it here

  return(0);
}
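// Hedged sketch (not part of the original test): a helper that could be called
// after Bgrph is formed in check_rowpermute_crsgraph_local_diagonal.  For the
// reversed diagonal graph, global row (firstGlobalRow+i) of Bgrph should hold
// exactly one index, namely the column that row
// (firstGlobalRow+NumMyElements-1-i) of Agrph held.  The helper name
// verify_reversed_diagonal is hypothetical.
#include "Epetra_CrsGraph.h"

static int verify_reversed_diagonal(const Epetra_CrsGraph& Bgrph,
                                    int firstGlobalRow, int NumMyElements)
{
  int nbad = 0;
  for (int i = 0; i < NumMyElements; ++i) {
    int numIndices = 0;
    int gcol = -1;
    int err = Bgrph.ExtractGlobalRowCopy(firstGlobalRow+i, 1, numIndices, &gcol);
    int expected = firstGlobalRow + NumMyElements - 1 - i;
    if (err != 0 || numIndices != 1 || gcol != expected) ++nbad;
  }
  return nbad;  // 0 if each permuted row still holds its single entry
}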